In [None]:
#!pip install -q -U google-genai

In [None]:
# import
import json
import os
import random
import re
import time
from enum import Enum
from typing import Optional, List

import requests
from google import genai
from pydantic import *

In [None]:
# Closed vocabulary of entity categories the extraction model may assign.
#
# The enum mixes in `str`, so every member is also a plain string equal to its
# value. Member names and values match, except `APT_Group`, whose value is the
# hyphenated "APT-Group".
#
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose -- pydantic appears to copy an enum's docstring into the generated
# JSON schema, and that schema is embedded in the prompt. Confirm before adding one.
class Entity_Type_Enum(str, Enum):
    Time = "Time" # Refers to specific points or ranges when something occurs, such as dates, times, or periods (e.g., '2024-05-27', 'today', 'last year', 'June 2017 - August 2018').
    Vulnerability = "Vulnerability" # Represents a flaw or weakness in a system that can be exploited (e.g., 'CVE-2021-34527', 'Broken authentication').
    OS = "OS" # Operating Systems, managing hardware/software resources (e.g., 'Windows 10', 'Ubuntu', 'iOS').
    Product = "Product" # Software products developed for use or sale (e.g., 'Google Chrome', 'Adobe Photoshop').
    Device = "Device" # Specific equipment used for tasks (e.g., 'server', 'WS-C4503-E').
    Location = "Location" # Geographical places or positions (e.g., 'Los Angeles', 'Tehran').
    IP = "IP" # Unique identifiers for network devices (e.g., '192.168.1.1', '2001:0db8:85a3:0000:0000:8a2e:0370:7334').
    URL = "URL" # Web page addresses on the internet (e.g., 'https://www.freecodecamp.org').
    Domain = "Domain" # Domains of URLs (e.g., 'google.co.uk').
    Email = "Email" # Email addresses (e.g., 'example@gmail.com').
    Organization = "Organization" # Companies or institutions with a specific purpose (e.g., 'Yahoo', 'Yale University').
    Plugin = "Plugin" # Software components that extend existing programs (e.g., 'WooCommerce', 'WordPress Importer').
    Library = "Library" # Precompiled code used by programs (e.g., 'pyqt5', 'flask').
    Security_Team = "Security_Team" # Groups responsible for cybersecurity (e.g., 'MicrosoftSecurityTeam').
    APT_Group = "APT-Group" # Advanced Persistent Threat groups.
    Hacker_Team = "Hacker_Team" # Represents a team of hackers.
    Campaign = "Campaign" # Coordinated efforts or activities.
    Defense_Mechanism = "Defense_Mechanism" # Tools protecting against cyberattacks (e.g., 'AVG Firewall', 'WAF').
    Filepath = "Filepath" # Locations of files in a computer system (e.g., 'C:\\d\\admin/index.php').
    Person = "Person" # Individual human beings or usernames (e.g., 'Elon Musk', '@Elon_Musk').
    Protocol = "Protocol" # Rules governing data exchange (e.g., 'FTP', 'SMTP').
    Tools = "Tools" # Software or instruments used for analysis or attacks (e.g., 'Wireshark', 'Metasploit').
    Platform = "Platform" # Technologies supporting application development (e.g., 'WordPress').
    Hash = "Hash" # Fixed-size alphanumeric strings used for verification (e.g., '00236a2ae558018ed13b5222ef1bd987').
    Malware = "Malware" # Malicious software (e.g., 'BlackEnergy').
    Attack = "Attack" # Actions carried out to exploit systems (e.g., 'brute force attack').
    Damage = "Damage" # Damage refers to the harm or impact resulting from an attack, like data breaches.


    # Render a member as its bare value (e.g. "APT-Group") instead of the
    # default "Entity_Type_Enum.APT_Group" form.
    def __str__(self):
        return self.value

# A single entity mention extracted from a text.
# Every field is optional and carries a default ("" / None / 0 / 0).
# Documented with comments, not a class docstring, so the JSON schema
# generated from this model stays exactly as it was.
class Entity(BaseModel):
    Token: Optional[str] = Field(
        default="",
        description="The token or text representing the entity.",
    )
    Type: Optional[Entity_Type_Enum] = Field(
        default=None,
        description="The type of the entity, as defined in Entity_Type_Enum.",
    )
    Start_Index: Optional[int] = Field(
        default=0,
        description="The starting index of the entity in the text.",
    )
    End_Index: Optional[int] = Field(
        default=0,
        description="The ending index of the entity in the text.",
    )

# A directed, typed link between two entities: From --Type--> To.
# All three fields are required (no defaults are declared).
# Documented with comments, not a class docstring, so the JSON schema
# generated from this model stays exactly as it was.
class Relation(BaseModel):
    From: Entity = Field(
        description="The originating entity in the relationship, representing the entity from which the relationship is directed.",
    )
    To: Entity = Field(
        description="The destination entity in the relationship, representing the entity toward which the relationship is directed.",
    )
    Type: str = Field(
        description="A descriptor indicating the nature or category of the relationship between the two entities (e.g., 'owns', 'manages', 'located in').",
    )

# The label payload for one text: the entities found plus the relations
# between them. Both lists are optional and default to empty.
# Documented with comments, not a class docstring, so the JSON schema
# generated from this model stays exactly as it was.
class Entities(BaseModel):
    Entities: Optional[List[Entity]] = Field(
        default=[],
        description="A list of entities identified in the text.",
    )
    Relations: Optional[List[Relation]] = Field(
        default=[],
        description="A list of relations between entities.",
    )

# Top-level shape the language model is asked to return: the analysed text
# together with its label (entities + relations). Both fields default to None.
# The JSON schema of this model is what generate_prompt() embeds in the prompt,
# so it is documented with comments rather than a class docstring to keep that
# schema unchanged.
class EntitiesResponseModel(BaseModel):
    text: Optional[str] = None
    label: Optional[Entities] = None

def model_to_json_schema(model: "type[BaseModel]"):
    """Return the JSON schema of a pydantic model class as an indented string.

    Args:
        model: The pydantic model *class* (not an instance) to describe.

    Returns:
        str: The schema serialized as JSON with a 4-space indent.

    On pydantic v2 the schema comes from ``model_json_schema()``, because
    ``schema_json()`` is deprecated there and emits a warning on every call.
    On pydantic v1, which has no ``model_json_schema``, the original
    ``schema_json(indent=4)`` call is used.
    """
    if hasattr(model, "model_json_schema"):
        return json.dumps(model.model_json_schema(), indent=4)
    return model.schema_json(indent=4)

EntitiesTypes = ["Time","Vulnerability","OS","Product","Organization","Plugin","Library","Security_Team","Hacker_Team","Device","Defense_Mechanism","Location","URL","IP","Filepath","Person","Protocol","Tools","Platform","Hash","Malware","Attack","Domain","Damage","Email","APT-Group","Campaign"]

def _get_template3(text: str, format_instructions: str):
    type_entities_str = " , ".join(EntitiesTypes)
    a = f"""You are an advanced and intelligent Named Entity Recognition (NER) and Relation Extraction system. Your task is to identify and extract both entities and the relationships between them from the text I provide, using the specific definitions and descriptions outlined below.

I will provide you with:
1. **A list of entities** with precise descriptions.
2. **A text sample** from which you will extract both entities and their relationships.
3. **An output format** in which your response should be structured.
4. **Dont write Any Note** Output just Json Format and dont wite any Note.

### Entity Descriptions:
- **Time**: Refers to specific points or ranges when something occurs, such as dates, times, or periods (e.g., '2024-05-27', 'today', 'last year', 'June 2017 - August 2018').
- **Vulnerability**: Represents a flaw or weakness in a system that can be exploited (e.g., 'CVE-2021-34527', 'Broken authentication').
- **OS**: Operating Systems, managing hardware/software resources (e.g., 'Windows 10', 'Ubuntu', 'iOS').
- **Product**: Software products developed for use or sale (e.g., 'Google Chrome', 'Adobe Photoshop').
- **Device**: Specific equipment used for tasks (e.g., 'server', 'WS-C4503-E').
- **Location**: Geographical places or positions (e.g., 'Los Angeles', 'Tehran').
- **IP**: Unique identifiers for network devices (e.g., '192.168.1.1', '2001:0db8:85a3:0000:0000:8a2e:0370:7334').
- **URL**: Web page addresses on the internet (e.g., 'https://www.freecodecamp.org').
- **Domain**: Domains of URLs (e.g., 'google.co.uk').
- **Email**: Email addresses (e.g., 'example@gmail.com').
- **Organization**: Companies or institutions with a specific purpose (e.g., 'Yahoo', 'Yale University').
- **Plugin**: Software components that extend existing programs (e.g., 'WooCommerce', 'WordPress Importer').
- **Library**: Precompiled code used by programs (e.g., 'pyqt5', 'flask').
- **Security_Team**: Groups responsible for cybersecurity (e.g., 'MicrosoftSecurityTeam').
- **APT-Group**: Advanced Persistent Threat groups.
- **Hacker_Team**: Groups skilled in breaking into systems (e.g., 'Anonymous').
- **Campaign**: Coordinated efforts or activities.
- **Defense_Mechanism**: Tools protecting against cyberattacks (e.g., 'AVG Firewall', 'WAF').
- **Filepath**: Locations of files in a computer system (e.g., 'C:\\d\\admin/index.php').
- **Person**: Individual human beings or usernames (e.g., 'Elon Musk', '@Elon_Musk').
- **Protocol**: Rules governing data exchange (e.g., 'FTP', 'SMTP').
- **Tools**: Software or instruments used for analysis or attacks (e.g., 'Wireshark', 'Metasploit').
- **Platform**: Technologies supporting application development (e.g., 'WordPress').
- **Hash**: Fixed-size alphanumeric strings used for verification (e.g., '00236a2ae558018ed13b5222ef1bd987').
- **Malware**: Malicious software (e.g., 'BlackEnergy').
- **Attack**: Actions carried out to exploit systems (e.g., 'brute force attack').
- **Damage**: Damage refers to the harm or impact resulting from an attack, like data breaches.

### Relationship Descriptions:
- **Type**: The nature of the relationship between entities (e.g., 'owns', 'caused by', 'located in', 'connected to').

### Task:
Given the text below, enclosed by triple backticks, extract all relevant entities and categorize them according to the above definitions. Additionally, identify any relationships between entities and specify the relationship type. If an entity or relationship appears multiple times, list each occurrence.

### Format:
Your response should follow this JSON schema:
{format_instructions}

### Text:
```{text}```

Type of Entity:
{type_entities_str}

Relations:
0:  'Others',
1:  "targets",
2:  "used",
3:  "associated with",
4:  'located in',
5:  'exploiting',
6 : 'has vulnerability',
7:  'created',
8:  'attacked',
9 : 'can lead to'

output sample:
{{
  "text": "Phishing attack on user@example.com",
  "label": {{
    "Entities": [
        {{ "Token": "Phishing attack", "Type": "Attack"}},
        {{ "Token": "user@example.com", "Type": "Email"}}
    ],
    "Relations": [
      {{
        "From": {{ "Token": "Phishing attack", "Type": "Attack" }},
        "To": {{ "Token": "user@example.com", "Type": "Email" }},
        "Type": "on"
      }}
    ]
  }}
}}
"""

def remove_multiple_space(text:str):
    """Collapse every run of whitespace in ``text`` into one space.

    Newlines and tabs count as whitespace, so the result is a single line.
    Leading and trailing whitespace is removed.
    """
    whitespace_run = re.compile(r'\s+')
    single_spaced = whitespace_run.sub(' ', text)
    return single_spaced.strip()

def generate_prompt(text: str):
    """Build the single-line prompt sent to the model for ``text``.

    The JSON schema of ``EntitiesResponseModel`` is rendered, inserted into the
    template together with ``text``, and the whole prompt is then
    whitespace-collapsed -- so line breaks inside ``text`` are flattened to
    spaces as well.
    """
    response_schema = model_to_json_schema(EntitiesResponseModel)
    raw_prompt = _get_template3(text, response_schema)
    return remove_multiple_space(raw_prompt)

In [None]:
# SECURITY: the Gemini API key used to be hard-coded here. A key that has been
# committed or shared in a notebook should be treated as leaked and revoked.
# It is now read from the GEMINI_API_KEY environment variable.
api_key = os.environ.get("GEMINI_API_KEY", "")
if not api_key:
    print("Warning: GEMINI_API_KEY is not set; Gemini requests will fail until it is provided.")

gemini_model_name = "gemini-2.0-flash"           # model used for labeling
get_data_url = "https://lifti.ir/api/get_data"   # GET: returns one item as JSON with "id" and "text"
post_data_url = "https://lifti.ir/api/label"     # POST: receives {"id", "label"}

def _strip_code_fence(raw: str) -> str:
    """Return ``raw`` without a surrounding Markdown code fence (```json ... ```)."""
    # Strip first: a trailing newline after the closing fence (or leading
    # whitespace before the opening one) would otherwise keep the fence in place.
    cleaned = raw.strip()
    cleaned = cleaned.removeprefix("```json").removeprefix("```")
    return cleaned.removesuffix("```").strip()


def label(timeout: float = 60):
    """Fetch one unlabeled item, label it with Gemini and post the label back.

    Steps: GET ``get_data_url`` (expects JSON with ``id`` and ``text``), build
    the prompt with ``generate_prompt``, call ``gemini_model_name`` through the
    google-genai client, strip any Markdown code fence from the answer and
    POST ``{"id", "label"}`` to ``post_data_url``.

    Args:
        timeout: Seconds to wait on each HTTP request to the data API before
            giving up. Without it a stalled connection would hang forever.

    Returns:
        None. Progress and errors are printed; request, JSON and other
        exceptions are caught so that a caller looping over many items can
        continue with the next one.
    """
    try:
        response = requests.get(get_data_url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)

        _data = json.loads(response.text)
        item_id = _data.get("id")
        text = _data.get("text")

        # Validate before touching `text`: slicing None would raise a TypeError
        # and hide the real problem behind the generic handler below.
        if item_id is None or text is None:
            print("Error: 'id' or 'text' missing from the API response.")
            # Return instead of exit(1): SystemExit is not caught by
            # `except Exception` and would abort a batch loop / the kernel.
            return

        print(f"id: {item_id}\ndata:{text[:20]}")

        _query = generate_prompt(text).strip()

        client = genai.Client(api_key=api_key)
        gemini_response = client.models.generate_content(
            model=gemini_model_name, contents=_query
        )

        raw_label = gemini_response.text
        # NOTE(review): the SDK appears to return None when the answer has no
        # text part (e.g. a blocked response) -- confirm against the SDK docs.
        if raw_label is None:
            print("Error: Gemini returned no text for this item; nothing was posted.")
            return

        label_text = _strip_code_fence(raw_label)

        update_data = {
            "id": item_id,
            "label": label_text
        }

        # Short single-line preview of the label for the notebook output.
        preview = remove_multiple_space(label_text)
        if preview:
            if len(preview)>50:
                print(f"Label: {preview[:50]}\n__________")
            else:
                print(f"Label: {preview}")

        post_response = requests.post(post_data_url, json=update_data, timeout=timeout)
        post_response.raise_for_status() #check for post errors as well.
        print(f"Data posted successfully. Status code: {post_response.status_code}")

    except requests.exceptions.RequestException as e:
        print(f"Error during request: {e}")
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")



In [None]:
# Run one fetch -> Gemini -> post round-trip for a single item.
label()

In [None]:
# Label up to 500 items, one per iteration. label() catches and prints
# `Exception`s itself, so an ordinary failure on one item does not stop the loop.
# NOTE(review): the 2-second pause is presumably there to stay under the API
# rate limits -- confirm.
for i in range(500):
    label()
    time.sleep(2)