In [1]:
pip install pathway fastapi uvicorn faiss-cpu requests transformers pandas


Collecting pathway
  Downloading pathway-0.20.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/60.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.2/60.2 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting h3>=4 (from pathway)
  Downloading h3-4.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting sqlglot==10.6.1 (from pathway)
  Downloading sqlglot-10.6.1-py3-none-any.whl.metadata (14 kB)
Collecting python-sat>=0.1.8.dev0 (from pathway)
  Downloading python_sat-1.8.dev16-cp311-cp311-manylinux_2_17_x86_64.manylinu

In [2]:
import pathway as pw
import faiss
import requests
import json
import pandas as pd
from fastapi import FastAPI
from transformers import pipeline


In [3]:
def fetch_virus_total_data(api_key, domain, timeout=10):
    """Fetch the VirusTotal v3 report for a domain.

    Args:
        api_key: VirusTotal API key, sent in the ``x-apikey`` header.
        domain: Domain name to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body when the API answers with HTTP 200, otherwise
        ``None`` (non-200 status, network failure, timeout, or a body that is
        not valid JSON).
    """
    from urllib.parse import quote

    # Escape the domain so characters such as "/" or "?" cannot change the
    # request path or smuggle in a query string.
    url = f"https://www.virustotal.com/api/v3/domains/{quote(str(domain), safe='')}"
    headers = {"x-apikey": api_key}

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body is not JSON is treated like any other failure.
        return None

def fetch_shodan_data(api_key, ip, timeout=10):
    """Fetch the Shodan host record for an IP address.

    Args:
        api_key: Shodan API key, sent as the ``key`` query parameter.
        ip: IP address to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body when the API answers with HTTP 200, otherwise
        ``None`` (non-200 status, network failure, timeout, or a body that is
        not valid JSON).
    """
    from urllib.parse import quote

    # ":" stays unescaped so IPv6 literals keep their usual form.
    url = f"https://api.shodan.io/shodan/host/{quote(str(ip), safe=':')}"

    try:
        # Passing the key through `params` keeps the secret out of the URL
        # string built here and lets requests handle the encoding.
        response = requests.get(url, params={"key": api_key}, timeout=timeout)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body is not JSON is treated like any other failure.
        return None

In [4]:
class ThreatDatabase:
    """In-memory threat store: a FAISS L2 index plus a parallel metadata list.

    ``add_threat`` appends one entry to ``self.data`` per call, so position
    ``i`` in ``self.data`` is meant to describe vector ``i`` in the index.
    """

    def __init__(self, vector_dim=512):
        """Create an empty store for vectors of length ``vector_dim``."""
        self.index = faiss.IndexFlatL2(vector_dim)
        self.data = []

    def add_threat(self, threat_vector, threat_info):
        """Add a threat vector to the index and remember its metadata.

        NOTE(review): presumably ``threat_vector`` must be a float32 array of
        shape ``(1, vector_dim)`` so that index rows and ``self.data`` stay
        aligned - confirm against the FAISS Python API and the callers.
        """
        self.index.add(threat_vector)
        self.data.append(threat_info)

    def search_threats(self, query_vector, top_k=5):
        """Return metadata for up to ``top_k`` nearest stored threats.

        Only the first row of the search result is used, i.e. a single query.
        Fewer than ``top_k`` items (possibly none) are returned when the store
        holds fewer vectors than requested.
        """
        _, indices = self.index.search(query_vector, top_k)
        # FAISS pads the label row with -1 when it has fewer than top_k hits.
        # Used as a list index, -1 would silently return the last stored threat
        # (or raise IndexError on an empty store), so such slots are dropped.
        return [self.data[int(i)] for i in indices[0] if 0 <= int(i) < len(self.data)]


In [5]:
# Module-level text-classification pipeline shared by classify_threat and the API routes.
# NOTE(review): the load log recorded for this cell says the classifier weights of
# "distilbert-base-uncased" are newly initialised and that the model should be trained
# on a downstream task first - labels/scores from this pipeline are therefore not
# meaningful until a fine-tuned checkpoint is substituted.
threat_classifier = pipeline("text-classification", model="distilbert-base-uncased")

def classify_threat(description):
    """Classify a threat description with the shared ``threat_classifier``.

    Args:
        description: Free-text description of the threat.

    Returns:
        A ``(label, score)`` tuple taken from the first prediction.
    """
    # truncation=True clips over-long text to the model's maximum input length;
    # without it a long description (e.g. concatenated service banners) makes
    # the model fail instead of returning a prediction.
    predictions = threat_classifier(description, truncation=True)
    top = predictions[0]
    return top["label"], top["score"]


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Device set to use cpu


In [6]:
class ThreatIntelligenceStream:
    """Wires a Kafka topic ("threat_topic") into a processed stream that `run` sends to stdout.

    NOTE(review): no visible cell instantiates this class, so none of the
    Pathway calls below have been exercised in the recorded run. The names
    `pw.io.subscribe_to_kafka`, `.map`, `.sink` and `pw.io.stdout_sink` should
    be checked against the installed Pathway version - the documented Kafka
    connector appears to be `pw.io.kafka.read`, and tables are normally
    transformed with `select` and executed with `pw.run()`. TODO confirm.
    """

    # Registered as a Pathway UDF; serialises its argument to a JSON string.
    # NOTE(review): declared without `self` but referenced as `self.process_data`
    # in __init__ - confirm that the object returned by `pw.udf` is not bound
    # like a regular method.
    @pw.udf
    def process_data(data):
        return json.dumps(data)

    def __init__(self):
        """Subscribe to "threat_topic" (JSON format) and build the processed stream."""
        self.threat_stream = pw.io.subscribe_to_kafka("threat_topic", format="json")

        # Each record is dumped to a JSON string by process_data and then parsed
        # straight back with json.loads, i.e. a serialise/deserialise round trip.
        self.processed_stream = (
            self.threat_stream
            .map(self.process_data)
            .map(lambda x: json.loads(x))
        )

    def run(self):
        """Attach the stdout sink to the processed stream."""
        self.processed_stream.sink(pw.io.stdout_sink())


In [8]:
pip install nest_asyncio




In [9]:
import nest_asyncio
import uvicorn
# Patch asyncio so Uvicorn can start a server from inside the notebook's
# already-running event loop.
nest_asyncio.apply()  # Allows Uvicorn to run inside Jupyter Notebook
# NOTE(review): `app` is not defined in any cell above this one; it is created in
# the cell labelled In [10]. On a fresh kernel this cell must therefore be run
# after that one. The call blocks until the server is stopped; loop="asyncio"
# selects the standard-library event loop.
uvicorn.run(app, host="0.0.0.0", port=8000, loop="asyncio")

INFO:     Started server process [428]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [428]


In [10]:
# FastAPI application object; the route decorators in this cell register handlers on it.
app = FastAPI()
# Threat store created with ThreatDatabase's default vector dimension (512).
# NOTE(review): no visible cell calls add_threat, so this store starts and stays empty.
threat_db = ThreatDatabase()

@app.post("/analyze_threat")
def analyze_threat(ip: str):
    """Look an IP address up on Shodan and classify what is reported for it.

    Args:
        ip: IP address to analyse (query parameter).

    Returns:
        ``{"Threat Level": label, "Confidence Score": score}`` when Shodan
        returned something to classify, otherwise
        ``{"message": "No threat detected"}`` (lookup failed or produced no text).
    """
    import os  # local import so this cell does not depend on an edit to the import cell

    # Read the key from the environment instead of hard-coding it; the original
    # placeholder remains as the fallback so behaviour is unchanged when unset.
    api_key = os.environ.get("SHODAN_API_KEY", "your_api_key")

    threat_info = fetch_shodan_data(api_key, ip)
    if not threat_info:
        return {"message": "No threat detected"}

    # NOTE(review): Shodan's host record is expected to carry "data" as a list
    # of per-service banner dicts, each with its own "data" text - confirm
    # against the Shodan API reference. Both that shape and a plain string are
    # accepted here; a missing key no longer raises KeyError.
    payload = threat_info.get("data") or []
    if isinstance(payload, str):
        description = payload
    else:
        description = "\n".join(
            str(entry.get("data", "")) if isinstance(entry, dict) else str(entry)
            for entry in payload
        )
    description = description.strip()
    if not description:
        return {"message": "No threat detected"}

    label, score = classify_threat(description)
    return {"Threat Level": label, "Confidence Score": score}

@app.get("/search_threat")
def search_threat(query: str):
    """Search the in-memory threat store for entries matching a text query.

    Returns ``{"matching_threats": results}`` where ``results`` is whatever
    ``threat_db.search_threats`` yields for the derived query vector.
    """
    # NOTE(review): classify_threat indexes this pipeline's result as a list of
    # {"label", "score"} dicts, so what is passed to faiss.vector_to_array here is
    # classifier output rather than an embedding. A real query embedding of the
    # store's vector dimension is presumably needed - confirm the intended
    # embedding model and that faiss.vector_to_array accepts this input at all.
    query_vector = faiss.vector_to_array(threat_classifier(query))
    results = threat_db.search_threats(query_vector)
    return {"matching_threats": results}

# In a notebook __name__ is "__main__", so running this cell starts the server and
# blocks until it is stopped.
if __name__ == "__main__":
    import uvicorn

    # loop="asyncio" matches the other launch cell: nest_asyncio can only patch the
    # standard-library event loop, so Uvicorn must not auto-select a different one.
    uvicorn.run(app, host="0.0.0.0", port=8000, loop="asyncio")

INFO:     Started server process [428]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [428]


In [12]:
# NOTE(review): the recorded run of this cell fails with
# 'Error loading ASGI app. Could not import module "main"' while watching /content.
# `main:app` needs an importable main.py exposing `app`; presumably the notebook
# code was never written out to such a file - confirm, or drop this cell.
!uvicorn main:app --reload

[32mINFO[0m:     Will watch for changes in these directories: ['/content']
[32mINFO[0m:     Uvicorn running on [1mhttp://127.0.0.1:8000[0m (Press CTRL+C to quit)
[32mINFO[0m:     Started reloader process [[36m[1m2631[0m] using [36m[1mStatReload[0m
[31mERROR[0m:    Error loading ASGI app. Could not import module "main".
[32mINFO[0m:     Stopping reloader process [[36m[1m2631[0m]
^C
