In [4]:
import sqlite3
import pandas as pd
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence
import os

# Suppress TensorFlow warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Initialize Mock Database
def init_db():
    conn = sqlite3.connect('cyber_threats.db')
    c = conn.cursor()
    c.execute('''CREATE TABLE IF NOT EXISTS threats
                 (id INTEGER PRIMARY KEY, vulnerability TEXT, risk_level TEXT, mitigation TEXT)''')
    c.execute("INSERT OR IGNORE INTO threats VALUES (1, 'Phishing', 'High', 'Enable email filtering, train employees')")
    c.execute("INSERT OR IGNORE INTO threats VALUES (2, 'Ransomware', 'Critical', 'Implement MFA, regular backups')")
    conn.commit()
    return conn

# Query Database for Threat Data
def query_threat(vulnerability):
    conn = init_db()
    c = conn.cursor()
    c.execute("SELECT risk_level, mitigation FROM threats WHERE vulnerability = ?", (vulnerability,))
    result = c.fetchone()
    conn.close()
    return result if result else ("Unknown", "Consult a cybersecurity expert")


# LangChain for Agentic Workflow
def create_llm_chain():
    llm = HuggingFacePipeline.from_model_id(
        model_id='gpt2',
        task='text-generation',
        pipeline_kwargs={'max_length': 100, 'truncation': True}
    )
    
    prompt = PromptTemplate(
        input_variables=["user_input", "threat_data"],
        template="Given the user input: '{user_input}' and threat data: '{threat_data}', provide a concise cybersecurity risk assessment."
    )
    
    # Use RunnableSequence
    return RunnableSequence(prompt | llm)

# Test the Chatbot
def test_chatbot(user_input):
    # Mock vulnerability detection
    vulnerability = "Phishing" if "email" in user_input.lower() else "Ransomware"
    risk_level, mitigation = query_threat(vulnerability)
    threat_data = f"Vulnerability: {vulnerability}, Risk: {risk_level}, Mitigation: {mitigation}"
    
    # Generate response with LLM
    llm_chain = create_llm_chain()
    response = llm_chain.invoke({"user_input": user_input, "threat_data": threat_data})
    
    print(f"Input: {user_input}")
    print(f"Response: {response}")

# Test Run
if __name__ == "__main__":
    test_chatbot("We use cloud servers")

Device set to use cpu


Input: We use cloud servers
Response: Given the user input: 'We use cloud servers' and threat data: 'Vulnerability: Ransomware, Risk: Critical, Mitigation: Implement MFA, regular backups', provide a concise cybersecurity risk assessment. However, this list is limited in terms of how many vulnerabilities it deals with.

A vulnerability in Android phones: It is highly unlikely that the vulnerability in our application is used in the Android platform but that it could. In this section, we will examine more in-depth
