In [1]:
pip install pandas numpy scikit-learn gradio flask fastapi uvicorn joblib


Collecting gradio
  Downloading gradio-6.1.0-py3-none-any.whl.metadata (16 kB)
Collecting flask
  Downloading flask-3.1.2-py3-none-any.whl.metadata (3.2 kB)
Collecting fastapi
  Downloading fastapi-0.124.4-py3-none-any.whl.metadata (30 kB)
Collecting uvicorn
  Downloading uvicorn-0.38.0-py3-none-any.whl.metadata (6.8 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting anyio<5.0,>=3.0 (from gradio)
  Downloading anyio-4.12.0-py3-none-any.whl.metadata (4.3 kB)
Collecting audioop-lts<1.0 (from gradio)
  Downloading audioop_lts-0.2.2-cp313-abi3-win_amd64.whl.metadata (2.0 kB)
Collecting brotli>=1.1.0 (from gradio)
  Downloading brotli-1.2.0-cp313-cp313-win_amd64.whl.metadata (6.3 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-1.0.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==2.0.1 (from gradio)
  Downloading gradio_client-2.0.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gr


[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import joblib

In [3]:
# Ignore Warnings
import warnings
warnings.filterwarnings("ignore")

In [5]:
df = pd.read_csv(r"C:\Users\line\AppData\Local\Temp\f960372d-9a70-452c-a086-7339a9fcab4d_archive.zip.b4d\cybersecurity_attacks.csv")

In [6]:
df.head().T

Unnamed: 0,0,1,2,3,4
Timestamp,2023-05-30 06:33:58,2020-08-26 07:08:30,2022-11-13 08:23:25,2023-07-02 10:38:46,2023-07-16 13:11:07
Source IP Address,103.216.15.12,78.199.217.198,63.79.210.48,163.42.196.10,71.166.185.76
Destination IP Address,84.9.164.252,66.191.137.154,198.219.82.17,101.228.192.255,189.243.174.238
Source Port,31225,17245,16811,20018,6131
Destination Port,17616,48166,53600,32534,26646
Protocol,ICMP,ICMP,UDP,UDP,TCP
Packet Length,503,1174,306,385,1462
Packet Type,Data,Data,Control,Data,Data
Traffic Type,HTTP,HTTP,HTTP,HTTP,DNS
Payload Data,Qui natus odio asperiores nam. Optio nobis ius...,Aperiam quos modi officiis veritatis rem. Omni...,Perferendis sapiente vitae soluta. Hic delectu...,Totam maxime beatae expedita explicabo porro l...,Odit nesciunt dolorem nisi iste iusto. Animi v...


In [7]:
# List Columns
df.columns

Index(['Timestamp', 'Source IP Address', 'Destination IP Address',
       'Source Port', 'Destination Port', 'Protocol', 'Packet Length',
       'Packet Type', 'Traffic Type', 'Payload Data', 'Malware Indicators',
       'Action Taken', 'Severity Level', 'User Information',
       'Device Information', 'Network Segment', 'Geo-location Data',
       'Proxy Information', 'Firewall Logs', 'IDS/IPS Alerts', 'Log Source'],
      dtype='object')

In [8]:
# Shape of data
print(f"There are {df.shape[0]}, row and {df.shape[1]} columns in the dataset")

There are 40000, row and 25 columns in the dataset


In [9]:
# Dataset Info
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40000 entries, 0 to 39999
Data columns (total 25 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Timestamp               40000 non-null  object 
 1   Source IP Address       40000 non-null  object 
 2   Destination IP Address  40000 non-null  object 
 3   Source Port             40000 non-null  int64  
 4   Destination Port        40000 non-null  int64  
 5   Protocol                40000 non-null  object 
 6   Packet Length           40000 non-null  int64  
 7   Packet Type             40000 non-null  object 
 8   Traffic Type            40000 non-null  object 
 9   Payload Data            40000 non-null  object 
 10  Malware Indicators      20000 non-null  object 
 11  Anomaly Scores          40000 non-null  float64
 13  Attack Type             40000 non-null  object 
 14  Attack Signature        40000 non-null  object 
 15  Action Taken            40000 non-null

In [10]:
df.isnull().sum().sort_values(ascending=False)

IDS/IPS Alerts            20050
Malware Indicators        20000
Firewall Logs             19961
Proxy Information         19851
Source IP Address             0
Destination IP Address        0
Source Port                   0
Timestamp                     0
Traffic Type                  0
Packet Type                   0
Packet Length                 0
Protocol                      0
Destination Port              0
Attack Type                   0
Payload Data                  0
Anomaly Scores                0
Severity Level                0
Action Taken                  0
Attack Signature              0
User Information              0
Geo-location Data             0
Network Segment               0
Device Information            0
Log Source                    0
dtype: int64

In [11]:
# Missing Value by Percentage
df.isnull().sum() / len(df) * 100

Timestamp                  0.0000
Source IP Address          0.0000
Destination IP Address     0.0000
Source Port                0.0000
Destination Port           0.0000
Protocol                   0.0000
Packet Length              0.0000
Packet Type                0.0000
Traffic Type               0.0000
Payload Data               0.0000
Malware Indicators        50.0000
Anomaly Scores             0.0000
Attack Type                0.0000
Attack Signature           0.0000
Action Taken               0.0000
Severity Level             0.0000
User Information           0.0000
Device Information         0.0000
Network Segment            0.0000
Geo-location Data          0.0000
Proxy Information         49.6275
Firewall Logs             49.9025
IDS/IPS Alerts            50.1250
Log Source                 0.0000
dtype: float64

In [14]:
# Create target column
df["attack"] = df["Attack Type"].apply(lambda x: 0 if x == "Normal" else 1)

In [15]:
# Convert Attack Type to binary
df["attack"] = df["Attack Type"].apply(
    lambda x: 0 if x == "Normal" else 1
)

In [16]:
# Select important features
features = [
    "Source Port",
    "Destination Port",
    "Protocol",
    "Packet Length",
    "Packet Type",
    "Traffic Type",
    "Severity Level"
]

df = df[features + ["attack"]]

In [19]:
from sklearn.preprocessing import LabelEncoder


In [20]:
encoder = LabelEncoder()

categorical_columns = [
    "Protocol",
    "Packet Type",
    "Traffic Type",
    "Severity Level"
]

for col in categorical_columns:
    df[col] = encoder.fit_transform(df[col])

print("Categorical columns encoded")

Categorical columns encoded


In [21]:
X = df.drop("attack", axis=1)
y = df["attack"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("Data split into train and test")

Data split into train and test


In [22]:
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42
)

model.fit(X_train, y_train)

print("Model trained successfully")

Model trained successfully


In [23]:
joblib.dump(model, "cyber_attack_model.pkl")

print("Model saved as cyber_attack_model.pkl")

Model saved as cyber_attack_model.pkl


In [26]:
import gradio as gr
import joblib
import numpy as np

In [27]:
# Load trained model
model = joblib.load("cyber_attack_model.pkl")


In [28]:
def predict_attack(
    source_port,
    destination_port,
    protocol,
    packet_length,
    packet_type,
    traffic_type,
    severity_level
):
    input_data = np.array([[
        source_port,
        destination_port,
        protocol,
        packet_length,
        packet_type,
        traffic_type,
        severity_level
    ]])

    prediction = model.predict(input_data)[0]

    if prediction == 1:
        return "⚠️ CYBER ATTACK DETECTED"
    else:
        return "✅ NORMAL NETWORK TRAFFIC"
    

In [29]:
interface = gr.Interface(
    fn=predict_attack,
    inputs=[
        gr.Number(label="Source Port"),
        gr.Number(label="Destination Port"),
        gr.Number(label="Protocol (Encoded Number)"),
        gr.Number(label="Packet Length"),
        gr.Number(label="Packet Type (Encoded Number)"),
        gr.Number(label="Traffic Type (Encoded Number)"),
        gr.Number(label="Severity Level (Encoded Number)")
    ],
    outputs="text",
    title="Cyber Attack Prediction System",
    description="Machine Learning based Network Intrusion Detection"
)

interface.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




In [35]:
model = joblib.load("cyber_attack_model.pkl")


In [37]:
pip install flask


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [38]:
# ================================
# flask_app.py
# ================================

from flask import Flask, request, render_template_string
import joblib

app = Flask(__name__)

# Load trained ML model
model = joblib.load("cyber_attack_model.pkl")

# HTML template
HTML_TEMPLATE = """
<h2>Cyber Attack Prediction System</h2>

<form method="post">
  <input name="f1" placeholder="Source Port"><br><br>
  <input name="f2" placeholder="Destination Port"><br><br>
  <input name="f3" placeholder="Protocol (Encoded)"><br><br>
  <input name="f4" placeholder="Packet Length"><br><br>
  <input name="f5" placeholder="Packet Type (Encoded)"><br><br>
  <input name="f6" placeholder="Traffic Type (Encoded)"><br><br>
  <input name="f7" placeholder="Severity Level (Encoded)"><br><br>

  <button type="submit">Predict</button>
</form>

<h3>{{ result }}</h3>
"""

@app.route("/", methods=["GET", "POST"])
def home():
    result = ""

    if request.method == "POST":
        values = [float(v) for v in request.form.values()]
        prediction = model.predict([values])[0]

        if prediction == 1:
            result = "⚠️ CYBER ATTACK DETECTED"
        else:
            result = "✅ NORMAL NETWORK TRAFFIC"

    return render_template_string(HTML_TEMPLATE, result=result)

if __name__ == "__main__":
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

In [41]:
# fastapi_app.py

from fastapi import FastAPI
import joblib

app = FastAPI(
    title="Cyber Attack Prediction API",
    description="Machine Learning based Cyber Security API"
)

model = joblib.load("cyber_attack_model.pkl")

@app.post("/predict")
def predict_attack(data: list):
    prediction = model.predict([data])[0]

    if prediction == 1:
        return {"prediction": "CYBER ATTACK"}
    else:
        return {"prediction": "NORMAL"}
