# 🛡️ Cadet Cyber Mission: Threat Detector (Cadet Edition)
Welcome aboard, cadet! Today you're going to become a **Cyber Defender** by training a computer to detect whether network traffic is **Benign** (safe) or **Malicious** (dangerous).

Let's go step by step, like a real mission. Stay sharp and follow the orders!


In [None]:
# ✅ MISSION STEP 1: Import Tools
# These are special toolkits that help us do data science
import pandas as pd                      # Helps us handle tables of data
import numpy as np                       # Helps with numbers and math
import matplotlib.pyplot as plt          # Helps us draw charts
from sklearn.model_selection import train_test_split  # Helps us split data into training/testing
from sklearn.ensemble import RandomForestClassifier   # This is our smart model
from sklearn.preprocessing import LabelEncoder        # Helps turn words into numbers
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score  # To measure how well we did


In [None]:
# ✅ MISSION STEP 2: Create Fake Cyber Traffic
# We're making a pretend dataset of 100 network connections with features like
# port, protocol, size, etc. Some are safe, some are dangerous.
#
# A seeded random generator is used so that every cadet (and every re-run of
# this cell) gets exactly the same "random" traffic and therefore the same
# scores later in the mission.
rng = np.random.default_rng(42)

data = {
    # A repeating 10-connection pattern, copied 10 times -> 100 rows
    'Protocol': ['TCP', 'UDP', 'TCP', 'TCP', 'UDP', 'TCP', 'TCP', 'UDP', 'TCP', 'UDP'] * 10,
    'Src_Port': rng.integers(1000, 5000, size=100),   # Random source port (1000-4999)
    'Dst_Port': rng.integers(20, 1024, size=100),     # Random destination port (20-1023)
    'Packet_Size': rng.integers(40, 1500, size=100),  # Size of the packet (40-1499)
    'Duration': rng.random(100) * 10,                 # How long the connection lasted (0 to <10)
    # The "answers": a repeating pattern with 4 Malicious out of every 10 rows
    'Label': ['Benign', 'Malicious', 'Benign', 'Benign', 'Malicious',
              'Benign', 'Malicious', 'Benign', 'Malicious', 'Benign'] * 10,
}
df = pd.DataFrame(data)  # Put it all into a table
df.head()  # Show the first few rows


In [None]:
# ✅ MISSION STEP 3: Turn Words into Numbers
# The model only works with numbers, so the two text columns of `df`
# ("Protocol" and "Label") are replaced by numeric codes.

# LabelEncoder numbers the distinct values in sorted order,
# so here TCP becomes 0 and UDP becomes 1.
le = LabelEncoder()
protocol_codes = le.fit_transform(df['Protocol'])
df['Protocol'] = protocol_codes

# The answer column uses a hand-written lookup so the meaning is explicit.
label_to_number = {'Benign': 0, 'Malicious': 1}
df['Label'] = df['Label'].map(label_to_number)

df.head()  # Show the first few rows again, now all numeric


In [None]:
# ✅ MISSION STEP 4: Split Data for Training and Testing
# Part of the table is used to teach the computer; the rest is held back
# to check whether it really learned.

y = df['Label']                # Labels: the answers we want to predict
X = df.drop(columns='Label')   # Features: every column except the answer

# 70% of the rows go to training, 30% to testing.
# random_state fixes the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [None]:
# ✅ MISSION STEP 5: Train the Model (Random Forest = Smart Brain)
# The computer studies the training examples of safe and dangerous traffic.
# fit() hands back the trained model itself, so building and training
# can be written as one chained expression.
clf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Ask the trained model to predict a label for each held-back test row.
y_pred = clf.predict(X_test)


In [None]:
# ✅ MISSION STEP 6: See How Well the Computer Did
# We compare the model's predictions (y_pred) with the true answers (y_test).

# Naming the classes explicitly keeps the matrix 2x2 and the report rows in a
# fixed order (Benign first, Malicious second), even if one class happens to
# be missing from the predictions.
class_ids = [0, 1]
class_names = ['Benign', 'Malicious']

print("🧮 Accuracy Score:", accuracy_score(y_test, y_pred))

# Rows are the actual classes, columns are the predicted classes.
print("\n🧾 Confusion Matrix (Actual vs Predicted):\n",
      confusion_matrix(y_test, y_pred, labels=class_ids))

print("\n📋 Classification Report:\n",
      classification_report(y_test, y_pred, labels=class_ids, target_names=class_names))


In [None]:
# ✅ MISSION STEP 7: Visualize the Results
# Let's draw a simple bar chart to show how many connections were predicted
# as Benign (0) vs Malicious (1).

# y_pred is a NumPy array, so count matches directly on the array instead of
# copying it into a Python list first.
benign_count = int(np.count_nonzero(y_pred == 0))
malicious_count = int(np.count_nonzero(y_pred == 1))

plt.figure(figsize=(6, 4))
plt.bar(['Benign', 'Malicious'], [benign_count, malicious_count])
plt.title('🔍 Threat Detector: Prediction Results')
plt.ylabel('Number of Connections')
plt.tight_layout()  # Keep the title and labels from being clipped
plt.show()
