In [3]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import joblib

# --- Configuration -----------------------------------------------------------
# Numeric columns used as model inputs. Timestamp, the IP addresses and
# Protocol are left out on purpose: they are not numeric measurements and the
# scaler only receives the columns listed here. Listing the features explicitly
# (instead of dropping "everything else") keeps stray columns in a real dataset
# from silently becoming model inputs.
FEATURE_COLUMNS = [
    'Source Port',
    'Destination Port',
    'Packet Size (bytes)',
    'Bytes Sent',
    'Bytes Received',
    'Duration (ms)',
]
CONTAMINATION = 0.1  # expected share of anomalies; adjust as needed
RANDOM_STATE = 42    # fixed seed so the forest is reproducible across runs
MODEL_PATH = 'anomaly_detection_model.pkl'
SCALER_PATH = 'scaler.pkl'

# Sample data (replace this with your actual dataset)
data = {
    'Timestamp': [
        '2025-03-10 10:00:00', '2025-03-10 10:01:00', '2025-03-10 10:02:00',
        '2025-03-10 10:03:00', '2025-03-10 10:04:00', '2025-03-10 10:05:00'
    ],
    'Source IP': ['192.168.1.10'] * 6,
    'Destination IP': ['203.0.113.5', '203.0.113.5', '203.0.113.6',
                       '203.0.113.7', '203.0.113.6', '203.0.113.7'],
    'Protocol': ['TCP', 'TCP', 'TCP', 'UDP', 'TCP', 'TCP'],
    'Source Port': [54321] * 6,
    'Destination Port': [80, 80, 443, 53, 80, 22],
    'Packet Size (bytes)': [500, 600, 800, 100, 900, 2000],
    'Bytes Sent': [200, 200, 500, 100, 400, 0],
    'Bytes Received': [150, 300, 400, 50, 200, 2000],
    'Duration (ms)': [200, 250, 300, 50, 150, 500]
}

# Keep the raw frame untouched (no in-place drop) so the identifying columns
# remain available, and derive the modelling frame as a separate copy.
raw_df = pd.DataFrame(data)
df = raw_df[FEATURE_COLUMNS].copy()

# Standardize the features; the fitted scaler is saved below so new data can be
# transformed with the same statistics.
scaler = StandardScaler()
X = scaler.fit_transform(df)

# Train Isolation Forest model
model = IsolationForest(contamination=CONTAMINATION, random_state=RANDOM_STATE)
model.fit(X)

# Save the trained model and scaler
joblib.dump(model, MODEL_PATH)
joblib.dump(scaler, SCALER_PATH)

print("Model training complete and saved.")

Model training complete and saved.


In [2]:
import pandas as pd
import joblib
import numpy as np

# Load the saved model and scaler.
# NOTE: joblib files are pickles - only load artifacts you created or trust,
# because unpickling can execute arbitrary code.
model = joblib.load('anomaly_detection_model.pkl')
scaler = joblib.load('scaler.pkl')

# New traffic to score (replace this with your actual data source,
# e.g. a CSV file such as new_network_traffic.csv)
new_data = {
    'Source Port': [54321, 54321, 54321],
    'Destination Port': [80, 443, 22],
    'Packet Size (bytes)': [500, 900, 2000],
    'Bytes Sent': [200, 400, 0],
    'Bytes Received': [150, 200, 2000],
    'Duration (ms)': [200, 150, 500]
}
new_df = pd.DataFrame(new_data)

# Hand the scaler exactly the columns it was fitted on, in the fitted order.
# `feature_names_in_` is only present when the scaler was fitted on a
# DataFrame with a recent scikit-learn; without it, fall back to the frame
# as given.
expected_columns = getattr(scaler, 'feature_names_in_', None)
if expected_columns is None:
    features = new_df
else:
    missing = [col for col in expected_columns if col not in new_df.columns]
    if missing:
        raise ValueError(f"New data is missing feature columns: {missing}")
    features = new_df[list(expected_columns)]

# Standardize the new data with the training-time statistics
X_new = scaler.transform(features)

# Predict anomalies: the model labels outliers -1, which is mapped to
# 'Anomaly'; every other prediction is reported as 'Normal'.
new_df['Anomaly'] = np.where(model.predict(X_new) == -1, 'Anomaly', 'Normal')

# Output results
print(new_df)

def alert_anomalies(dataframe):
    """Print an alert listing every row labelled 'Anomaly'.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with an 'Anomaly' column; rows whose value equals the string
        'Anomaly' are reported.

    Returns
    -------
    None
        Output goes to stdout only. Nothing is printed when no row is
        labelled 'Anomaly'.
    """
    is_flagged = dataframe['Anomaly'] == 'Anomaly'
    flagged_rows = dataframe.loc[is_flagged]

    # Nothing to report: stay silent.
    if flagged_rows.empty:
        return

    print("Alert! Anomalies detected:")
    print(flagged_rows)

# Report any rows of the scored frame that were labelled as anomalies
alert_anomalies(dataframe=new_df)

   Source Port  Destination Port  Packet Size (bytes)  Bytes Sent  \
0        54321                80                  500         200   
1        54321               443                  900         400   
2        54321                22                 2000           0   

   Bytes Received  Duration (ms) Anomaly  
0             150            200  Normal  
1             200            150  Normal  
2            2000            500  Normal  
