In [None]:
# Install the required dependencies for the code snippet
!pip install pandas scikit-learn numpy scipy matplotlib




In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Train, evaluate and persist a Random Forest classifier on the flow CSV.
# Both the model AND the fitted scaler are written to disk, because any later
# prediction must scale its inputs with the statistics learned here.

# Load the dataset
data = pd.read_csv('/content/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv')

# Encode the string class labels as integers. LabelEncoder numbers classes in
# sorted order; inspect label_encoder.classes_ to see which code is which.
label_encoder = LabelEncoder()
data[' Label'] = label_encoder.fit_transform(data[' Label'])

# Separate features and labels
X = data.drop([' Label'], axis=1)  # Features
y = data[' Label']  # Labels

# Drop rows with NaNs, then replace infinities with a large finite value so
# the scaler and classifier only see finite numbers.
# NOTE(review): 1e12 is an arbitrary sentinel, and -inf is mapped to +1e12 as
# well — confirm this is the intended treatment.
X = X.dropna()
X = X.replace([float('inf'), -float('inf')], 1e12)

# Keep only the labels of the rows that survived cleaning, then renumber both
# so features and labels share one contiguous index.
y = y.loc[X.index]
X = X.reset_index(drop=True)
y = y.reset_index(drop=True)

# Check the lengths of features and labels
print("Feature count:", len(X))
print("Label count:", len(y))

# Ensure consistent lengths
if len(X) == len(y):
    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize the features: fit on the training split only, then apply
    # the same transformation to the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Initialize the Random Forest model
    rf = RandomForestClassifier(n_estimators=100, random_state=42)

    # Train the model
    rf.fit(X_train, y_train)

    # Save the trained model together with the fitted scaler; the model was
    # trained on scaled inputs, so it is only usable with this exact scaler.
    joblib.dump(rf, 'random_forest_model.pkl')
    joblib.dump(scaler, 'scaler.pkl')

    # Make predictions
    y_pred = rf.predict(X_test)

    # Generate a classification report
    report = classification_report(y_test, y_pred)
    print("Classification Report:")
    print(report)

    # Generate a confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(conf_matrix)
else:
    print("Feature and label lengths do not match.")


Feature count: 225741
Label count: 225741
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     19417
           1       1.00      1.00      1.00     25732

    accuracy                           1.00     45149
   macro avg       1.00      1.00      1.00     45149
weighted avg       1.00      1.00      1.00     45149

Confusion Matrix:
[[19417     0]
 [    8 25724]]


In [None]:
!pip install scapy



In [None]:
from scapy.all import sniff, TCP, Raw
import pandas as pd

# Module-level state that process_packet() reads and updates via `global`:
# the most recent destination port and the timestamp of the previous packet.
dst_port, last_packet_time = 0, 0

# Function to process each packet and extract relevant information
def process_packet(packet):
    """Derive a small feature record from one sniffed packet and append it to a CSV.

    Intended as the ``prn`` callback of scapy's ``sniff``. Reads the
    module-level ``src_port`` and updates the module-level ``dst_port`` and
    ``last_packet_time``.

    Args:
        packet: A scapy packet. Packets without a TCP layer raise inside the
            ``try`` block and are reported by the handler instead of being
            written.

    Side effects:
        Appends one row to ``network_traffic_data_5.csv`` in the current
        working directory, writing the header only when the file is empty.
        Any exception is caught and printed rather than propagated.
    """
    global dst_port, last_packet_time

    try:
        # Extract relevant fields from the packet
        tcp_layer = packet[TCP]
        dst_port = tcp_layer.dport

        # Time elapsed since the previously processed packet. The first packet
        # has no predecessor (last_packet_time is still 0), so report 0
        # instead of "seconds since the epoch".
        flow_duration = float(packet.time - last_packet_time) if last_packet_time else 0.0

        # A packet counts as "forward" when its source port equals src_port.
        total_fwd_packets = int(tcp_layer.sport == src_port)
        # NOTE(review): dst_port was assigned from this same packet just
        # above, so this comparison is always true as written — confirm the
        # intended definition of a "backward" packet.
        total_bwd_packets = int(tcp_layer.dport == dst_port)

        # Payload length of this packet (0 when there is no Raw layer)
        total_fwd_length = len(packet[Raw].load) if packet.haslayer(Raw) else 0

        # Backward traffic is not tracked in this example
        total_bwd_length = 0

        # Only one packet is observed per call, so max/min/mean all equal its
        # length and the standard deviation is 0.
        packet_length = len(packet)

        # Update last packet time for flow duration calculation in the next packet
        last_packet_time = packet.time

        # One row per packet, built from the values computed above.
        # NOTE(review): column names follow the local feature names; reconcile
        # them with the training CSV headers before feeding rows to the model.
        record = {
            'Destination Port': dst_port,
            'Flow Duration': flow_duration,
            'Total Fwd Packets': total_fwd_packets,
            'Total Backward Packets': total_bwd_packets,
            'Total Length of Fwd Packets': total_fwd_length,
            'Total Length of Bwd Packets': total_bwd_length,
            'Fwd Packet Length Max': packet_length,
            'Fwd Packet Length Min': packet_length,
            'Fwd Packet Length Mean': packet_length,
            'Fwd Packet Length Std': 0,
        }
        packet_data = pd.DataFrame([record])

        # Append the packet data to the CSV file; newline='' lets pandas
        # control line endings when it is handed an open file object.
        with open('network_traffic_data_5.csv', 'a', newline='') as f:
            packet_data.to_csv(f, header=f.tell() == 0, index=False)  # Header only for an empty file

    except Exception as e:
        # Best effort: report the problem and keep processing later packets.
        print(f"Error processing packet: {e}")

# Source port that process_packet() compares each packet's TCP source port
# against. 0 is only a placeholder — replace it with the port to monitor.
src_port = 0

# Capture packets, passing each one to process_packet(); the capture stops
# after `count` packets (adjust as needed).
sniff(count=10, prn=process_packet)


<Sniffed: TCP:10 UDP:0 ICMP:0 Other:0>

In [None]:
import pandas as pd

# Generate a constant-valued placeholder table and save it as the CSV that the
# prediction cells read.
# NOTE: to_csv is called without append mode, so any existing file at this
# path is overwritten.

# Define the number of rows and columns for the network traffic data
num_rows = 100  # Specify the number of rows
num_cols = 78   # Specify the number of Destination Port columns
# NOTE(review): 78 presumably matches the number of feature columns the model
# was trained on — confirm against the training data.

output_path = '/content/network_traffic_data_5.csv'

# Every cell holds the same value; a scalar is broadcast over the given
# index/columns instead of building aliased nested lists.
network_data = pd.DataFrame(
    54865,
    index=range(1, num_rows + 1),
    columns=[f'Destination Port_{i}' for i in range(1, num_cols + 1)],
)

# Save the generated network traffic data to a CSV file
network_data.to_csv(output_path, index=False)

# Report the path that was actually written.
print(f"Network traffic data saved to '{output_path}'.")


Network traffic data saved to 'network_traffic_data.csv'.


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import joblib

# Score the saved traffic CSV with the trained model and print an alert for
# every row predicted as class 1.

# Load the trained Random Forest model
# NOTE: joblib.load unpickles arbitrary objects — only load files you created.
rf_model = joblib.load('/content/random_forest_model.pkl')

# Load the network traffic data from the CSV file
network_data = pd.read_csv('/content/network_traffic_data_5.csv')

# Standardize the features with the scaler fitted at training time when one
# has been saved as /content/scaler.pkl. Fitting a new scaler on the data being
# scored uses different statistics from the ones the model was trained with
# (and maps constant columns to all zeros), so that is only a fallback.
try:
    scaler = joblib.load('/content/scaler.pkl')
except FileNotFoundError:
    print("Warning: /content/scaler.pkl not found; fitting a new scaler on the "
          "input data, so predictions may be unreliable.")
    scaler = StandardScaler()
    network_data_standardized = scaler.fit_transform(network_data)
else:
    # Columns are matched by position, not by name.
    # NOTE(review): confirm the CSV column order matches the training features.
    network_data_standardized = scaler.transform(network_data.to_numpy())

# Use the trained Random Forest model to predict whether the network traffic data packets are malicious
predictions = rf_model.predict(network_data_standardized)

# Generate alerts for malicious data packets
for i, prediction in enumerate(predictions):
    if prediction == 1:  # Assuming 1 represents a malicious label
        print("Alert: Malicious network traffic detected at index", i)
        # You can further customize the alert generation based on your requirements


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import joblib

# Score the saved traffic CSV with the trained model, logging each stage and
# counting the rows predicted as class 1.

# Load the trained Random Forest model
# NOTE: joblib.load unpickles arbitrary objects — only load files you created.
print("Loading the trained Random Forest model...")
rf_model = joblib.load('/content/random_forest_model.pkl')
print("Random Forest model loaded successfully.")

# Load the network traffic data from the CSV file
print("Loading the network traffic data...")
network_data = pd.read_csv('/content/network_traffic_data_5.csv')
print("Network traffic data loaded successfully.")

# Preprocess the network traffic data
print("Preprocessing the network traffic data...")
# Use the scaler fitted at training time when one has been saved as
# /content/scaler.pkl. Fitting a new scaler on the data being scored uses
# different statistics from the ones the model was trained with (and maps
# constant columns to all zeros), so that is only a fallback.
try:
    scaler = joblib.load('/content/scaler.pkl')
except FileNotFoundError:
    print("Warning: /content/scaler.pkl not found; fitting a new scaler on the "
          "input data, so predictions may be unreliable.")
    scaler = StandardScaler()
    network_data_standardized = scaler.fit_transform(network_data)
else:
    # Columns are matched by position, not by name.
    # NOTE(review): confirm the CSV column order matches the training features.
    network_data_standardized = scaler.transform(network_data.to_numpy())
print("Network traffic data preprocessed successfully.")

# Use the trained Random Forest model to predict whether the network traffic data packets are malicious
print("Making predictions...")
predictions = rf_model.predict(network_data_standardized)
print("Predictions made successfully.")

# Generate alerts for malicious data packets
print("Generating alerts for malicious data packets...")
malicious_count = 0
for i, prediction in enumerate(predictions):
    if prediction == 1:  # Assuming 1 represents a malicious label
        print("Alert: Malicious network traffic detected at index", i)
        malicious_count += 1
print(f"Alert generation completed. Total {malicious_count} malicious data packets detected.")


Loading the trained Random Forest model...
Random Forest model loaded successfully.
Loading the network traffic data...
Network traffic data loaded successfully.
Preprocessing the network traffic data...
Network traffic data preprocessed successfully.
Making predictions...
Predictions made successfully.
Generating alerts for malicious data packets...
Alert generation completed. Total 0 malicious data packets detected.
