# Script 1 - Data Collection

In [None]:
import pandas as pd
from pysnmp.hlapi import *
import schedule
import time
import os

# Function to collect SNMP data (SNMPv3)
def get_snmp_data_v3(target, oid, user, auth_key, priv_key, port=161):
    """Issue a single SNMPv3 GET and return the value rendered as text.

    The request uses HMAC-SHA authentication and AES-128 (CFB) privacy and
    is sent over UDP to ``(target, port)``.

    Args:
        target: Host name or IP address of the SNMP agent.
        oid: Object identifier to read.
        user: SNMPv3 (USM) user name.
        auth_key: Authentication key for ``user``.
        priv_key: Privacy (encryption) key for ``user``.
        port: UDP port of the agent.

    Returns:
        ``prettyPrint()`` of the value of the first returned variable
        binding, or ``None`` when the request fails (the error is printed)
        or the response contains no variable bindings.
    """
    response = getCmd(
        SnmpEngine(),
        UsmUserData(
            user,
            authKey=auth_key,
            privKey=priv_key,
            authProtocol=usmHMACSHAAuthProtocol,  # Authentication protocol
            privProtocol=usmAesCfb128Protocol,    # Privacy protocol
        ),
        UdpTransportTarget((target, port)),  # UDP/IPv4 transport
        ContextData(),
        ObjectType(ObjectIdentity(oid)),
    )

    # Only the first response of the generator is consumed.
    error_indication, error_status, _error_index, var_binds = next(response)

    if error_indication:
        print(f"Error: {error_indication}")
        return None
    if error_status:
        print(f"Error: {error_status.prettyPrint()}")
        return None

    # Only the first binding matters: a single OID was requested.
    first_binding = next(iter(var_binds), None)
    if first_binding is None:
        return None
    return first_binding[1].prettyPrint()

# Router IP (the device polled by default; see the `devices` list in the
# `__main__` section)
router = '192.168.40.1'

# SNMPv3 Credentials (example)
# NOTE(review): placeholder values hard-coded in source; real credentials
# should presumably come from the environment or a secrets store.
user = 'myUser'
auth_key = 'myAuthKey'
priv_key = 'myPrivKey'

# OIDs that we want to collect.
# Each key becomes a column name in the collected data; each value is the
# numeric OID passed to get_snmp_data_v3().
# NOTE(review): the names end in "11" while every OID ends in instance ".1";
# confirm which interface index is actually intended.
oids = {
    'ifInOctets11': '1.3.6.1.2.1.2.2.1.10.1',
    'ifOutOctets11': '1.3.6.1.2.1.2.2.1.16.1',
    'ifInDiscards11': '1.3.6.1.2.1.2.2.1.13.1',
    'ifOutDiscards11': '1.3.6.1.2.1.2.2.1.19.1',
    'ifInUcastPkts11': '1.3.6.1.2.1.2.2.1.11.1',
    'ifOutUcastPkts11': '1.3.6.1.2.1.2.2.1.17.1',
    'ifInNUcastPkts11': '1.3.6.1.2.1.2.2.1.12.1',
    'ifOutNUcastPkts11': '1.3.6.1.2.1.2.2.1.18.1'
}

# List to store the collected data: one dict per device per collection run,
# appended by collect_data() and never cleared.
collected_data = []

# Function to collect data from all devices
def collect_data():
    """Poll every device for every configured OID and persist the results.

    For each address in the module-level ``devices`` list, one row is built
    holding the device ``'IP'`` plus one entry per name in ``oids`` (the
    value returned by ``get_snmp_data_v3``, which may be ``None``). The row
    is appended to the module-level ``collected_data`` list; the list is
    mutated in place, so no ``global`` declaration is needed.

    After polling, everything collected so far is written to
    ``collected_snmp_data.csv`` (overwriting the file) and printed.
    """
    for device in devices:
        print(f"Collecting data from {device}")
        readings = {}
        for oid_name, oid in oids.items():
            print(f"OID {oid_name}:")
            readings[oid_name] = get_snmp_data_v3(device, oid, user, auth_key, priv_key)
        # 'IP' goes first so it is the leading column of the resulting frame.
        collected_data.append({'IP': device, **readings})
        print("\n")

    # The frame always contains every row gathered since start-up.
    snapshot = pd.DataFrame(collected_data)
    snapshot.to_csv('collected_snmp_data.csv', index=False)
    print(snapshot)

# Schedule the collection to run every 15 seconds.
# The job is registered at import time; it only fires from run_pending().
schedule.every(15).seconds.do(collect_data)

if __name__ == "__main__":
    # Initialize device list.
    # NOTE: collect_data() reads `devices` as a module-level name, and it is
    # only defined here, i.e. when this code runs as the main script.
    devices = [router]  # Add more device IPs if needed

    # Poll the scheduler once per second, forever.
    while True:
        schedule.run_pending()
        time.sleep(1)


# Script 2 - Data Processing

In [None]:
# Convert the collected data into pandas DataFrame.
# NOTE(review): `pd` and `collected_data` are not defined in this cell; it
# presumably relies on Script 1 having been run in the same session.
df = pd.DataFrame(collected_data)

# Function to clean the data
def clean_data(df, id_columns=('IP',)):
    """Remove duplicate rows and convert the measurement columns to numbers.

    Args:
        df: DataFrame of collected samples. It is not modified.
        id_columns: Names of columns that hold identifiers rather than
            measurements (by default the device ``'IP'``). They are left
            untouched: coercing a value such as ``'192.168.40.1'`` to a
            number would replace it with NaN. Pass an empty tuple to
            convert every column.

    Returns:
        A new DataFrame without duplicate rows in which every column not
        listed in ``id_columns`` is numeric; values that cannot be parsed
        become NaN.
    """
    # Remove duplicates. The explicit copy guarantees that the column
    # assignments below never write into the caller's frame.
    cleaned = df.drop_duplicates().copy()

    preserved = set(id_columns)

    # Convert the remaining columns to the appropriate data type
    for col in cleaned.columns:
        if col in preserved:
            continue
        # Convert to numeric, invalid values become NaN
        cleaned[col] = pd.to_numeric(cleaned[col], errors='coerce')

    return cleaned

# Clean the data: de-duplicate rows and coerce values to numeric types
df_cleaned = clean_data(df)

# Script 3 - Data Analysis report

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_selection import mutual_info_classif
import schedule
import time

# Define the columns that belong to each group of variables.
# The names must match the dataset's column headers exactly (case-sensitive):
# every group is used to index the training DataFrame.
interface_columns = ['ifInOctets11', 'ifOutOctets11', 'ifOutDiscards11', 'ifInUcastPkts11', 'ifInNUcastPkts11',
                     'ifInDiscards11', 'ifOutUcastPkts11', 'ifOutNUcastPkts11']
ip_columns = ['ipInReceives', 'ipInDelivers', 'ipOutRequests', 'ipOutDiscards', 'ipInDiscards', 'ipForwDatagrams',
              'ipOutNoRoutes', 'ipInAddrErrors']
icmp_columns = ['icmpInMsgs', 'icmpInDestUnreachs', 'icmpOutMsgs', 'icmpOutDestUnreachs', 'icmpInEchos', 'icmpOutEchoReps']
tcp_columns = ['tcpOutRsts', 'tcpInSegs', 'tcpOutSegs', 'tcpPassiveOpens', 'tcpRetransSegs', 'tcpCurrEstab', 'tcpEstabResets',
               'tcpActiveOpens']
udp_columns = ['udpInDatagrams', 'udpNoPorts', 'udpInErrors', 'udpOutDatagrams']

# Group label -> list of column names; the label is also used in the names
# of the generated plot files.
groups = {
    'Interface': interface_columns,
    'IP': ip_columns,
    'ICMP': icmp_columns,
    'TCP': tcp_columns,
    'UDP': udp_columns
}

# Function that loads the data
def load_data(train_path, test_path):
    """Read the training and test CSV files.

    Args:
        train_path: Path of the training CSV file.
        test_path: Path of the test CSV file.

    Returns:
        A ``(train_data, test_data)`` tuple of DataFrames.
    """
    return pd.read_csv(train_path), pd.read_csv(test_path)

# Function that prepares the data
def prepare_data(train_data, test_data):
    """Split both datasets into feature columns and the ``'class'`` label.

    Args:
        train_data: Training DataFrame containing a ``'class'`` column.
        test_data: Test DataFrame containing a ``'class'`` column.

    Returns:
        ``(X_train, y_train, X_test, y_test)``: each ``X`` is the frame
        without the ``'class'`` column, each ``y`` is that column.
    """
    label = 'class'
    return (
        train_data.drop(columns=[label]),
        train_data[label],
        test_data.drop(columns=[label]),
        test_data[label],
    )

# Function that computes the mutual information
def compute_mutual_info(X_train, y_train):
    """Score each feature by its mutual information with the class label.

    Args:
        X_train: Feature DataFrame.
        y_train: Class labels aligned with ``X_train``.

    Returns:
        A Series indexed by the column names of ``X_train``, holding the
        scores from ``mutual_info_classif`` sorted from highest to lowest.
    """
    scores = pd.Series(mutual_info_classif(X_train, y_train), index=X_train.columns)
    return scores.sort_values(ascending=False)

# Function that plots the mutual information scores
def plot_mi_scores(scores):
    """Show a horizontal bar chart of mutual information scores.

    Bars are drawn in ascending score order on the current matplotlib
    figure, labelled with the index of ``scores``, and then displayed with
    ``plt.show()``.

    Args:
        scores: Series of scores indexed by feature name.
    """
    ordered = scores.sort_values(ascending=True)
    positions = np.arange(len(ordered))
    labels = list(ordered.index)

    plt.barh(positions, ordered)
    plt.yticks(positions, labels)
    plt.title("Mutual Information Scores")
    plt.show()

# Function that creates the line plots
def plot_line_graph(df, columns, group_name):
    """Plot the given columns against the frame index and save the figure.

    The figure is written to ``'<group_name>_line_plot.png'`` and closed.

    Args:
        df: DataFrame holding the data to plot.
        columns: Names of the columns to draw, one line per column.
        group_name: Label used in the plot title and the output file name.
    """
    plt.figure(figsize=(14, 7))

    x_values = df.index
    for name in columns:
        plt.plot(x_values, df[name], label=name, alpha=0.6)

    plt.title(f'Line Plots - {group_name}')
    plt.xlabel('Time')
    plt.ylabel('Values')
    plt.legend(loc='upper right')

    output_file = f'{group_name}_line_plot.png'
    plt.savefig(output_file)
    plt.close()

# Function that computes and plots the correlation matrix
def plot_corr_matrix(df, columns, group_name):
    """Save a heatmap of the pairwise correlations between ``columns``.

    The annotated heatmap uses a fixed colour scale of [-1, 1] and is
    written to ``'<group_name>_correlation_matrix.png'``; the figure is
    closed afterwards.

    Args:
        df: DataFrame holding the data.
        columns: Names of the columns to correlate.
        group_name: Label used in the plot title and the output file name.
    """
    correlations = df[columns].corr()

    plt.figure(figsize=(10, 8))
    sns.heatmap(correlations, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
    plt.title(f'Correlation Matrix - {group_name}')

    output_file = f'{group_name}_correlation_matrix.png'
    plt.savefig(output_file)
    plt.close()

# Function that runs the data analysis
def analyze_data(train_path, test_path):
    """Generate the descriptive report, plots and mutual information scores.

    Outputs, all written to the current working directory:
      * ``data_description.txt``: ``describe()`` of the whole training set
        followed by one section per entry of ``groups``;
      * ``<group>_line_plot.png`` and ``<group>_correlation_matrix.png``
        for every group;
      * ``mutual_information_scores.csv``: mutual information of every
        feature with the class label.

    Columns listed in a group but absent from the training data are
    reported with a warning and skipped instead of aborting the whole
    analysis; a group with no available column is skipped entirely.

    Args:
        train_path: Path of the training CSV file.
        test_path: Path of the test CSV file.
    """
    train_data, test_data = load_data(train_path, test_path)
    # Only the training split is analysed; the test split is loaded so that
    # both files are validated by prepare_data().
    X_train, y_train, _X_test, _y_test = prepare_data(train_data, test_data)

    # The report is opened once. The encoding is explicit because the
    # headings contain non-ASCII characters and the platform default
    # encoding may not be able to represent them.
    with open('data_description.txt', 'w', encoding='utf-8') as report:
        # General description of the data
        report.write("Descrição geral dos dados de treinamento:\n")
        report.write(train_data.describe().to_string())

        # Description of the data per group
        for group_name, columns in groups.items():
            available = [col for col in columns if col in train_data.columns]
            missing = [col for col in columns if col not in train_data.columns]
            if missing:
                print(f"Warning: group {group_name} is missing columns {missing}")
            if not available:
                # describe() and corr() cannot work on a frame with no columns.
                continue

            report.write(f"\n\nDescrição dos dados - {group_name}:\n")
            report.write(train_data[available].describe().to_string())

            # Line plots
            plot_line_graph(train_data, available, group_name)

            # Correlation matrix
            plot_corr_matrix(train_data, available, group_name)

    # Compute and plot the mutual information
    mi_series = compute_mutual_info(X_train, y_train)
    plt.figure(dpi=100, figsize=(8, 5))
    plot_mi_scores(mi_series)
    # Release the figure so repeated scheduled runs do not accumulate
    # open figures (a no-op when the figure is already gone).
    plt.close()

    # Save the mutual information scores
    mi_series.to_csv('mutual_information_scores.csv')

    print("Análise de dados completa. Relatórios e gráficos gerados.")

# Job executed periodically by the scheduler
def job():
    """Run the data analysis on the configured training and test files."""
    analyze_data('path/to/train_data.csv', 'path/to/test_data.csv')

# Schedule the job to run daily at midnight.
# The job is registered at import time; it only fires from run_pending().
schedule.every().day.at("00:00").do(job)

if __name__ == "__main__":
    # Poll the scheduler once per second, forever.
    while True:
        schedule.run_pending()
        time.sleep(1)


# Script 4.1 - Classification Model

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import joblib

# Function to load the data
def load_data(train_path, test_path):
    """Load the training and test datasets from CSV.

    Args:
        train_path: Path of the training CSV file.
        test_path: Path of the test CSV file.

    Returns:
        A ``(train_data, test_data)`` tuple of DataFrames.
    """
    frames = [pd.read_csv(path) for path in (train_path, test_path)]
    return frames[0], frames[1]

# Function to prepare the data
def prepare_data(train_data, test_data):
    """Separate features from the ``'class'`` label in both datasets.

    Args:
        train_data: Training DataFrame containing a ``'class'`` column.
        test_data: Test DataFrame containing a ``'class'`` column.

    Returns:
        ``(X_train, y_train, X_test, y_test)``.
    """
    def split(frame):
        # Features are everything except the label column.
        return frame.drop(columns=['class']), frame['class']

    X_train, y_train = split(train_data)
    X_test, y_test = split(test_data)
    return X_train, y_train, X_test, y_test

# Function to select the features
def select_top_features(mi_path, num_features):
    """Return the names of the features with the highest MI scores.

    Args:
        mi_path: Path of a CSV file whose first column holds the feature
            names and whose second column holds their mutual information
            scores.
        num_features: Number of feature names to return.

    Returns:
        A list of at most ``num_features`` feature names, ordered from the
        highest score to the lowest.
    """
    # ``read_csv(..., squeeze=True)`` was removed in pandas 2.0, so read a
    # frame and take its single score column explicitly.
    mi_frame = pd.read_csv(mi_path, index_col=0)
    mi_series = mi_frame.iloc[:, 0]

    # Do not depend on the row order of the file. The stable sort keeps the
    # existing order of an already-sorted file, including ties.
    ranked = mi_series.sort_values(ascending=False, kind='stable')
    return ranked.head(num_features).index.tolist()

# Function to train and save the SVM model
def train_and_save_model(X_train, y_train, top_features, model_path):
    """Fit a scaler and an SVM on the selected features and persist them.

    Args:
        X_train: Training feature DataFrame.
        y_train: Training labels.
        top_features: Names of the columns of ``X_train`` to train on.
        model_path: Output path without extension; ``'.pkl'`` is appended.

    The saved object is a dict with the keys ``'model'`` (the fitted SVC),
    ``'scaler'`` (the fitted StandardScaler) and ``'features'`` (the list
    of feature names).
    """
    # Data normalization, restricted to the selected features
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(X_train[top_features])

    # SVM model training
    classifier = SVC(
        class_weight='balanced',
        decision_function_shape='ovo',
        random_state=42,
    )
    classifier.fit(scaled_features, y_train)

    # Saving the model, the scaler, and the top features together
    joblib.dump(
        {'model': classifier, 'scaler': scaler, 'features': top_features},
        f'{model_path}.pkl',
    )

    print("Model trained and saved.")

# Path of the files
train_path = 'path/to/train_data.csv'
test_path = 'path/to/test_data.csv'
mi_path = 'mutual_information_scores.csv'
model_path = 'path/to/saved_model'  # '.pkl' is appended when saving

# Load and prepare the data.
# The second argument must be the test file *path*; `test_data` does not
# exist until this call returns.
train_data, test_data = load_data(train_path, test_path)
X_train, y_train, X_test, y_test = prepare_data(train_data, test_data)

# Selecting the 15 main features
num_features = 15
top_features = select_top_features(mi_path, num_features)

# Training and saving the model
train_and_save_model(X_train, y_train, top_features, model_path)


# Script 4.2 - Troubleshooting

In [None]:
from flask import Flask, request, jsonify
import joblib
import pandas as pd
from sklearn.preprocessing import StandardScaler
from jinja2 import Template
import schedule
import time
import os
from datetime import datetime, timedelta

app = Flask(__name__)

# Path of the model
model_info_path = 'model/saved_model_info.pkl'

# Load the model, scaler, and features.
# This runs at import time; the file is expected to hold a dict with the
# keys 'model', 'scaler' and 'features'.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model files from a trusted location.
model_info = joblib.load(model_info_path)
svm_model = model_info['model']
scaler = model_info['scaler']
top_features = model_info['features']

# Function to prepare the received data
def prepare_input(data, top_features):
    """Select the model's features from ``data`` and scale them.

    Args:
        data: Anything accepted by ``pd.DataFrame`` (for example a
            DataFrame or a list of records).
        top_features: Names of the columns to keep, in model order.

    Returns:
        The result of the module-level ``scaler.transform`` applied to the
        selected columns.
    """
    selected = pd.DataFrame(data)[top_features]
    return scaler.transform(selected)

# Function to generate the alert report with suggestions
def generate_alert_report(predictions, data):
    """Render the HTML alert report for a batch of predictions.

    The report lists the number of entries per predicted class, the classes
    other than ``'normal'``, a suggested action for each of those classes
    and a dump of the classified data. It is written to
    ``alert_report.html`` in the current working directory, overwriting any
    previous report.

    Args:
        predictions: Iterable of predicted class labels.
        data: DataFrame with the rows that were classified.

    Returns:
        The path of the generated report.
    """
    from collections import Counter

    # One pass over the predictions instead of one list.count() per class.
    class_counts = dict(Counter(predictions))
    abnormal_classes = {cls: count for cls, count in class_counts.items() if cls != 'normal'}

    suggestions = {
        'tcp-syn': 'Check for potential TCP SYN flood attacks and review firewall rules.',
        'udp-flood': 'Inspect for UDP flood attacks and consider rate limiting.',
        'icmp-echo': 'Investigate ICMP echo requests for potential ping floods.',
        'httpFlood': 'Analyze HTTP traffic for potential HTTP flood attacks.'
    }

    abnormal_suggestions = {cls: suggestions.get(cls, 'No specific suggestion available.') for cls in abnormal_classes}

    template_str = """
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>Alert Report</title>
        <style>
            body { font-family: Arial, sans-serif; }
            h1 { text-align: center; }
            .section { margin-bottom: 40px; }
            .section h2 { border-bottom: 1px solid #000; }
        </style>
    </head>
    <body>
        <h1>Alert Report</h1>
        <div class="section">
            <h2>Results of the classification model:</h2>
            <ul>
                {% for class_name, count in class_counts.items() %}
                    <li>{{ class_name }} class = {{ count }} entries</li>
                {% endfor %}
            </ul>
        </div>
        <div class="section">
            <h2>Abnormal Classes Detected:</h2>
            {% if abnormal_classes %}
                <ul>
                {% for class_name, count in abnormal_classes.items() %}
                    <li>{{ class_name }}: {{ count }} entries</li>
                {% endfor %}
                </ul>
            {% else %}
                <p>No abnormal classes detected.</p>
            {% endif %}
        </div>
        <div class="section">
            <h2>Suggestions for Resolution:</h2>
            {% if abnormal_suggestions %}
                <ul>
                {% for class_name, suggestion in abnormal_suggestions.items() %}
                    <li>{{ class_name }}: {{ suggestion }}</li>
                {% endfor %}
                </ul>
            {% else %}
                <p>No suggestions available.</p>
            {% endif %}
        </div>
        <div class="section">
            <h2>Received data</h2>
            <pre>{{ data }}</pre>
        </div>
    </body>
    </html>
    """
    # The data can originate from the /predict endpoint, i.e. from an
    # untrusted client, so every substituted value is HTML-escaped.
    template = Template(template_str, autoescape=True)
    html_out = template.render(class_counts=class_counts, abnormal_classes=abnormal_classes,
                               abnormal_suggestions=abnormal_suggestions, data=data.to_string())

    report_path = 'alert_report.html'
    # Written as UTF-8 to match the charset declared in the document head.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(html_out)

    return report_path

# Function to make predictions and generate alert reports if necessary
def predict_and_alert(data_path='data', max_age_minutes=5):
    """Classify recently collected data and write an alert report.

    Every file in ``data_path`` named ``collected_snmp_data_*.csv`` whose
    ``os.path.getctime`` timestamp lies within the last ``max_age_minutes``
    is loaded; the rows of all such files are concatenated, classified with
    the loaded model and passed to ``generate_alert_report``.

    A missing directory or the absence of recent files is reported with a
    message and the function returns without raising, so a scheduler loop
    calling it keeps running.

    Args:
        data_path: Directory that holds the collected CSV files.
        max_age_minutes: Size of the look-back window in minutes.
    """
    if not os.path.isdir(data_path):
        print(f"Data directory not found: {data_path}")
        return

    cutoff = datetime.now() - timedelta(minutes=max_age_minutes)

    candidates = [
        os.path.join(data_path, name)
        for name in os.listdir(data_path)
        if name.startswith('collected_snmp_data_') and name.endswith('.csv')
    ]
    # NOTE(review): getctime is the creation time on Windows but the last
    # metadata change on Unix; confirm this is the intended timestamp.
    recent_files = [
        path for path in candidates
        if datetime.fromtimestamp(os.path.getctime(path)) > cutoff
    ]

    if not recent_files:
        print("No recent data files found.")
        return

    # Load and concatenate the recent files
    df = pd.concat([pd.read_csv(path) for path in recent_files], ignore_index=True)

    # Make predictions and generate the alert report
    input_data = prepare_input(df, top_features)
    predictions = svm_model.predict(input_data)

    report_path = generate_alert_report(predictions, df)
    print(f"Report generated: {report_path}")

# Endpoint for prediction via API
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the records posted as JSON and generate an alert report.

    The request body must be JSON that ``pd.DataFrame`` can turn into a
    table containing every feature the model was trained on.

    Returns:
        ``200`` with a message and the report path on success, or ``400``
        with an ``error`` description when the body is missing, is not
        valid JSON, or cannot be converted into the model's input.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body, so the client always receives a JSON error response.
    data = request.get_json(silent=True)
    if not data:
        return jsonify({"error": "Request body must be non-empty JSON."}), 400

    try:
        frame = pd.DataFrame(data)
        input_data = prepare_input(frame, top_features)
    except (KeyError, ValueError, TypeError) as exc:
        # KeyError: a required feature column is missing.
        # ValueError/TypeError: the payload is not tabular or not numeric.
        return jsonify({"error": f"Invalid input data: {exc}"}), 400

    predictions = svm_model.predict(input_data)

    report_path = generate_alert_report(predictions, frame)
    return jsonify({"message": "Prediction completed.", "report": report_path}), 200

# Schedule the prediction to run every 5 minutes
schedule.every(5).minutes.do(predict_and_alert)


def _run_scheduler():
    """Run pending scheduled jobs forever, polling once per second."""
    while True:
        schedule.run_pending()
        time.sleep(1)


if __name__ == "__main__":
    import threading

    # app.run() blocks until the server shuts down, so a scheduler loop
    # placed after it would never execute while the service is up. Run the
    # scheduler in a daemon thread instead; it ends with the process.
    threading.Thread(target=_run_scheduler, name="scheduler", daemon=True).start()

    # Run the Flask server. The debug reloader re-executes this module in a
    # child process, which would start a second scheduler, so it is disabled.
    app.run(debug=True, use_reloader=False)
