In [13]:
# Importing Libraries
import json
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn

# Preprocessing purpose
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

# Splitting Data
from sklearn.model_selection import train_test_split

# For Decision tree, accuracy, Classification Report, Confusion Matrix
from sklearn import metrics
from sklearn import tree

# Mail Notification
import smtplib
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email import encoders

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [25]:
# Load the dataset and the mail credentials from disk
def readData(dataset_path='./Data/UNSW_NB15_training-set.csv',
             creds_path='./Data/mailCreds.json'):
    """Read the CSV dataset and the JSON mail-credentials file.

    Parameters
    ----------
    dataset_path : str or path-like, optional
        CSV file handed to ``pd.read_csv``. Defaults to the path this
        notebook originally hard-coded.
    creds_path : str or path-like, optional
        JSON file with the mail credentials. Defaults to the path this
        notebook originally hard-coded.

    Returns
    -------
    list
        ``[dataset, creds]`` - the loaded DataFrame followed by the parsed
        JSON content.

    Raises
    ------
    FileNotFoundError
        If either file does not exist.
    json.JSONDecodeError
        If the credentials file is not valid JSON.
    """
    dataset = pd.read_csv(dataset_path)
    with open(creds_path, 'r') as file:
        creds = json.load(file)
    return [dataset, creds]

In [26]:
def standardScaling(dataset):
    """Standardise the numeric columns of ``dataset`` with ``StandardScaler``.

    Only columns of dtype ``float64`` or ``int64`` are selected; every other
    column is left out of the result. The scaler is fitted on the data it
    transforms (``fit_transform``).

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame containing the columns to scale.

    Returns
    -------
    pandas.DataFrame
        The scaled numeric columns, with the same column names and the same
        row index as ``dataset``.
    """
    # Select once and reuse for both the values and the column labels.
    numeric = dataset.select_dtypes(include=['float64', 'int64'])
    scaled = StandardScaler().fit_transform(numeric)
    # Keep the caller's row index: fit_transform returns a bare array, and a
    # frame rebuilt without `index=` would get a fresh RangeIndex. An
    # index-aligned pd.concat(axis=1) with frames that kept the original
    # index would then misalign rows whenever that index is not 0..n-1.
    return pd.DataFrame(scaled, columns=numeric.columns, index=numeric.index)

In [27]:
def LabelEncoding(dataset):
    """Integer-encode the object-dtype columns and drop the target column.

    Every ``object`` column of ``dataset`` (``attack_cat`` included) is
    encoded independently with ``LabelEncoder``; ``attack_cat`` is then
    removed from the encoded frame.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame containing the categorical columns.

    Returns
    -------
    pandas.DataFrame
        The encoded object-dtype columns without ``attack_cat``.

    Raises
    ------
    KeyError
        If ``dataset`` has no object-dtype column named ``attack_cat``.
    """
    encoder = LabelEncoder()
    categorical = dataset.select_dtypes(include=['object'])
    # fit_transform refits the encoder on each column it is applied to, so
    # one encoder instance can be shared across columns.
    encoded = categorical.apply(encoder.fit_transform)
    return encoded.drop(columns=['attack_cat'])

In [28]:
def mlTraining(train_x, train_y, test_size=0.80, random_state=10):
    """Split the data and fit an entropy-criterion decision tree.

    Parameters
    ----------
    train_x : array-like
        Feature matrix.
    train_y : array-like
        Target labels aligned with ``train_x``.
    test_size : float or int, optional
        Forwarded to ``train_test_split``. The default of ``0.80`` keeps the
        notebook's original behaviour, which holds out 80% of the rows and
        fits on the remaining 20%.
        NOTE(review): confirm that this ratio is intended rather than an
        inverted 80/20 split.
    random_state : int, optional
        Seed forwarded to ``train_test_split`` (default ``10``).

    Returns
    -------
    list
        ``[model, X_train, X_test, Y_train, Y_test]`` where ``model`` is the
        fitted ``DecisionTreeClassifier``.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        train_x, train_y, test_size=test_size, random_state=random_state
    )
    # The tree's own seed stays fixed so repeated fits are reproducible.
    DTC_Classifier = tree.DecisionTreeClassifier(criterion='entropy', random_state=0)
    DTC_Classifier.fit(X_train, Y_train)
    return [DTC_Classifier, X_train, X_test, Y_train, Y_test]

In [29]:
def trainingResults(model, X_train, Y_train):
    """Score ``model`` on the data it was fitted on and print a summary.

    Parameters
    ----------
    model : fitted estimator
        Object exposing ``predict``.
    X_train : array-like
        Training features.
    Y_train : array-like
        True labels for ``X_train``.

    Returns
    -------
    list
        ``[accuracy, confusion_matrix, classification_report]`` where the
        report is the dict produced by ``output_dict=True``.
    """
    # Predict once and reuse the result for all three metrics instead of
    # re-running the model for each of them.
    train_predictions = model.predict(X_train)
    train_accuracy = metrics.accuracy_score(Y_train, train_predictions)
    train_confusion_matrix = metrics.confusion_matrix(Y_train, train_predictions)
    train_classification = metrics.classification_report(Y_train, train_predictions, output_dict=True)
    print()
    print('============================== Decision Tree Classifier Model Evaluation ==============================')
    print ("Model Accuracy:" "\n", train_accuracy)
    print()
    print("Confusion matrix:" "\n", train_confusion_matrix)
    print()
    return [train_accuracy, train_confusion_matrix, train_classification]

In [30]:
def testingResults(model, X_test, Y_test):
    """Score ``model`` on the held-out data and print a summary.

    Parameters
    ----------
    model : fitted estimator
        Object exposing ``predict``.
    X_test : array-like
        Held-out features.
    Y_test : array-like
        True labels for ``X_test``.

    Returns
    -------
    list
        ``[accuracy, confusion_matrix, classification_report]`` where the
        report is the dict produced by ``output_dict=True``.
    """
    # Predict once and reuse the result for all three metrics instead of
    # re-running the model for each of them.
    test_predictions = model.predict(X_test)
    test_accuracy = metrics.accuracy_score(Y_test, test_predictions)
    test_confusion_matrix = metrics.confusion_matrix(Y_test, test_predictions)
    test_classification = metrics.classification_report(Y_test, test_predictions, output_dict=True)
    print()
    print('============================== Decision Tree Classifier Model Test Results ==============================')
    print()
    print ("Model Accuracy:" "\n", test_accuracy)
    print()
    print("Confusion matrix:" "\n", test_confusion_matrix)
    print()
    return [test_accuracy, test_confusion_matrix, test_classification]

In [31]:
def save_report_to_excel(report, filename):
    """Write a classification-report mapping to an Excel workbook.

    Parameters
    ----------
    report : dict
        Mapping accepted by ``pd.DataFrame``; it is transposed before being
        written.
    filename : str or path-like
        Destination workbook, written with the ``openpyxl`` engine.
    """
    # Transpose the frame built from the mapping before writing it out.
    report_table = pd.DataFrame(report).T

    # The writer saves and closes the workbook when the block exits.
    excel_writer = pd.ExcelWriter(filename, engine='openpyxl')
    with excel_writer:
        report_table.to_excel(excel_writer, sheet_name='Classification Report')

In [32]:
def send_email(subject, body, to_email, from_email, password, attachment_path,
               smtp_host='smtp.gmail.com', smtp_port=465):
    """Send a plain-text email with one file attached over SMTP-over-SSL.

    Parameters
    ----------
    subject : str
        Subject header.
    body : str
        Plain-text message body.
    to_email : str
        Recipient address.
    from_email : str
        Sender address; also used as the SMTP login name.
    password : str
        SMTP login password.
    attachment_path : str or path-like
        File to attach. Only its base name is exposed to the recipient.
    smtp_host : str, optional
        SMTP server host (default ``'smtp.gmail.com'``).
    smtp_port : int, optional
        SMTP-over-SSL port (default ``465``).

    Raises
    ------
    OSError
        If ``attachment_path`` cannot be opened. Errors raised while talking
        to the SMTP server are not propagated: they are caught and reported
        with a printed message.
    """
    # Create the email message
    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject

    # Attach the email body to the message
    msg.attach(MIMEText(body, 'plain'))

    # Read the file first so the handle is closed before the part is built.
    with open(attachment_path, 'rb') as attachment:
        payload = attachment.read()
    part = MIMEBase('application', 'octet-stream')
    part.set_payload(payload)
    encoders.encode_base64(part)
    # Pass the filename as a header parameter so the email package quotes it
    # correctly, and send only the base name rather than the local path.
    part.add_header(
        'Content-Disposition',
        'attachment',
        filename=os.path.basename(attachment_path),
    )
    msg.attach(part)

    try:
        # The context manager closes the connection even when login or
        # sendmail raises, so a failed send no longer leaks the socket.
        with smtplib.SMTP_SSL(smtp_host, smtp_port) as server:
            server.login(from_email, password)
            server.sendmail(from_email, to_email, msg.as_string())

        print("Email sent successfully!")
    except Exception as e:
        # Best-effort delivery: report the failure instead of raising.
        print(f"Failed to send email: {e}")

In [37]:
# Load the raw dataset and the mail credentials.
dataset, creds = readData()

# Build the feature matrix (scaled numeric + encoded categorical columns)
# and the target vector.
# NOTE(review): the scaler is fitted on the whole dataset before mlTraining
# splits it, and every numeric column is used as a feature - confirm that no
# identifier or target-derived column leaks into train_x.
intCols = standardScaling(dataset)
objCols = LabelEncoding(dataset)
train_x = pd.concat([intCols, objCols], axis=1)
train_y = dataset['attack_cat']

# Split the data and fit the decision tree.
model, X_train, X_test, Y_train, Y_test = mlTraining(train_x, train_y)

# Settings for the report email.
from_email = creds['email_id']
auth_password = creds['password']
to_email = "tsric12345@gmail.com"
subject = "Classification Report"
body = "Please find the attached classification report."
excel_filename = 'classification_report.xlsx'

In [38]:
# Evaluate on both partitions; each call prints its metrics and returns
# [accuracy, confusion_matrix, classification_report].
modelTraining = trainingResults(model, X_train, Y_train)
modelTesting = testingResults(model, X_test, Y_test)

# The classification report is the last element of the returned list;
# write the held-out report to the Excel file.
report_dict = modelTesting[-1]
save_report_to_excel(report_dict, excel_filename)


Model Accuracy:
 1.0

Confusion matrix:
 [[ 128    0    0    0    0    0    0    0    0    0]
 [   0  116    0    0    0    0    0    0    0    0]
 [   0    0  767    0    0    0    0    0    0    0]
 [   0    0    0 2231    0    0    0    0    0    0]
 [   0    0    0    0 1248    0    0    0    0    0]
 [   0    0    0    0    0 3867    0    0    0    0]
 [   0    0    0    0    0    0 7305    0    0    0]
 [   0    0    0    0    0    0    0  713    0    0]
 [   0    0    0    0    0    0    0    0   82    0]
 [   0    0    0    0    0    0    0    0    0    9]]



Model Accuracy:
 0.8758236419396958

Confusion matrix:
 [[   56    96    98   182   111     0     0     6     0     0]
 [   81    29    70   175    94     7     0    10     1     0]
 [  137    88  1086  1472   255    86     0   164    33     1]
 [  190   170  1422  5988   519   174     0   332    83    23]
 [  112   106   255   480  3718    37     0    44    60     2]
 [    0     4    67   153    52 14712     0     5    

In [36]:
# Mail the saved classification report as an attachment
send_email(
    subject=subject,
    body=body,
    to_email=to_email,
    from_email=from_email,
    password=auth_password,
    attachment_path=excel_filename,
)

Email sent successfully!
