In [3]:
!pip install seaborn



In [5]:
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
import seaborn as sns
import matplotlib.pyplot as plt

In [10]:
# Attack-traffic captures; each name maps to dataset/in/ataque/<name>.csv.
ataques = [
    'ackscan', 'bruteforcedirb', 'cmsscan', 'dosddos', 'fullconnectscan',
    'icmpechodiscover', 'mysqlbruteforces', 'nikto', 'scanvuln', 'ssh_a',
    'stealthscan', 'synscan', 'udpscan', 'wapiti',
]

# Load every attack CSV and print "<name>(rows, cols)".
print("Shape Ataques")
for ataque in ataques:
    df = pd.read_csv(f'dataset/in/ataque/{ataque}.csv')
    print(f'{ataque}{df.shape}')

# Normal-traffic captures; each name maps to dataset/in/normal/<name>.csv.
normais = ['dns', 'http', 'smtp', 'snmp', 'ssh']

# Same listing for the normal-traffic files.
print("Shape Normais")
for normal in normais:
    df = pd.read_csv(f'dataset/in/normal/{normal}.csv')
    print(f'{normal}{df.shape}')


Shape Ataques
ackscan(13238071, 41)
bruteforcedirb(481583, 41)
cmsscan(1088524, 41)
dosddos(734836, 41)
fullconnectscan(13208980, 41)
icmpechodiscover(13238071, 41)
mysqlbruteforces(70212, 41)
nikto(1632614, 41)
scanvuln(13764217, 41)
ssh_a(12243, 41)
stealthscan(13237120, 41)
synscan(13172535, 41)
udpscan(51841, 41)
wapiti(1492, 41)
Shape Normais
dns(49178, 41)
http(516525, 41)
smtp(286839, 41)
snmp(2766, 41)
ssh(18286, 41)


In [12]:
def salvarMetricas(path, classifier, cm, report, acc):
    """Write the evaluation metrics of one classifier to a text file.

    The file is created (or overwritten) at ``path + classifier + ".txt"``.
    ``path`` is concatenated as-is, so it must already end with a path
    separator if it names a directory.

    Args:
        path: String prefix of the output file.
        classifier: Classifier label; used in the file name and in the header.
        cm: Confusion matrix; written through ``str(cm)``.
        report: Classification report text.
        acc: Already formatted accuracy line.

    Returns:
        None.
    """
    # The accuracy line contains non-ASCII text ("Acurácia"), so the encoding
    # is pinned to UTF-8 instead of relying on the platform/locale default.
    with open(path + classifier + ".txt", 'w', encoding='utf-8') as arquivo:
        arquivo.write("###" + classifier + "###\n")
        arquivo.write(str(cm) + "\n")
        arquivo.write(report + "\n")
        arquivo.write(acc + "\n")
        arquivo.write("###" + "\n")
        arquivo.write("\n")
    

In [14]:
def classifyRF(path, X_train, y_train, X_test, y_test):
    """Train a random forest, pickle it, and report its test-set metrics.

    Fits ``RandomForestClassifier(n_estimators=100, random_state=16)``, saves
    the fitted estimator to ``path + "RandomForest.pkl"``, prints the confusion
    matrix, classification report and accuracy, and hands them to
    ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "RandomForest"
    print(classifier)

    clf = RandomForestClassifier(n_estimators=100, random_state=16)
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [16]:
def classifyDT(path, X_train, y_train, X_test, y_test):
    """Train a decision tree, pickle it, and report its test-set metrics.

    Fits ``DecisionTreeClassifier(random_state=16)``, saves the fitted
    estimator to ``path + "DecisionTree.pkl"``, prints the confusion matrix,
    classification report and accuracy, and hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "DecisionTree"
    print(classifier)

    clf = DecisionTreeClassifier(random_state=16)
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [18]:
def classifyNB(path, X_train, y_train, X_test, y_test):
    """Train a Gaussian naive Bayes model, pickle it, and report its metrics.

    Fits ``GaussianNB()`` with default settings, saves the fitted estimator to
    ``path + "NaiveBayes.pkl"``, prints the confusion matrix, classification
    report and accuracy, and hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "NaiveBayes"
    print(classifier)

    clf = GaussianNB()
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [20]:
def classifyADA(path, X_train, y_train, X_test, y_test):
    """Train an AdaBoost ensemble of decision stumps and report its metrics.

    Fits ``AdaBoostClassifier`` (50 estimators, ``random_state=16``) over a
    depth-1 ``DecisionTreeClassifier``, saves the fitted estimator to
    ``path + "AdaBoost.pkl"``, prints the confusion matrix, classification
    report and accuracy, and hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "AdaBoost"
    print(classifier)

    # Weak learner: a decision stump (tree of depth 1).
    stump = DecisionTreeClassifier(max_depth=1)
    # `estimator=` is the current keyword (the old `base_estimator=` was
    # renamed in scikit-learn 1.2 and later removed); classifyBagging in this
    # notebook already uses the new spelling.
    model = AdaBoostClassifier(estimator=stump, n_estimators=50, random_state=16)
    model.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    with open(path + classifier + '.pkl', 'wb') as arquivo:
        pickle.dump(model, arquivo)

    y_pred = model.predict(X_test)

    cm = confusion_matrix(y_test, y_pred)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, y_pred, target_names=['ataque', 'normal'])
    print(report)

    accuracy = accuracy_score(y_test, y_pred)
    acc = f'Acurácia do modelo: {accuracy * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [22]:
def classifyKNN(path, X_train, y_train, X_test, y_test):
    """Train a 3-nearest-neighbours model, pickle it, and report its metrics.

    Fits ``KNeighborsClassifier(n_neighbors=3)``, saves the fitted estimator
    to ``path + "KNN.pkl"``, prints the confusion matrix, classification
    report and accuracy, and hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "KNN"
    print(classifier)

    clf = KNeighborsClassifier(n_neighbors=3)
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [24]:
def classifyMLP(path, X_train, y_train, X_test, y_test):
    """Train a multi-layer perceptron, pickle it, and report its metrics.

    Fits ``MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000,
    random_state=16)``, saves the fitted estimator to ``path + "MPL.pkl"``,
    prints the confusion matrix, classification report and accuracy, and
    hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    # NOTE(review): "MPL" looks like a transposition of "MLP". It is kept
    # because it names the .pkl/.txt artifacts that other code may load.
    classifier = "MPL"
    print(classifier)

    clf = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=16)
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [34]:
def classifyCategoricalNB(path, X_train, y_train, X_test, y_test):
    """Train a categorical naive Bayes model, pickle it, and report its metrics.

    Fits ``CategoricalNB()`` with default settings, saves the fitted estimator
    to ``path + "CategoricalNB.pkl"``, prints the confusion matrix,
    classification report and accuracy, and hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "CategoricalNB"
    print(classifier)

    clf = CategoricalNB()
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [None]:
def classifyBagging(path, X_train, y_train, X_test, y_test):
    """Train a bagging ensemble of shallow trees and report its metrics.

    Fits ``BaggingClassifier`` (50 estimators, ``random_state=16``) over a
    ``DecisionTreeClassifier(max_depth=5, random_state=16)``, saves the fitted
    estimator to ``path + "Bagging.pkl"``, prints the confusion matrix,
    classification report and accuracy, and hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "Bagging"
    print(classifier)

    # Base learner: a decision tree limited to depth 5.
    shallow_tree = DecisionTreeClassifier(max_depth=5, random_state=16)
    clf = BaggingClassifier(estimator=shallow_tree, n_estimators=50, random_state=16)
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [None]:
def classifySVC(path, X_train, y_train, X_test, y_test):
    """Train a linear-kernel SVC, pickle it, and report its test-set metrics.

    Fits ``SVC(kernel="linear", C=1.0, random_state=16)``, saves the fitted
    estimator to ``path + "SVC.pkl"``, prints the confusion matrix,
    classification report and accuracy, and hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "SVC"
    print(classifier)

    clf = SVC(kernel="linear", C=1.0, random_state=16)
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [None]:
def classifyETC(path, X_train, y_train, X_test, y_test):
    """Train an extra-trees ensemble, pickle it, and report its metrics.

    Fits ``ExtraTreesClassifier(random_state=16)``, saves the fitted estimator
    to ``path + "ETC.pkl"``, prints the confusion matrix, classification
    report and accuracy, and hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "ETC"
    print(classifier)

    # Seeded with the notebook-wide seed (16) so the randomized trees, and
    # therefore the saved metrics, are reproducible across runs.
    model = ExtraTreesClassifier(random_state=16)
    model.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    with open(path + classifier + '.pkl', 'wb') as arquivo:
        pickle.dump(model, arquivo)

    y_pred = model.predict(X_test)

    cm = confusion_matrix(y_test, y_pred)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, y_pred, target_names=['ataque', 'normal'])
    print(report)

    accuracy = accuracy_score(y_test, y_pred)
    acc = f'Acurácia do modelo: {accuracy * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [None]:
def classifyLR(path, X_train, y_train, X_test, y_test):
    """Train a logistic regression model, pickle it, and report its metrics.

    Fits ``LogisticRegression()`` with default settings, saves the fitted
    estimator to ``path + "LogisticRegression.pkl"``, prints the confusion
    matrix, classification report and accuracy, and hands them to
    ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "LogisticRegression"
    print(classifier)

    clf = LogisticRegression()
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [None]:
def classifyGBC(path, X_train, y_train, X_test, y_test):
    """Train a gradient-boosting model, pickle it, and report its metrics.

    Fits ``GradientBoostingClassifier(random_state=16)``, saves the fitted
    estimator to ``path + "GradientBoosting.pkl"``, prints the confusion
    matrix, classification report and accuracy, and hands them to
    ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "GradientBoosting"
    print(classifier)

    # Seeded with the notebook-wide seed (16), like the other seeded
    # classifiers here, so repeated runs give the same model and metrics.
    model = GradientBoostingClassifier(random_state=16)
    model.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    with open(path + classifier + '.pkl', 'wb') as arquivo:
        pickle.dump(model, arquivo)

    y_pred = model.predict(X_test)

    cm = confusion_matrix(y_test, y_pred)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, y_pred, target_names=['ataque', 'normal'])
    print(report)

    accuracy = accuracy_score(y_test, y_pred)
    acc = f'Acurácia do modelo: {accuracy * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [None]:
def classifyRidge(path, X_train, y_train, X_test, y_test):
    """Train a ridge classifier, pickle it, and report its test-set metrics.

    Fits ``RidgeClassifier()`` with default settings, saves the fitted
    estimator to ``path + "Ridge.pkl"``, prints the confusion matrix,
    classification report and accuracy, and hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "Ridge"
    print(classifier)

    clf = RidgeClassifier()
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [None]:
def classifyLDA(path, X_train, y_train, X_test, y_test):
    """Train a linear discriminant analysis model and report its metrics.

    Fits ``LinearDiscriminantAnalysis()`` with default settings, saves the
    fitted estimator to ``path + "LDA.pkl"``, prints the confusion matrix,
    classification report and accuracy, and hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "LDA"
    print(classifier)

    clf = LinearDiscriminantAnalysis()
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [None]:
def classifyPerceptron(path, X_train, y_train, X_test, y_test):
    """Train a perceptron, pickle it, and report its test-set metrics.

    Fits ``Perceptron()`` with default settings, saves the fitted estimator to
    ``path + "Perceptron.pkl"``, prints the confusion matrix, classification
    report and accuracy, and hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "Perceptron"
    print(classifier)

    clf = Perceptron()
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [None]:
def classifyGPC(path, X_train, y_train, X_test, y_test):
    """Train a Gaussian process classifier, pickle it, and report its metrics.

    Fits ``GaussianProcessClassifier()`` with default settings, saves the
    fitted estimator to ``path + "GaussianProcess.pkl"``, prints the confusion
    matrix, classification report and accuracy, and hands them to
    ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "GaussianProcess"
    print(classifier)

    clf = GaussianProcessClassifier()
    clf.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    pkl_path = path + classifier + '.pkl'
    with open(pkl_path, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file)

    predictions = clf.predict(X_test)

    cm = confusion_matrix(y_test, predictions)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, predictions, target_names=['ataque', 'normal'])
    print(report)

    score = accuracy_score(y_test, predictions)
    acc = f'Acurácia do modelo: {score * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [None]:
def classifySGD(path, X_train, y_train, X_test, y_test):
    """Train an SGD linear classifier, pickle it, and report its metrics.

    Fits ``SGDClassifier(random_state=16)``, saves the fitted estimator to
    ``path + "SGD.pkl"``, prints the confusion matrix, classification report
    and accuracy, and hands them to ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "SGD"
    print(classifier)

    # SGD shuffles the training data between epochs; seed it with the
    # notebook-wide seed (16) so the fitted model and metrics are reproducible.
    model = SGDClassifier(random_state=16)
    model.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    with open(path + classifier + '.pkl', 'wb') as arquivo:
        pickle.dump(model, arquivo)

    y_pred = model.predict(X_test)

    cm = confusion_matrix(y_test, y_pred)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, y_pred, target_names=['ataque', 'normal'])
    print(report)

    accuracy = accuracy_score(y_test, y_pred)
    acc = f'Acurácia do modelo: {accuracy * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [None]:
def classifyPA(path, X_train, y_train, X_test, y_test):
    """Train a passive-aggressive classifier, pickle it, and report its metrics.

    Fits ``PassiveAggressiveClassifier(random_state=16)``, saves the fitted
    estimator to ``path + "PassiveAggressive.pkl"``, prints the confusion
    matrix, classification report and accuracy, and hands them to
    ``salvarMetricas``.

    Args:
        path: String prefix for the output files (concatenated as-is).
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to predict on.
        y_test: True labels compared against the predictions.
    """
    classifier = "PassiveAggressive"
    print(classifier)

    # The training data is shuffled between epochs; seed it with the
    # notebook-wide seed (16) so the fitted model and metrics are reproducible.
    model = PassiveAggressiveClassifier(random_state=16)
    model.fit(X_train, y_train)

    # Persist the fitted estimator before evaluating it.
    with open(path + classifier + '.pkl', 'wb') as arquivo:
        pickle.dump(model, arquivo)

    y_pred = model.predict(X_test)

    cm = confusion_matrix(y_test, y_pred)
    print(cm)

    # Report rows are named 'ataque' then 'normal'.
    # NOTE(review): presumably label 0 = ataque and 1 = normal - confirm against the dataset.
    report = classification_report(y_test, y_pred, target_names=['ataque', 'normal'])
    print(report)

    accuracy = accuracy_score(y_test, y_pred)
    acc = f'Acurácia do modelo: {accuracy * 100:.2f}%'
    print(acc)
    print("\n")

    salvarMetricas(path, classifier, cm, report, acc)

In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import CategoricalNB
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import Perceptron
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
import seaborn as sns
import matplotlib.pyplot as plt

# Experiment ids to process; id i reads from dataset/combination_new/exp<i>/.
# Alternative id range left disabled by the author: range(6, 9).
lista = [1]
for i in lista:
    path_dataset = f'dataset/combination_new/exp{i}/'

    # Train split: every column except 'Label' is a feature.
    df_train = pd.read_csv(path_dataset + 'train.csv')
    X_train = df_train.drop(columns='Label')
    y_train = df_train['Label']

    # Test split, same layout as the train split.
    df_test = pd.read_csv(path_dataset + 'test.csv')
    X_test = df_test.drop(columns='Label')
    y_test = df_test['Label']

    # Classifiers disabled for this run:
    #   classifyRF, classifyDT, classifyNB, classifyADA, classifyMLP, classifyBagging
    # NOTE(review): the captured output for this cell ends right after "SVC";
    # presumably the run did not get past the first enabled classifier - confirm
    # that the remaining models are feasible on the full training set.
    for run_classifier in (
        classifySVC,
        classifyETC,
        classifyLR,
        classifyGBC,
        classifyRidge,
        classifyLDA,
        classifyPerceptron,
        classifyGPC,
        classifySGD,
        classifyPA,
        classifyKNN,
        classifyCategoricalNB,
    ):
        run_classifier(path_dataset, X_train, y_train, X_test, y_test)


Bagging
[[15870975  1129881]
 [   14807    91484]]
              precision    recall  f1-score   support

      ataque       1.00      0.93      0.97  17000856
      normal       0.07      0.86      0.14    106291

    accuracy                           0.93  17107147
   macro avg       0.54      0.90      0.55  17107147
weighted avg       0.99      0.93      0.96  17107147

Acurácia do modelo: 93.31%


SVC


In [None]:
from sklearn.metrics import confusion_matrix

# Example true labels and predicted labels (binary: 0 = negative, 1 = positive).
# The original y_true contained a stray 10, which introduced a third class and
# made the 4-way unpacking of cm.ravel() below fail.
y_true = [0, 1, 0, 1, 0, 1]
y_pred = [0, 1, 1, 1, 0, 0]

# Build the confusion matrix; labels=[0, 1] pins it to 2x2 even if one class
# is missing from the data.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

print("Matriz de Confusão:")
print(cm)

# Unpack the four cells in row-major order: [[tn, fp], [fn, tp]].
tn, fp, fn, tp = cm.ravel()

print(f"Verdadeiros Negativos (TN): {tn}")
print(f"Falsos Positivos (FP): {fp}")
print(f"Falsos Negativos (FN): {fn}")
print(f"Verdadeiros Positivos (TP): {tp}")