In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv('/home/sonia/Bureau/ransomware_detection/personalizedCleanedRansomwareDetection.csv')

X = df.drop('Family', axis=1)
y = df.Family
#print(y)

np.random.seed(3)

from sklearn.model_selection import train_test_split, cross_val_score, StratifiedShuffleSplit
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, stratify=y)

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,BaggingClassifier,AdaBoostClassifier
from sklearn.linear_model import Perceptron
from xgboost import XGBClassifier,XGBRFClassifier

from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

models = []

models.append(('MLP', Perceptron()))
models.append(('RFC', RandomForestClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('BGC', BaggingClassifier()))
models.append(('ABC', AdaBoostClassifier()))
models.append(('XGBC', XGBClassifier()))
models.append(('XGBRFC', XGBClassifier()))


maxim = 0
best_model = []

for name, clf in models:
    clf.fit(X_train, y_train)
    prediction = clf.predict(X_test)
    accuracy = accuracy_score(y_test, prediction)
    print('%s: %f' % (name, accuracy))
    if(accuracy > maxim):
        maxim = accuracy
        best_model.append((name, clf))

print('the best model with the train_test_split :', best_model[-1][0])
# Selection des algos
model = best_model[-1][1]
model.fit(X_train, y_train)

# Predictions
predictions = model.predict(X_test)

print(accuracy_score(y_test, predictions))
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

# evaluation des algos
maxim = 0
results = []
names = []
best_model = []

for name, model in models:
    kfold = StratifiedShuffleSplit(n_splits=7, test_size=0.27, random_state=3)
    cv_results = cross_val_score(model,X,y,cv=kfold,scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    me = cv_results.mean()
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))
    if(me > maxim):
        maxim = me
        best_model.append((name, model))
"""
pyplot.boxplot(results, labels=names)
pyplot.title('Algorithm Comparison')
pyplot.show()
"""
print('Best model given the best accuracy is: '+best_model[-1][0])
# Selection des algos
model = best_model[-1][1]
model.fit(X_train, y_train)

# Predictions
predictions = model.predict(X_test)

print(accuracy_score(y_test, predictions))
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

MLP: 0.133333
RFC: 0.866667
CART: 0.733333
BGC: 0.866667
ABC: 0.400000
XGBC: 0.800000
XGBRFC: 0.800000
the best model with the train_test_split : RFC
0.8666666666666667
[[1 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 2 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 3 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 0 0 0 1]]
              precision    recall  f1-score   support

  Cryptowall       1.00      1.00      1.00         1
  Megacortex       1.00      1.00      1.00         1
        Ryuk       0.33      1.00      0.50         1
      Samsam       1.00      1.00      1.00         1
     bitcoin       1.00      1.00      1.00         1
      cerber       0.00      0.00      0.00         2
 cryptojoker       1.00      1.00      1.00         1
      crysis       1.00      1.00      1.00         3
      dharma       

  _warn_prf(average, modifier, msg_start, len(result))


MLP: 0.243697 (0.079721)
RFC: 0.714286 (0.049000)
CART: 0.638655 (0.066168)
BGC: 0.655462 (0.073259)
ABC: 0.369748 (0.041168)
XGBC: 0.680672 (0.053148)
XGBRFC: 0.680672 (0.053148)
Best model given the best accuracy is: RFC
0.8666666666666667
[[1 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 1 0 0 1 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 3 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 0 0 0 1]]
              precision    recall  f1-score   support

  Cryptowall       1.00      1.00      1.00         1
  Megacortex       1.00      1.00      1.00         1
        Ryuk       0.50      1.00      0.67         1
      Samsam       0.50      1.00      0.67         1
     bitcoin       1.00      1.00      1.00         1
      cerber       1.00      0.50      0.67         2
 cryptojoker       0.00      0.00      0.00         1


  _warn_prf(average, modifier, msg_start, len(result))
