<a href="https://colab.research.google.com/github/j28302830/Senior_Project/blob/main/reconized_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Download Samples and Models file

In [35]:
!wget http://dataset.tlm.unavarra.es/ransomware/samplesAndModels.tar.xz
!tar -Jxvf /content/samplesAndModels.tar.xz &> /dev/null

--2022-05-12 07:54:01--  http://dataset.tlm.unavarra.es/ransomware/samplesAndModels.tar.xz
Resolving dataset.tlm.unavarra.es (dataset.tlm.unavarra.es)... 130.206.160.87
Connecting to dataset.tlm.unavarra.es (dataset.tlm.unavarra.es)|130.206.160.87|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 41519408 (40M) [application/x-xz]
Saving to: ‘samplesAndModels.tar.xz.1’


2022-05-12 07:54:06 (8.34 MB/s) - ‘samplesAndModels.tar.xz.1’ saved [41519408/41519408]



# Import Package

In [36]:
import tensorflow as tf
import keras
import glob
import numpy as np
from pickle import load
import joblib 

# Predefined Function

In [37]:
def load_sample(local_path):
  filenames = glob.glob(local_path + "/*.txt")
  for i,filename in enumerate(filenames):
    if i == 0:
      x = np.loadtxt(filename, delimiter=',', dtype = 'float32')
    else: 
      ds = np.loadtxt(filename, delimiter=',', dtype = 'float32')
      x = np.concatenate((x, ds), axis=0)

  return x

In [38]:
def metrics(model , x, y):
  predict = model.predict(x)
  predict[predict >= 0.99] = 1
  predict[predict < 0.99] = 0

  true_values = y
  predictions = predict
  
  TP = ((predictions == 1) & (true_values == 1)).sum()
  FP = ((predictions == 1) & (true_values == 0)).sum()
  TN = ((predictions == 0) & (true_values == 0)).sum()
  FN = ((predictions == 0) & (true_values == 1)).sum()

  accuracy = (TP+TN) / (TP+FP+TN+FN)
  precision = TP / (TP+FP)
  recall = TP / (TP+FN)
  f_measure = 2*(precision*recall/(precision+recall))
  temp = ((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))** 0.5
  phi_coefficient = (TP*TN-FP*FN) / temp

  evaluate = model.evaluate(x=x, y=y)

  return [evaluate[1], accuracy, precision, recall, f_measure, phi_coefficient]

In [39]:
def preprocessing(sample):
  scaler = joblib.load('/content/scaler.scaler')

  index = list(range(0, 30))

  label = np.delete(sample, index + [31], axis=1)
  feature = np.delete(sample, [30,31], axis=1)
  feature_10X3 = np.reshape(feature, (feature.shape[0], 10, 3))
  return label, feature, feature_10X3

# Load NN, CNN, and LSTM model

In [40]:
model_NN = tf.keras.models.load_model('/content/NN_CNN_LSTM_Comparison/NN')
model_CNN = tf.keras.models.load_model('/content/NN_CNN_LSTM_Comparison/CNN')
model_LSTM = tf.keras.models.load_model('/content/NN_CNN_LSTM_Comparison/LSTM')
chronologicalModel = tf.keras.models.load_model('/content/chronologicalModel/models/T1N10S10_modelo94.model')



JSON

In [41]:
json_file = open('/content/NN.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = tf.keras.models.model_from_json(loaded_model_json)

# Load sample and scaler

In [42]:
scaler = joblib.load('/content/scaler.scaler')
test = load_sample(local_path = r'/content/samples/ransomwareSamples_test')
train = load_sample(local_path = r'/content/samples/ransomwareSamples_train')
all_ransomware = np.concatenate( (train, test), axis = 0 )
#user = load_sample(local_path = r'/content/samples/userSamples')
day_zero = np.loadtxt('/content/samples/userSamples/day0.txt', delimiter=',', dtype = 'float32')

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


# Preprocessing Sample and Run Model 

### day0 and all ransomware sample

In [43]:
index = list(range(0, 30))

label = np.delete(all_ransomware, index + [31], axis=1)
feature = np.delete(all_ransomware, [30,31], axis=1)
feature_10X3 = np.reshape(feature, (feature.shape[0], 10, 3))

label2 = np.delete(day_zero, index, axis=1)
feature2 = np.delete(day_zero, [30], axis=1)

label = np.concatenate( (label, label2), axis = 0 )
feature = np.concatenate( (feature, feature2), axis = 0 )

feature_scaled = scaler.transform(feature)
feature_scaled_10X3 = np.reshape(feature_scaled, (feature_scaled.shape[0], 10, 3))

In [44]:
NN = metrics(model_NN, x=feature_scaled, y=label)
CNN = metrics(model_CNN, x=feature_scaled_10X3, y=label)
LSTM = metrics(model_LSTM, x=feature_scaled_10X3, y=label)
print("NN:\nevalute(): %f vs accuracy(): %f" %(NN[0], NN[1]) )
print("CNN:\nevalute(): %f vs accuracy(): %f" %(CNN[0], CNN[1]) )
print("LSTM:\nevalute(): %f vs accuracy(): %f" %(LSTM[0], LSTM[1]) )



NN:
evalute(): 0.966847 vs accuracy(): 0.966149
CNN:
evalute(): 0.966368 vs accuracy(): 0.965596
LSTM:
evalute(): 0.965845 vs accuracy(): 0.965573


### test sample without low activity data

In [45]:
index = list(range(0, 30))

label = np.delete(test, index + [31], axis=1)
feature = np.delete(test, [30,31], axis=1)

count = np.count_nonzero(feature == 0, axis=1)
low_activity = np.where( count >= (feature.shape[1]/3) )

label = np.delete(label, low_activity, axis=0)
feature = np.delete(feature, low_activity, axis=0)



feature_10X3 = np.reshape(feature, (feature.shape[0], 10, 3))
feature_scaled = scaler.transform(feature)
feature_scaled_10X3 = np.reshape(feature_scaled, (feature_scaled.shape[0], 10, 3))

In [46]:
NN = metrics(model_NN, x=feature_scaled, y=label)
CNN = metrics(model_CNN, x=feature_scaled_10X3, y=label)
LSTM = metrics(model_LSTM, x=feature_scaled_10X3, y=label)
chronological = metrics(model_LSTM, x=feature_scaled_10X3, y=label)
print("NN:\nevalute(): %f vs accuracy(): %f" %(NN[0], NN[1]) )
print("CNN:\nevalute(): %f vs accuracy(): %f" %(CNN[0], CNN[1]) )
print("LSTM:\nevalute(): %f vs accuracy(): %f" %(LSTM[0], LSTM[1]) )
print("chronological:\nevalute(): %f vs accuracy(): %f" %(chronological[0], chronological[1]) )

 56/292 [====>.........................] - ETA: 0s - loss: 0.1959 - accuracy: 0.9609



NN:
evalute(): 0.973526 vs accuracy(): 0.952519
CNN:
evalute(): 0.977170 vs accuracy(): 0.937192
LSTM:
evalute(): 0.980279 vs accuracy(): 0.946838
chronological:
evalute(): 0.980279 vs accuracy(): 0.946838


### train sample

In [47]:
index = list(range(0, 30))

label = np.delete(train, index + [31], axis=1)
feature = np.delete(train, [30,31], axis=1)

feature_10X3 = np.reshape(feature, (feature.shape[0], 10, 3))
feature_scaled = scaler.transform(feature)
feature_scaled_10X3 = np.reshape(feature_scaled, (feature_scaled.shape[0], 10, 3))

In [48]:
NN = metrics(model_NN, x=feature_scaled, y=label)
CNN = metrics(model_CNN, x=feature_scaled_10X3, y=label)
LSTM = metrics(model_LSTM, x=feature_scaled_10X3, y=label)
chronological = metrics(model_LSTM, x=feature_scaled_10X3, y=label)
print("NN:\nevalute(): %f vs accuracy(): %f" %(NN[0], NN[1]) )
print("CNN:\nevalute(): %f vs accuracy(): %f" %(CNN[0], CNN[1]) )
print("LSTM:\nevalute(): %f vs accuracy(): %f" %(LSTM[0], LSTM[1]) )
print("chronological:\nevalute(): %f vs accuracy(): %f" %(chronological[0], chronological[1]) )

 48/361 [==>...........................] - ETA: 0s - loss: 0.0031 - accuracy: 0.9993



NN:
evalute(): 0.995575 vs accuracy(): 0.986030
CNN:
evalute(): 0.994620 vs accuracy(): 0.984035
LSTM:
evalute(): 0.989414 vs accuracy(): 0.974230
chronological:
evalute(): 0.989414 vs accuracy(): 0.974230
