# Imports

In [2]:
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np
import seaborn as sb
import time
# import mat4py
import scipy.io 
import sklearn
from sklearn import preprocessing
import plotly.express as px
from sklearn import decomposition
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans 
import tensorflow as tf
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.fft import fft, ifft
from scipy.signal import lfilter,butter
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer


ModuleNotFoundError: No module named 'pandas'

In [None]:
!pip install tensorflow
!pip install antropy

In [None]:
import antropy as ant
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
# from keras.utils import to_categorical

In [None]:
from tensorflow.keras.utils import to_categorical

# Data loading

In [None]:
! ls

In [None]:
real_signal_filename = 'Dataset1.mat'
attack_signal_filename = 'sampleAttack.mat'


In [None]:

def load_realsignal_from_path(fpath):
    """Load the genuine signals stored under ``Raw_Data`` in a MATLAB file.

    Every entry ``Raw_Data[i][j]`` is cut into four consecutive pieces with
    ``np.array_split`` and each piece becomes one row of the result.

    Parameters
    ----------
    fpath : path of the ``.mat`` file, handed to ``scipy.io.loadmat``.

    Returns
    -------
    pandas.DataFrame
        One row per piece, ordered by ``i``, then ``j``, then piece position.
    """
    recordings = scipy.io.loadmat(fpath)['Raw_Data']
    pieces = [
        piece
        for i in range(recordings.shape[0])
        for j in range(recordings.shape[1])
        for piece in np.array_split(recordings[i][j], 4)
    ]
    return pd.DataFrame(pieces)

def load_attacksignal_from_path(fpath):
    """Load the attack signals stored under ``attackVectors`` in a MATLAB file.

    The first three axes of the stored array are walked in order and every
    ``attackVectors[i][j][k]`` entry becomes one row of the result.

    Parameters
    ----------
    fpath : path of the ``.mat`` file, handed to ``scipy.io.loadmat``.

    Returns
    -------
    pandas.DataFrame
        One row per ``(i, j, k)`` entry, with ``k`` varying fastest.
    """
    vectors = scipy.io.loadmat(fpath)['attackVectors']
    rows = [
        vectors[i][j][k]
        for i in range(vectors.shape[0])
        for j in range(vectors.shape[1])
        for k in range(vectors.shape[2])
    ]
    return pd.DataFrame(rows)

In [None]:
real_df = load_realsignal_from_path(real_signal_filename)

In [None]:
attack_df = load_attacksignal_from_path(attack_signal_filename)

In [None]:
real_df.head()

# Data Normalization

In [None]:
def min_max_norm(df):
    """Rescale the columns of ``df`` with a freshly fitted ``MinMaxScaler``.

    The scaler (default settings) is fitted on ``df`` itself. The result is
    built from the bare scaled array, so it carries default integer row and
    column labels rather than the labels of ``df``.
    """
    scaled_values = preprocessing.MinMaxScaler().fit_transform(df.values)
    return pd.DataFrame(scaled_values)

In [None]:
def std_norm(df):
    """Standardise the columns of ``df`` with a freshly fitted ``StandardScaler``.

    The scaler (default settings) is fitted on ``df`` itself. The result is
    built from the bare scaled array, so it carries default integer row and
    column labels rather than the labels of ``df``.
    """
    scaled_values = preprocessing.StandardScaler().fit_transform(df.values)
    return pd.DataFrame(scaled_values)

In [None]:
# Standardise each class with its own scaler, then stack the genuine rows on top
# of the attack rows.
# NOTE(review): the two frames are scaled independently and before the train/test
# split, so the scaling statistics come from each whole class (test rows
# included) — confirm this is intended rather than fitting one scaler on the
# training split only.
x_superset = pd.concat( [std_norm(real_df), std_norm(attack_df)] , axis = 0 )

In [None]:
# Labels in the same row order as x_superset: 1.0 for every genuine row,
# followed by 0.0 for every attack row.
y_superset = [1.0] * len(real_df) + [0.0] * len(attack_df)

# Test Train split

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(x_superset, y_superset, test_size=0.3, random_state = 1)

# Features extraction list

Features: PCA, entropy, FFT theta band, biorthogonal wavelet family, TF-IDF

Models: SVM, cosine similarity, K-Means, KNN, CNN

## PCA feature extraction

In [None]:
best_feature_count = 100 # number of principal components to keep
pca = decomposition.PCA(n_components = best_feature_count) # keep `best_feature_count` components
X_train_pca = pca.fit_transform(X_train) # fit PCA on the training rows only and project them
X_test_pca = pca.transform(X_test) # project the test rows with the PCA fitted on the training rows

In [None]:
X_test_pca

## Entropy

In [None]:
def entropy_features(X):
    """Compute five entropy measures for every row of ``X``.

    ``X`` is indexed by position (``X[0]`` .. ``X[len(X) - 1]``). For each row
    the result holds, in this order: permutation entropy, spectral entropy
    (Welch method, ``sf=160``), SVD entropy, approximate entropy and sample
    entropy. The first three are requested with ``normalize=True``.

    Returns
    -------
    list of list
        One five-element list per row of ``X``.
    """
    def _row_entropies(signal):
        # Evaluation order matches the order of the returned values.
        return [
            ant.perm_entropy(signal, normalize=True),
            ant.spectral_entropy(signal, sf=160, method='welch', normalize=True),
            ant.svd_entropy(signal, normalize=True),
            ant.app_entropy(signal),
            ant.sample_entropy(signal),
        ]

    return [_row_entropies(X[idx]) for idx in range(len(X))]

# Entropy features of the training rows; np.asarray makes the rows addressable
# by position, which is how entropy_features indexes its input.
X_train_entropy = entropy_features(np.asarray(X_train))

# Same features for the held-out rows.
X_test_entropy = entropy_features(np.asarray(X_test))


In [None]:
# Turn the per-row lists of five entropy values into arrays for the estimators below.
X_train_entropy_array = np.asarray(X_train_entropy)
X_test_entropy_array = np.asarray(X_test_entropy)

# Quick check of the resulting shape.
X_test_entropy_array.shape











In [None]:
X_train

## FFT theta band

In [None]:

def butter_bandpass_filter(data, lowcut, highcut, fs, order=2):
    """Band-pass ``data`` with a Butterworth filter.

    Parameters
    ----------
    data : array-like handed to ``scipy.signal.lfilter``.
    lowcut, highcut : lower and upper band edge, in the same unit as ``fs``.
    fs : sampling frequency; both edges are divided by ``0.5 * fs`` before
        the filter is designed.
    order : order handed to ``scipy.signal.butter`` (default 2).

    Returns
    -------
    The output of ``lfilter`` for the designed filter applied to ``data``.
    """
    nyquist = 0.5 * fs
    band_edges = [lowcut / nyquist, highcut / nyquist]
    numerator, denominator = butter(order, band_edges, btype='band')
    return lfilter(numerator, denominator, data)
temp = X_train.values
temp1 = X_test.values

# Theta-band edges and sampling rate handed to butter_bandpass_filter
# (160 is the same rate that entropy_features passes as `sf`).
THETA_LOW, THETA_HIGH, SAMPLING_RATE = 4.1, 8.0, 160


def _theta_spectrum(signal):
    """Band-pass one time-domain row to the theta band and return its FFT.

    The filter has to run on the time-domain samples: running it over FFT
    coefficients would treat frequency bins as if they were time samples and
    would not isolate the band.
    """
    theta_signal = butter_bandpass_filter(signal, THETA_LOW, THETA_HIGH, SAMPLING_RATE)
    return fft(theta_signal)


# One complex spectrum per row. NOTE(review): take np.abs(...) of these before
# handing them to an estimator that expects real-valued features.
X_train_theta = [_theta_spectrum(row) for row in temp]
X_test_theta = [_theta_spectrum(row) for row in temp1]

# Show only the shape; dumping the full list floods the notebook output.
np.asarray(X_train_theta).shape


## Biorthogonal wavelet family

## TF-IDF

In [None]:
X_train_tfidf = X_train.values.tolist()

# Round every value to 2 decimals so that nearby values collapse into the same "word".
X_train_tfidf_rounded = [[round(value, 2) for value in row] for row in X_train_tfidf]

# One space-separated "document" per training row, ready for a text vectoriser.
# Comprehensions keep the scratch names (`temp`, `a`, `newtemp`, `i`, `j`) out of
# the notebook namespace, so `temp` from the FFT cell is no longer overwritten.
X_train_tfidf_rounded_string = [
    " ".join(str(value) for value in row) for row in X_train_tfidf_rounded
]


# for i in range (len(X_train_tfidf)):
#     X_tfidf_string = str (X_train_tfidf[i])
# # documentA = 'the man went out for a walk'
# # documentB = 'abcd'
# vectorizer = TfidfVectorizer()
# vectors = vectorizer.fit_transform(X_train_tfidf)
# feature_names = vectorizer.get_feature_names()
# dense = vectors.todense()
# denselist = dense.tolist()
# df = pd.DataFrame (denselist, columns=feature_names)
# cvec = CountVectorizer()
# cvec_counts = cvec.fit_transform(X_train_tfidf)















In [None]:
X_test_tfidf = X_test.values.tolist()

# Round every value to 2 decimals so that nearby values collapse into the same "word".
X_test_tfidf_rounded = [[round(value, 2) for value in row] for row in X_test_tfidf]

# One space-separated "document" per test row, ready for a text vectoriser.
# Comprehensions keep the scratch names (`temp`, `a`, `newtemp`, `i`, `j`) out of
# the notebook namespace.
X_test_tfidf_rounded_string = [
    " ".join(str(value) for value in row) for row in X_test_tfidf_rounded
]

In [None]:
# Learn the vocabulary and IDF weights from the TRAINING documents only, then
# project the test documents onto that vocabulary. A vectoriser fitted on the
# test documents would produce its own set of columns, which a model trained on
# the training features cannot consume.
vectorizer = TfidfVectorizer()
vectorizer.fit(X_train_tfidf_rounded_string)
vectors = vectorizer.transform(X_test_tfidf_rounded_string)
# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out() replaces it.
feature_names = vectorizer.get_feature_names_out()
X_test_tfidffeatures = pd.DataFrame(vectors.toarray(), columns=feature_names)

In [None]:
# TF-IDF features of the training documents. The original call passed `X_`,
# a name that is never defined; the training documents are the intended input.
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(X_train_tfidf_rounded_string)
# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out() replaces it.
feature_names = vectorizer.get_feature_names_out()
X_train_tfidffeatures = pd.DataFrame(vectors.toarray(), columns=feature_names)

# Model Training section

##  MLP Classifier (trial)

In [None]:
# One hidden layer of 100 ReLU units trained with Adam (ReLU replaced logistic
# for better output). `(100,)` is an explicit one-element tuple; `(100)` is just
# the integer 100. random_state fixes the weight initialisation so that repeated
# runs report the same accuracy.
model_mlp = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    alpha=0.0001,
    tol=0.0001,
    max_iter=1000,
    random_state=1,
)

In [None]:
model_mlp.fit(X_train_pca, y_train) # fit the MLP on the PCA-projected training rows

In [None]:
y_pred = model_mlp.predict(X_test_pca) # predicted labels for the PCA-projected test rows

In [None]:
accuracy_mlp = accuracy_score(y_test, y_pred) # accuracy of the MLP predictions against the test labels

In [None]:
accuracy_mlp

## MLP Trial TFIDF

In [None]:
# Re-use the same MLP object on the TF-IDF features. y_pred and accuracy_mlp are
# rebound here, so the values from the PCA run are no longer available afterwards.
model_mlp.fit(X_train_tfidffeatures, y_train) # train the MLP on the TF-IDF training features
y_pred = model_mlp.predict(X_test_tfidffeatures) # predict the class of the test rows
accuracy_mlp = accuracy_score(y_test, y_pred) # accuracy on the test data

accuracy_mlp

## Entropy Trial

In [None]:
# Re-use the same MLP object on the five entropy features. y_pred and
# accuracy_mlp are rebound again by this cell.
model_mlp.fit(X_train_entropy_array, y_train) # train the MLP on the entropy features
y_pred = model_mlp.predict(X_test_entropy_array) # predict the class of the test rows
accuracy_mlp = accuracy_score(y_test, y_pred) # accuracy on the test data
accuracy_mlp

## SVM Classifier

In [None]:
# Linear-kernel SVM on the PCA features.
model_svm = SVC(kernel='linear', C=1.0, random_state = 1 )
model_svm.fit(X_train_pca, y_train)     # train on the PCA-projected training rows
y_pred2 = model_svm.predict(X_test_pca) # predicted labels for the test rows

In [None]:
accuracy_svm = accuracy_score(y_test, y_pred2) # accuracy of the SVM predictions against the test labels

In [None]:
accuracy_svm

## KNN Classifier

In [None]:
# 2-nearest-neighbour classifier fitted on the PCA features.
model_knn = KNeighborsClassifier(n_neighbors = 2)
model_knn.fit(X_train_pca,y_train)

In [None]:
# Predicted labels for the PCA-projected test rows
y_pred3 = model_knn.predict(X_test_pca) 

In [None]:
accuracy_knn = accuracy_score(y_test, y_pred3) # accuracy of the KNN predictions against the test labels

In [None]:
accuracy_knn

## KNN Trial Entropy

In [None]:
# 4-nearest-neighbour classifier on the five entropy features. y_pred3 and
# accuracy_knn are rebound here, replacing the values from the PCA-based KNN run.
model_knn_entropy = KNeighborsClassifier(n_neighbors = 4)
model_knn_entropy.fit(X_train_entropy_array,y_train)
# Predicted labels for the test rows
y_pred3 = model_knn_entropy.predict(X_test_entropy_array) 
accuracy_knn = accuracy_score(y_test, y_pred3) # accuracy on the test data
accuracy_knn



## CNN Classifier

In [None]:
def cnn_evaluate(trainX, trainy, testX, testy):
    """Train a small 1-D CNN and return its accuracy on the test data.

    Parameters
    ----------
    trainX, testX : arrays indexed as (samples, timesteps, features).
    trainy, testy : one-hot label arrays indexed as (samples, classes), as
        needed by the categorical cross-entropy loss used here.

    Returns
    -------
    The accuracy value reported by ``model.evaluate`` on ``(testX, testy)``.
    """
    verbose, epochs, batch_size = 0, 10, 32
    # Axis 0 is the sample axis. input_shape describes ONE sample, so it is built
    # from axes 1 and 2 of trainX, and the width of the output layer is the
    # number of one-hot columns (axis 1 of trainy), not the number of samples.
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    return accuracy

In [None]:
# y_train_array = np.asarray (y_train)
# y_test_array = np.asarray (y_test)

# Append a trailing axis of length 1 so that each row becomes a
# (columns, 1) sequence, matching the input_shape given to the first Conv1D layer.
X_train_cnn = np.expand_dims(np.array(X_train), axis=-1)
X_test_cnn = np.expand_dims(np.array(X_test), axis=-1)


# cnn_accuracy = cnn_evaluate(X_train_pca.values, to_categorical(y_train_array), X_test_pca, to_categorical (y_test_array))

# cnn_accuracy

In [None]:
# Confirm that both arrays gained the trailing axis of length 1.
print("X Train shape: ", X_train_cnn.shape)
print("X Test shape: ", X_test_cnn.shape)

In [None]:
# Create the sequential model. Every layer is taken from tf.keras so that the
# model and its layers belong to the same Keras implementation (the container
# below is tf.keras.models.Sequential).
cnn_model = tf.keras.models.Sequential()
# First conv layer: 32 filters, window 3, LeakyReLU activation, 'same' padding
cnn_model.add(tf.keras.layers.Conv1D(filters=32, kernel_size=(3,), padding='same', activation=tf.keras.layers.LeakyReLU(alpha=0.001), input_shape = (X_train_cnn.shape[1],1)))
# Second conv layer: 64 filters, window 3, LeakyReLU activation, 'same' padding
cnn_model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=(3,), padding='same', activation=tf.keras.layers.LeakyReLU(alpha=0.001)))
# Third conv layer: 128 filters, window 3, LeakyReLU activation, 'same' padding
cnn_model.add(tf.keras.layers.Conv1D(filters=128, kernel_size=(3,), padding='same', activation=tf.keras.layers.LeakyReLU(alpha=0.001)))
# Max pooling (window 3, stride 2) followed by 50% dropout
cnn_model.add(tf.keras.layers.MaxPooling1D(pool_size=(3,), strides=2, padding='same'))
cnn_model.add(tf.keras.layers.Dropout(0.5))
# Flatten the output
cnn_model.add(tf.keras.layers.Flatten())
# Dense layer with 256 neurons
cnn_model.add(tf.keras.layers.Dense(units = 256, activation=tf.keras.layers.LeakyReLU(alpha=0.001)))
# Dense layer with 512 neurons
cnn_model.add(tf.keras.layers.Dense(units = 512, activation=tf.keras.layers.LeakyReLU(alpha=0.001)))
# Softmax output layer with two units, one per label value (0.0 = attack, 1.0 = genuine)
cnn_model.add(tf.keras.layers.Dense(units = 2, activation='softmax'))



In [None]:
# The sparse variant of categorical cross-entropy takes class ids as targets,
# which matches the plain (not one-hot) label arrays passed to fit.
cnn_model.compile(optimizer='adam', loss = 'sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
cnn_model.summary()

In [None]:
# One-hot copies of the labels.
# NOTE(review): the fit call below passes the plain label arrays, so these two
# variables are not used by it — confirm whether they are still needed.
y_train_cnn = to_categorical(y_train)
y_test_cnn = to_categorical(y_test)
# Train for 2 epochs in batches of 20. The test split is passed as validation
# data, so the reported val_* metrics are test-set metrics.
cnn_model_history = cnn_model.fit(X_train_cnn, np.asarray (y_train), epochs=2, batch_size = 20 , validation_data = (X_test_cnn, np.asarray (y_test)))

## K Means

In [None]:
n_clusters = len(np.unique(y_train))
kmeans_model = KMeans(n_clusters = n_clusters, random_state=42)
kmeans_model.fit(X_train_pca)
# Raw cluster ids (despite the *_knn suffix these come from K-Means).
y_labels_train_knn = kmeans_model.labels_
y_labels_test_knn = kmeans_model.predict(X_test_pca)

# K-Means numbers its clusters arbitrarily, so a cluster id cannot be compared
# with a class label directly: a perfect clustering could still score 0% if the
# ids happen to be swapped. Name every cluster after the class that is most
# frequent among its training members, then score the translated predictions.
y_train_values = np.asarray(y_train)
train_classes, train_counts = np.unique(y_train_values, return_counts=True)
majority_class = train_classes[np.argmax(train_counts)]  # fallback for a cluster without training members

cluster_to_class = {}
for cluster_id in range(n_clusters):
    member_labels = y_train_values[y_labels_train_knn == cluster_id]
    if member_labels.size == 0:
        cluster_to_class[cluster_id] = majority_class
        continue
    member_classes, member_counts = np.unique(member_labels, return_counts=True)
    cluster_to_class[cluster_id] = member_classes[np.argmax(member_counts)]

y_pred_train_kmeans = np.array([cluster_to_class[cluster] for cluster in y_labels_train_knn])
y_pred_test_kmeans = np.array([cluster_to_class[cluster] for cluster in y_labels_test_knn])

# Keep the train and test scores under separate names; accuracy_kmeans remains
# the test accuracy, as before.
accuracy_kmeans_train = accuracy_score(y_train, y_pred_train_kmeans)
accuracy_kmeans = accuracy_score(y_test, y_pred_test_kmeans)

accuracy_kmeans





# Timepass

In [None]:
# First row of the genuine-signal frame, kept for the plot below.
x=real_df.values[0]

In [None]:
# First row of the attack-signal frame, kept for the plot below.
x_ = attack_df.values[0]

In [None]:
# Line plot of the first 1000 values of the genuine row.
px.line(x[:1000])

In [None]:
# Line plot of the first 1000 values of the attack row.
px.line(x_[:1000])

In [None]:
# Scratch: load another .mat file for inspection.
# NOTE(review): this path is under data/, unlike the dataset files loaded at the top — confirm it exists.
z_ = scipy.io.loadmat('data/inputSample.mat')

In [None]:
z_['data'].shape

In [None]:
# Scratch: load another .mat file for inspection.
# NOTE(review): this path is under data/, unlike the dataset files loaded at the top — confirm it exists.
a_ = scipy.io.loadmat('data/input_attack.mat')

In [None]:
a_['data'].shape

In [None]:
# Scratch: two toy frames with different widths (2x3 and 1x2), used below to
# try out transpose and concat.
sd1 = [[1,2,3],[4,5,6]]
sd2 = [[7,8]]
sd1_df = pd.DataFrame(sd1)
sd2_df = pd.DataFrame(sd2)

In [None]:
sd1_df.transpose()

In [None]:
sd2_df.transpose()

In [None]:
# Stack the two transposed toy frames row-wise to see how concat aligns their columns.
pd.concat([sd1_df.transpose(), sd2_df.transpose()], axis=0)