In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score

In [2]:
def read_dataset():
    df = pd.read_csv("featMat.csv", header=0)
    df = df.drop(columns=['doc id', 'phone id'])
    df.dropna(inplace=True)
    return df

In [3]:
def number_of_attackers(y_pred, df, threshold):
    allSimilarPatterns = []
    realAttackers=set([])
    for i in range(len(y_pred)):
        if y_pred[i]:
            allSimilarPatterns.append(x.iloc[i]['user id'])        
    allSimilarPatterns = sorted(allSimilarPatterns)
    for i in range(len(allSimilarPatterns)):
        count=0
        var = allSimilarPatterns[i]
        for j in range(len(allSimilarPatterns)):
            if var == allSimilarPatterns[j]:
                count+=1
            elif i < j:
                i = j
                break
            if count > threshold - 1:
                realAttackers.add(var)
                break
    if realAttackers:
        print(f'list of attackers with at least {threshold} patterns similarity: {realAttackers}')
    else:
        print(f'no possible attacker has found with at least {threshold} patterns similarity.')

In [4]:
df = read_dataset()
threshold = 5
for i in range(1,32):
    x = df[df['user id'] < 32]
    x = x.sample(frac = 1)
    x_train = x.drop(columns='user id')
    y_train = x['user id'] == i
    
    x = df[df['user id'] > 31]
    x_test = x.drop(columns='user id')
    y_test = x['user id'] == i
    
    print(f'Attacks against user {i} has started...')
    
    svclassifier = svm.SVC(kernel='sigmoid')
    svclassifier.fit(x_train, y_train)
    y_pred = svclassifier.predict(x_test)
    print('Accuracy of Support Vector Machine: %0.2f' % (accuracy_score(y_test, y_pred)*100), '%')
    number_of_attackers(y_pred, x, threshold)
    
    knn = KNeighborsClassifier()
    knn.fit(x_train, y_train)
    y_pred = knn.predict(x_test)
    print('Accuracy of K-Neighbors Classifier: %0.2f' % (accuracy_score(y_test, y_pred)*100), '%')
    number_of_attackers(y_pred, x, threshold)

    clf = MLPClassifier(random_state=1, max_iter=300)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print('Accuracy of MLP Classifier: %0.2f' % (accuracy_score(y_test, y_pred)*100), '%')
    number_of_attackers(y_pred, x, threshold)
    
    km = KMeans(algorithm='elkan')
    km.fit(x_train)
    y_pred = km.predict(x_test)
    print('Accuracy of K-means: %0.2f' % (accuracy_score(y_test, y_pred)*100), '%')
    number_of_attackers(y_pred, x, threshold)
    print('------------------')

Attacks against user 1 has started...
Accuracy of Support Vector Machine: 99.11 %
list of attackers with at least 5 patterns similarity: {33.0, 36.0, 38.0}
Accuracy of K-Neighbors Classifier: 99.96 %
no possible attacker has found with at least 5 patterns similarity.
Accuracy of MLP Classifier: 93.06 %
list of attackers with at least 5 patterns similarity: {32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0}
Accuracy of K-means: 93.74 %
list of attackers with at least 5 patterns similarity: {32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0}
------------------
Attacks against user 2 has started...
Accuracy of Support Vector Machine: 98.50 %
list of attackers with at least 5 patterns similarity: {33.0, 36.0, 38.0, 39.0}
Accuracy of K-Neighbors Classifier: 98.71 %
list of attackers with at least 5 patterns similarity: {33.0, 34.0, 35.0, 36.0, 38.0, 39.0}
Accuracy of MLP Classifier: 98.75 %
list of attackers with at least 5 patterns similarity: {33.0, 34.0, 35.0, 38.0, 39.0}
Ac

Accuracy of Support Vector Machine: 99.09 %
list of attackers with at least 5 patterns similarity: {33.0, 36.0, 38.0, 39.0}
Accuracy of K-Neighbors Classifier: 99.98 %
no possible attacker has found with at least 5 patterns similarity.
Accuracy of MLP Classifier: 98.79 %
list of attackers with at least 5 patterns similarity: {32.0, 33.0, 34.0, 35.0, 38.0, 40.0}
Accuracy of K-means: 93.74 %
list of attackers with at least 5 patterns similarity: {32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0}
------------------
Attacks against user 17 has started...
Accuracy of Support Vector Machine: 98.65 %
list of attackers with at least 5 patterns similarity: {33.0, 36.0, 38.0, 39.0}
Accuracy of K-Neighbors Classifier: 99.43 %
list of attackers with at least 5 patterns similarity: {35.0}
Accuracy of MLP Classifier: 99.98 %
no possible attacker has found with at least 5 patterns similarity.
Accuracy of K-means: 93.66 %
list of attackers with at least 5 patterns similarity: {32.0, 33.0, 34.0, 35

Accuracy of Support Vector Machine: 99.13 %
list of attackers with at least 5 patterns similarity: {33.0, 36.0, 38.0}
Accuracy of K-Neighbors Classifier: 99.94 %
no possible attacker has found with at least 5 patterns similarity.
Accuracy of MLP Classifier: 99.87 %
no possible attacker has found with at least 5 patterns similarity.
Accuracy of K-means: 1.18 %
list of attackers with at least 5 patterns similarity: {32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0}
------------------
