In [None]:
# Importing the libraries
import pandas as pd
import numpy as np
import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
import re
from sklearn.neural_network import MLPClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from scipy import stats
from collections import Counter
from sklearn.decomposition import TruncatedSVD
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset


# Run on the first CUDA GPU when one is available, otherwise on the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# Loading the data set (4 features)
# reset_index returns a new frame rather than modifying in place, so its
# result must be assigned for the call to have any effect.
dataFrame = pd.read_csv("final_data.csv").reset_index(drop=True)
# Preview only the first rows instead of rendering the whole frame
dataFrame.head()

In [None]:
def calcLatLong(each_df):
  """Append row-to-row differences of the first four columns to a dataframe.

  The differences of the first four columns are renamed to the
  ``*_lat`` / ``*_long`` names and joined onto the input. The first row is
  dropped because it has no previous row to difference against, and the
  ``participant`` column is moved to the last position.

  Args:
    each_df: dataframe containing a ``participant`` column; its first four
      columns are the ones differenced.

  Returns:
    A new dataframe with the original columns, the four difference columns
    and ``participant`` as the final column.
  """
  delta_names = {
      'head_pos_x': 'head_pos_lat',
      'head_pos_y': 'head_pos_long',
      'gaze_pos_x': 'gaze_pos_lat',
      'gaze_pos_y': 'gaze_pos_long',
  }
  # Row-wise difference of the first four columns, without the leading NaN row
  deltas = each_df.iloc[:, :4].diff().iloc[1:, :].rename(columns=delta_names)
  # Join on the index, then drop the first row (it has no difference values)
  combined = pd.concat([each_df, deltas], axis=1).iloc[1:]
  # Pop and re-assign so that 'participant' ends up as the last column
  participant_col = combined.pop('participant')
  return combined.assign(participant=participant_col)

# **Latitude and Longitude**

In [None]:
# Feature extraction Stage 1 - Latitude and Longitude

# Collect one feature-augmented dataframe per participant (a list, in order
# of first appearance of each participant id)
participant_dfs = []

# Iterate over each participant
for participant in dataFrame["participant"].unique():
    # Rows belonging to the current participant only, so that differences are
    # never taken across two different participants
    participant_df = dataFrame[dataFrame["participant"] == participant]
    # Compute the difference features once and reuse the result below
    participant_features = calcLatLong(participant_df)
    # Also expose the result as a notebook-level variable participant_<id>_df
    globals()[f"participant_{participant}_df"] = participant_features
    participant_dfs.append(participant_features)

# Concatenate all participants' dataframes
concatenated_df = pd.concat(participant_dfs)

# Preview of the data frame with Latitude and Longitude features
concatenated_df.head()

In [None]:
# Continue with the feature-augmented frame, renumbering its rows from 0
dataFrame = concatenated_df.reset_index(drop=True)
dataFrame

In [None]:
# Separating the features (first 8 columns) from the data set.
# .copy() makes dataFrame1 an independent frame: columns are added to it in
# later cells, and writing to a bare iloc slice raises SettingWithCopyWarning.
dataFrame1 = dataFrame.iloc[:, :8].copy()
dataFrame1.head()

*Frequency domain features using FFT*


In [None]:
# Tried to extract frequency domain features using fft, but got very low accuracy
# import numpy as np
# def calculate_frequency_domain_features(row):

#     readings = row.values
#     fft = np.fft.fft(readings)
#     frequency_domain_features = {'mean': np.abs(np.mean(fft)),
#                                  'std': np.abs(np.std(fft)),
#                                  'max': np.abs(np.max(fft)),
#                                  'min': np.abs(np.min(fft))}
#     return pd.Series(frequency_domain_features)
# df_train = pd.concat([df_train, df_train.apply(calculate_frequency_domain_features, axis=1)], axis=1)

# df_test = pd.concat([df_test, df_test.apply(calculate_frequency_domain_features, axis=1)], axis=1)

# Euclidean Distances 

In [None]:
# Feature extraction Stage 2 - Euclidean Distance between the Head Position in consecutive frames of each user

# Positions 4 and 5 of dataFrame1 are the two columns combined here
column5, column6 = dataFrame1.iloc[:, 4], dataFrame1.iloc[:, 5]
# Euclidean norm per row: square both columns, add them, take the square root
euclid_head = np.sqrt(np.square(column5) + np.square(column6))
dataFrame1["euclid_head"] = euclid_head
dataFrame1

In [None]:
# Feature extraction Stage 2 - Euclidean Distance between the Eye Position in consecutive frames of each user

# Positions 6 and 7 of dataFrame1 are the two columns combined here
column7, column8 = dataFrame1.iloc[:, 6], dataFrame1.iloc[:, 7]
# Euclidean norm per row: square both columns, add them, take the square root
euclid_gaze = np.sqrt(np.square(column7) + np.square(column8))
dataFrame1["euclid_gaze"] = euclid_gaze
dataFrame1

In [None]:
# All features related to head and gaze motion
# X - features and y - labels
# .copy() detaches X from dataFrame1, so adding the label column below writes
# to an independent frame instead of a slice (avoids SettingWithCopyWarning).
X = dataFrame1.iloc[:, 0:10].copy()
y = dataFrame.iloc[:, 8]

# Keep the label as the last column so features and label are split together
X['participant'] = y

The two cells below select the head-only or gaze-only features, so that accuracies can be analysed separately for each.

In [None]:
# # Un-comment before running the code to see Accuracies when only head data is used to train the models
# # Only features related to head motion
# X = X[['head_pos_x', 'head_pos_y','head_pos_lat','head_pos_long','euclid_head','participant']]
# X


In [None]:
# # Un-comment before running the code to see Accuracies when only gaze data is used to train the models 
# # Only features related to gaze motion
# X = X[['gaze_pos_x', 'gaze_pos_y','gaze_pos_lat','gaze_pos_long','euclid_gaze','participant']]
# X


In [None]:
# Splitting to test and train
from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25)
# Hold out 25% of the rows (fixed seed) and renumber both parts from 0
train, test = (
    part.reset_index(drop=True)
    for part in train_test_split(X, test_size=0.25, random_state=42)
)

In [None]:
# The label is the last column; every column before it is a feature
X_train, y_train = train.iloc[:, :-1], train.iloc[:, -1]
X_test, y_test = test.iloc[:, :-1], test.iloc[:, -1]

### **KNN MODEL**

In [None]:
# Best model for our project
# 1-nearest-neighbour classifier fitted on the training split
knnClassifier = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Predicted participant for every row of the test split
y_pred2 = knnClassifier.predict(X_test)

# Fraction of test rows whose participant was predicted correctly
accuracy = accuracy_score(y_test, y_pred2)
print(f"KNN Classifier Accuarcy ==> {accuracy} ")

### **DecisionTreeClassifier**

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Fixed seed: the tree shuffles candidate features at each split, so without
# random_state the reported accuracy can differ between runs of the notebook.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"DecisionTreeClassifier Accuarcy ==> {accuracy} ")

### **GaussianNB**

In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes fitted and scored on the same train/test split
clf = GaussianNB().fit(X_train, y_train)
predictions = clf.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"GaussianNB Accuarcy ==> {accuracy} ")

### **Deep Neural Networks**

In [None]:
class MyDataset(Dataset):
    """Tensor-backed dataset built from a dataframe whose last column is the label."""

    def __init__(self, df):
        # Every column except the last is a feature; the last one is the target
        feature_block = df.iloc[:, :-1]
        label_column = df.iloc[:, -1]

        # Both tensors are stored as float32
        self.x_train = torch.tensor(feature_block.values, dtype=torch.float32)
        self.y_train = torch.tensor(label_column.values, dtype=torch.float32)

    def __len__(self):
        # One sample per stored label
        return self.y_train.shape[0]

    def __getitem__(self, idx):
        # (features, label) pair for the requested position
        sample = (self.x_train[idx], self.y_train[idx])
        return sample

In [None]:
# Wrap both splits as datasets; only the training batches are shuffled
train_set, test_set = MyDataset(train), MyDataset(test)
train_loader = DataLoader(train_set, shuffle=True, batch_size=128)
test_loader = DataLoader(test_set, shuffle=False, batch_size=128)


In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier (in_features -> 128 -> 64 -> num_classes).

    ``forward`` returns raw class scores (logits), which is the input that
    ``nn.CrossEntropyLoss`` expects.

    Args:
        in_features: number of input features per sample (default 10).
        num_classes: number of output classes (default 45).
    """

    def __init__(self, in_features=10, num_classes=45):
        super().__init__()
        self.fc1 = nn.Linear(in_features, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()
        # Kept for callers that want probabilities: self.softmax(logits).
        # Deliberately NOT applied inside forward().
        self.softmax = nn.Softmax(dim = 1)
        self.dropout1 = nn.Dropout(0.2)
        self.dropout2 = nn.Dropout(0.1)

    def forward(self, x):
        """Return unnormalised logits, one row of ``num_classes`` scores per sample.

        nn.CrossEntropyLoss applies log-softmax internally, so passing it
        softmax output would normalise twice and squash the gradients.
        argmax over the logits selects the same class as argmax over the
        softmax probabilities, so predictions are unaffected.
        """
        out = self.relu(self.fc1(x))
        out = self.dropout1(out)
        out = self.relu(self.fc2(out))
        out = self.dropout2(out)
        return self.fc3(out)

In [None]:
# Network on the selected device, cross-entropy objective, Adam with lr=0.001
model = NeuralNetwork()
model = model.to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
def Train(model, epochs, train_loader, optimizer, loss_func):
  """Train ``model`` for ``epochs`` passes over ``train_loader`` and return it.

  After every epoch the average per-sample loss and the accuracy (in percent)
  are printed. The function does not switch the model into training mode;
  the caller is expected to do that.

  Args:
    model: torch module mapping a feature batch to per-class scores.
    epochs: number of full passes over ``train_loader``.
    train_loader: iterable of ``(features, labels)`` batches; labels are
      1-based class ids (they are shifted to 0-based before the loss).
    optimizer: optimizer bound to ``model``'s parameters.
    loss_func: loss taking ``(scores, int64 class indices)``.

  Returns:
    The same ``model`` object, trained in place.
  """
  # Use the device the model already lives on rather than a notebook global
  first_param = next(model.parameters(), None)
  run_device = first_param.device if first_param is not None else torch.device('cpu')

  for epoch in range(epochs):
    loss_sum = 0.0
    correct = 0
    seen = 0
    for inputs, target in train_loader:
      inputs = inputs.to(run_device)
      # Shift the 1-based labels to 0-based integer class indices
      target = (target.to(run_device) - 1).long()

      optimizer.zero_grad()
      output = model(inputs)
      loss = loss_func(output, target)
      loss.backward()
      optimizer.step()

      batch_size = target.size(0)
      # Weight by batch size so the epoch figure is a per-sample average
      # (assumes loss_func returns the batch mean, the nn.CrossEntropyLoss default)
      loss_sum += loss.item() * batch_size
      correct += (torch.argmax(output, dim=1) == target).sum().item()
      seen += batch_size

    # Count the samples actually seen instead of reading a global dataframe;
    # max(..., 1) avoids a division by zero on an empty loader.
    denom = max(seen, 1)
    print('Train Epoch: {} Train Loss: {:.3f} Train Accuracy: {:.2f}'.format(epoch, loss_sum/denom, (correct/denom) * 100))

  return model

In [None]:
def eval(model, test_loader, loss_func):
    """Print the average per-sample loss and accuracy (percent) of ``model``.

    The model is put in evaluation mode for the duration of the call (so
    dropout is inactive) and its previous mode is restored afterwards.
    Gradients are not tracked. Nothing is returned.

    Note: the name shadows the builtin ``eval``; it is kept because existing
    cells call it by this name.

    Args:
        model: torch module mapping a feature batch to per-class scores.
        test_loader: iterable of ``(features, labels)`` batches; labels are
            1-based class ids (they are shifted to 0-based before the loss).
        loss_func: loss taking ``(scores, int64 class indices)``.
    """
    # Use the device the model already lives on rather than a notebook global
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    loss_sum = 0.0
    correct = 0
    seen = 0

    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for evaluation; skipping them saves memory
        with torch.no_grad():
            for inputs, target in test_loader:
                inputs = inputs.to(run_device)
                # Shift the 1-based labels to 0-based integer class indices
                target = (target.to(run_device) - 1).long()
                output = model(inputs)
                loss = loss_func(output, target)

                batch_size = target.size(0)
                # Weight by batch size so the result is a per-sample average
                # (assumes loss_func returns the batch mean)
                loss_sum += loss.item() * batch_size
                correct += (torch.argmax(output, dim=1) == target).sum().item()
                seen += batch_size
    finally:
        # Leave the model in whatever mode the caller had it in
        model.train(was_training)

    # Count the samples actually seen instead of reading a global dataframe;
    # max(..., 1) avoids a division by zero on an empty loader.
    denom = max(seen, 1)
    print('Test Loss: {:.3f} Test Accuracy: {:.2f}'.format(loss_sum/denom, 100*(correct/denom)))


In [None]:
# Put the network in training mode (enables dropout), then train for 10 epochs
epochs = 10
model.train()
model_trained = Train(model, epochs, train_loader, optimizer, loss_func)

In [None]:
 eval(model_trained.eval(), test_loader, loss_func)

# **Rocket Classifier**

In [None]:
!pip install sktime
from sktime.classification.kernel_based import RocketClassifier





[notice] A new release of pip is available: 23.0.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
from sklearn.metrics import accuracy_score

# Fit on at most the first 100000 training rows; score on the whole test split.
# Features are every column but the last, the label is the last column.
temp_train, temp_label = train.iloc[:100000, :-1].values, train.iloc[:100000, -1]
temp_test, temp_label_test = test.iloc[:, :-1].values, test.iloc[:, -1]

clf = RocketClassifier(num_kernels=500)
clf.fit(temp_train, temp_label)
y_pred = clf.predict(temp_test)

acc = accuracy_score(y_pred, temp_label_test)
print("accuracy : " + str(acc))

*Below are some models we started working on after the presentation; they are not complete.*

In [None]:
#RandomForestClassifier
# from sklearn.ensemble import RandomForestClassifier

# # Create a Random Forest classifier with 100 trees
# rf = RandomForestClassifier(n_estimators=100, random_state=42)
# rf.fit(X_train, y_train)
# y_pred = rf.predict(X_test)
# accuracy = accuracy_score(y_test, y_pred)
# print('Random Forest Classifier Accuracy ==>', accuracy)



# SVM
# from sklearn.svm import SVC

# # Create an SVM classifier with a linear kernel
# svm = SVC(kernel='linear')
# svm.fit(X_train, y_train)
# y_pred = svm.predict(X_test)

# # Calculate accuracy, precision, recall, and F1 score
# accuracy = accuracy_score(y_test, y_pred)
# print('SVM Accuracy ==> ', accuracy)



#LogisticRegression
# from sklearn.linear_model import LogisticRegression

# # Create an LogisticRegression classifier model
# logistic = LogisticRegression()
# logistic.fit(X_train, y_train)
# y_pred = logistic.predict(X_test)

# # Calculate accuracy,
# accuracy = accuracy_score(y_test, y_pred)
# print('Logistic Regression Accuracy ==> ', accuracy)
