#**Trương Thành Thắng - 20521907**

#**LINEAR DISCRIMINANT ANALYSIS**

In [1]:
# Mount Google Drive so the project folder is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Switch the working directory to the project folder; later cells read and
# write files using paths relative to it (e.g. ./lan1/train.csv).
%cd '/content/drive/MyDrive/Covid 19 Risk Prediction'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Covid 19 Risk Prediction


In [2]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import csv 
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pickle
import time
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

#**EXPERIMENT**

In [3]:
def Experiment(model, save_path, data_nums=range(1, 4)):
  """Train and score `model` on LDA-reduced features for each dataset split.

  For every split number `i` in `data_nums` this function:
    1. reads `./lan{i}/train.csv` and `./lan{i}/test.csv`, using the last
       column as the label and all other columns as features;
    2. projects the features onto a single LDA component (fitted on the
       training split only);
    3. fits `model` on the projected training data and pickles it to
       `{save_path}_{i}`;
    4. scores the predictions on the projected test data.

  The per-split metrics are written to `{save_path}.csv` and the mean of
  every column is printed.

  Args:
    model: Estimator exposing `fit(X, y)` and `predict(X)`. The same object
      is re-fitted on every split.
    save_path: Path prefix used for the pickled models and the CSV report.
    data_nums: Iterable of split numbers to evaluate. Defaults to 1, 2, 3.

  Raises:
    ValueError: If `data_nums` is empty.
  """
  fields = ['Data Num', 'Precision', 'Recall', 'F1 Score', 'Accuracy', 'Time', 'LDA Time']

  # Materialise once so the iterable can be validated and then iterated.
  data_nums = list(data_nums)
  if not data_nums:
    raise ValueError('data_nums must contain at least one split number')

  rows = []
  # Iterate over every dataset split
  for i in tqdm(data_nums):
    data_train = pd.read_csv(f'./lan{i}/train.csv')
    data_test = pd.read_csv(f'./lan{i}/test.csv')

    X_train, y_train = data_train.iloc[:,:-1], data_train.iloc[:,-1]
    X_test, y_test = data_test.iloc[:,:-1], data_test.iloc[:,-1]

    # Reduce dimensionality with LINEAR DISCRIMINANT ANALYSIS.
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    lda = LDA(solver='svd', n_components=1)
    start = time.perf_counter()
    X_train = lda.fit_transform(X_train, y_train)
    X_test = lda.transform(X_test)
    lda_time = time.perf_counter() - start

    # Train the model
    start = time.perf_counter()
    model.fit(X_train, y_train)
    training_time = time.perf_counter() - start
    # NOTE: only the classifier is pickled; the fitted LDA projection is not
    # saved, so a reloaded model expects already-projected features.
    with open(save_path + f'_{i}', 'wb') as model_file:
      pickle.dump(model, model_file)

    # Evaluate the model
    y_pred = model.predict(X_test)
    rows.append([i, precision_score(y_test, y_pred), recall_score(y_test, y_pred), f1_score(y_test, y_pred), accuracy_score(y_test, y_pred), training_time, lda_time])

  # Save the results to a csv file. newline='' is required by the csv module;
  # without it, platforms that translate newlines emit a blank line per row.
  with open(save_path + '.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(fields)
    csvwriter.writerows(rows)

  # Column-wise mean over all splits (index 0 is the split number, unused).
  result = np.mean(rows, axis=0)
  print('Result: ')
  print('Precision: ', result[1])
  print('Recall: ', result[2])
  print('F1 Score: ', result[3])
  print('Accuracy: ', result[4])
  print('Time: ', result[5])
  print('LDA Time: ', result[6])

##**DECISION TREES**

In [4]:
# Decision tree using entropy splits with depth capped at 25, run through the LDA experiment.
model = DecisionTreeClassifier(max_depth=25, criterion='entropy')
Experiment(model=model, save_path='Thang_LDA/LDA_DT')

  0%|          | 0/3 [00:00<?, ?it/s]

Result: 
Precision:  0.6220783931013482
Recall:  0.49606443432174574
F1 Score:  0.5518499520863388
Accuracy:  0.914755887808198
Time:  2.4825170834859214
LDA Time:  1.8514939943949382


##**NAIVE BAYES**

In [5]:
# Gaussian Naive Bayes with default settings, run through the LDA experiment.
model = GaussianNB()
Experiment(model=model, save_path='Thang_LDA/LDA_NB')

  0%|          | 0/3 [00:00<?, ?it/s]

Result: 
Precision:  0.5076639836644957
Recall:  0.5818254120594157
F1 Score:  0.5422205823771219
Accuracy:  0.8960318858164218
Time:  0.05373915036519369
LDA Time:  1.774169921875


##**K-NEAREST NEIGHBORS**

In [6]:
# 13-nearest-neighbours classifier (Manhattan distance, KD-tree search), run through the LDA experiment.
model = KNeighborsClassifier(
  n_neighbors=13,
  algorithm='kd_tree',
  metric='manhattan',
)
Experiment(model=model, save_path='Thang_LDA/LDA_KNN')

  0%|          | 0/3 [00:00<?, ?it/s]

Result: 
Precision:  0.5998488418643529
Recall:  0.4877206887650838
F1 Score:  0.5379997185235637
Accuracy:  0.9113583436075071
Time:  0.3475480079650879
LDA Time:  2.1206518014272056


##**LOGISTIC REGRESSION**

In [7]:
# L1-penalised logistic regression fitted with the SAGA solver (fixed seed), run through the LDA experiment.
model = LogisticRegression(
  penalty='l1',
  solver='saga',
  random_state=1907,
)
Experiment(model=model, save_path='Thang_LDA/LDA_LR')

  0%|          | 0/3 [00:00<?, ?it/s]

Result: 
Precision:  0.5812973234540965
Recall:  0.4232334156781881
F1 Score:  0.4898242429016266
Accuracy:  0.9066985437853753
Time:  2.3558476765950522
LDA Time:  1.8002824783325195


##**NEURAL NETWORKS**
Chạy trên Source code của Phuc_NN

##**SUPPORT VECTOR MACHINE**
Thời gian chạy lâu, colab dễ bị disconnect nên chạy local