# DS 6050 Final Project 10 Fold CV Code
### Connie Cui

## Set up

This notebook needs the following files in the working directory:
- All_folds.csv
- test_data.csv
- test_labels.csv
- train_data.csv
- train_labels.csv

In [None]:
### Load necessary libraries ###
import glob
import os
import librosa
import librosa.display
import skimage
import numpy as np
import pandas as pd

pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from tqdm import tqdm

from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPool2D, Dropout
from tensorflow.keras.utils import to_categorical 

In [None]:
# Read the feature and label tables. header=None makes pandas treat the first
# line of each file as data rather than as column names.
train_data, test_data, train_lab, test_lab = (
    pd.read_csv(csv_name, header=None)
    for csv_name in (
        "train_data.csv",
        "test_data.csv",
        "train_labels.csv",
        "test_labels.csv",
    )
)

In [None]:
# Sanity check: the combined row count should be 8732. The tables were NumPy
# arrays written out to CSV, so a smaller total suggests that a data row was
# consumed as a header when the files were read back in.
train_data.shape[0] + test_data.shape[0]

8730

In [None]:
# Stack the train and test splits row-wise into one feature table (X) and one
# label table (Y). They stay as DataFrames here; conversion to NumPy and
# reshaping is done per fold in the cross-validation loop.
X = pd.concat([train_data, test_data], axis=0)
Y = pd.concat([train_lab, test_lab], axis=0)

In [None]:
# Fold assignment table: one row per sample, read without a header line.
# The single column is named 'folds' and cast to integers so it can be
# compared against the fold numbers used in the cross-validation loop.
folds = pd.read_csv("All_folds.csv", header=None)
folds.columns = ['folds']
folds = folds.astype({'folds': 'int'})

0    float64
dtype: object

In [None]:
### Train and evaluate via 10-Folds cross-validation ###
# For every fold number i (1-10), the rows whose 'folds' value equals i are
# held out and all remaining rows are used for training. A fresh model is
# built and trained for each fold, and its accuracy on the held-out rows is
# appended to `accuracies` (one entry per fold, in fold order).
accuracies = []

# model hyper-parameters; identical for every fold, so defined once up front
pool_size = (2, 2)
kernel_size = (3, 3)
input_shape = (40, 5, 1)
num_classes = 10

for i in range(1, 11):  # 1-10 to match values in folds df
  # release Keras state left over from the previous fold's model so that
  # memory does not keep growing while ten models are built in a row
  keras.backend.clear_session()

  # obtain train and test indices as row positions (used with .iloc below);
  # the mask is computed once and its complement gives the training rows
  is_test = (folds['folds'] == i).to_numpy()
  test_idx = np.flatnonzero(is_test)    # every row assigned to fold i
  train_idx = np.flatnonzero(~is_test)  # everything except fold i

  # use train and test indices to create train and test x/y and reshape them for training our model
  x_train = X.iloc[train_idx].to_numpy().reshape(len(train_idx), *input_shape)
  y_train = Y.iloc[train_idx].to_numpy().reshape(len(train_idx),)
  x_test = X.iloc[test_idx].to_numpy().reshape(len(test_idx), *input_shape)
  y_test = Y.iloc[test_idx].to_numpy().reshape(len(test_idx),)

  # insert model architecture here (just put the alexnet for reference, will need to update this for each model)
  model = Sequential()
  model.add(Conv2D(64, kernel_size, padding = "same", activation = "tanh", input_shape = input_shape))
  model.add(MaxPool2D(pool_size = pool_size))
  model.add(Conv2D(128, kernel_size, padding = "same", activation = "tanh"))
  model.add(MaxPool2D(pool_size = pool_size))
  model.add(Dropout(0.1))
  model.add(Flatten())
  model.add(Dense(1024, activation = "tanh"))
  model.add(Dense(num_classes, activation = "softmax"))

  # `learning_rate` is the supported argument name; the old `lr` alias is
  # deprecated and rejected by current Keras releases
  optimizer = keras.optimizers.Adam(learning_rate = 1e-4)
  # integer class labels -> sparse categorical cross-entropy (no one-hot encoding needed)
  model.compile(optimizer = optimizer, loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
  # the held-out fold is passed as validation data only to report per-epoch metrics
  model.fit(x_train, y_train, epochs = 50, batch_size = 50, validation_data = (x_test, y_test))

  # add accuracy to our accuracies list for comparison
  # model.evaluate returns [loss, accuracy] for the held-out fold, in the
  # order given to compile(): index 0 is the loss, index 1 is the accuracy
  accuracies.append(model.evaluate(x_test, y_test)[1])
  # if we try and look into other metrics, they may be in a different index and
  # we would need to make a separate list for those metrics



In [None]:
# Summarise the cross-validation run: one row per fold (numbered from 1, at
# most 10 folds) paired with the accuracy recorded for that fold.
fold_rows = [
    (fold_number, fold_accuracy)
    for fold_number, fold_accuracy in enumerate(accuracies[:10], start=1)
]
fold_acc = pd.DataFrame(fold_rows, columns=['Folds', 'Validation Accuracy'])

fold_acc

## When comparing several models, keep one accuracy list per model and add each
# as an extra column of this dataframe for a side-by-side, per-fold comparison.