In [2]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics import accuracy_score
from keras.models import load_model
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tkinter import filedialog

In [3]:
from Saves.HelperFunctions import *
from Preprocessing import preprocess_text

In [4]:
# Load the test CSV; its 'Discussion' column is what the models below are run on.
test_csv_path = 'Data/test.csv'
data = pd.read_csv(test_csv_path)

In [None]:
# Drop rows whose 'Discussion' is missing so every remaining sample has text to score.
# The frame holds the test set (read from 'Data/test.csv'), so the messages say
# "test_data" rather than the earlier, misleading "train_data".
# NOTE(review): removing rows shrinks the test set; if save_csv numbers the
# predictions sequentially they may no longer line up with the original
# sample IDs -- TODO confirm against save_csv / the expected submission format.
print(f"test_data.shape before {data.shape}")
data = data.dropna(subset=['Discussion'])
print(f"test_data.shape after {data.shape}")

In [6]:
# Model input: the 'Discussion' column, coerced to str so every entry is a string.
X_test = data.loc[:, 'Discussion'].astype(str)

In [None]:
# Quick look at the first two discussions.
X_test.head(n=2)

In [8]:
# Sub-directory name shared by the cells below: pickled artifacts are read from
# 'Saves/<saves_dir>/...' and prediction files are saved under '<saves_dir>/...'.
saves_dir = 'Delivaries'

# Preprocessing

In [9]:
# Run every discussion through the project's preprocess_text helper.
# pre_method selects the preprocessing variant; what each value means is
# defined in Preprocessing.preprocess_text, not in this notebook.
pre_method = 2
test_Discussion_preprocessed = list(
    map(lambda discussion: preprocess_text(discussion, pre_method), X_test)
)

# FFNN

In [None]:
# Restore the pickled TF-IDF vectorizer and apply it to the preprocessed test text.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load artifacts that were produced by a trusted run of this project.
tfidf_vectorizer_path = f'Saves/{saves_dir}/tfidf_vectorizer01.pkl'
with open(tfidf_vectorizer_path, 'rb') as file:
    vectorizer = pickle.load(file)
print('vectorizer loaded successfully...')

ffnn_X_test = vectorizer.transform(test_Discussion_preprocessed)

In [None]:
# Load the FFNN model from its .h5 file.
# NOTE(review): unlike the GRU/Transformers checkpoints this path has no
# 'Models' directory prefix, so it resolves against the current working
# directory -- confirm that is intended.
ffnn_model_path = 'FFNN-m4-e2-a92.h5'
ffnn_model = load_model(filepath=ffnn_model_path)

In [None]:
# Score the vectorized test set with the FFNN.
ffnn_predictions = ffnn_model.predict(ffnn_X_test)

# Predicted class = index of the largest output in each row.
ffnn_Y_pred = np.asarray(ffnn_predictions).argmax(axis=1)

In [None]:
# Ask before writing: the FFNN predictions are saved only when the user types 0.
save_choice = input('Press 0 to save the predictions')
if save_choice == '0':
    save_csv(
        data=ffnn_Y_pred,
        file_name=f'{saves_dir}/FFNN',
        header=['SampleID', 'Category'],
        numbering=True,
    )

# GRU

In [None]:
# Restore the pickled tokenizer used by the GRU pipeline.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load artifacts that were produced by a trusted run of this project.
gru_tokenizer_path = f'Saves/{saves_dir}/gru_tokenizer01.pkl'
with open(gru_tokenizer_path, 'rb') as file:
    gru_tokenizer = pickle.load(file)
print('tokenizer loaded successfully...')

In [None]:
# Fixed sequence length used for the GRU inputs.
seq_len = 100

# Convert the preprocessed text to token-id sequences, then pad them at the
# end ('post') to seq_len.
X_test_seq = gru_tokenizer.texts_to_sequences(test_Discussion_preprocessed)
gru_X_test = pad_sequences(sequences=X_test_seq, maxlen=seq_len, padding='post')

In [None]:
# Load the GRU model from its .h5 checkpoint.
# The path is assembled with os.path.join so it also resolves on non-Windows
# systems, where the previous hard-coded backslashes were treated as part of
# the file name instead of as directory separators.
gru_model_path = os.path.join('Models', 'RNN', 'GRU-e10-a81.h5')
# Alternative checkpoint: os.path.join('Models', 'RNN', 'GRU-m2-e9-a85')
gru_model = load_model(gru_model_path)

In [None]:
# Score the padded test sequences with the GRU.
gru_predictions = gru_model.predict(gru_X_test)

# Predicted class = index of the largest output in each row.
gru_Y_pred = np.asarray(gru_predictions).argmax(axis=1)

In [None]:
# Ask before writing: the GRU predictions are saved only when the user types 0.
save_choice = input('Press 0 to save the predictions')
if save_choice == '0':
    save_csv(
        data=gru_Y_pred,
        file_name=f'{saves_dir}/GRU',
        header=['SampleID', 'Category'],
        numbering=True,
    )

# Transformers

In [None]:
# Restore the pickled tokenizer used by the Transformers pipeline.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load artifacts that were produced by a trusted run of this project.
trans_tokenizer_path = f'Saves/{saves_dir}/transformers_tokenizer01.pkl'
with open(trans_tokenizer_path, 'rb') as file:
    trans_tokenizer = pickle.load(file)
print('tokenizer loaded successfully...')

In [None]:
# Fixed sequence length used for the Transformers inputs (the GRU cells use 100).
seq_len = 839

# Convert the preprocessed text to token-id sequences, then pad them at the
# end ('post') to seq_len.
X_test_seq = trans_tokenizer.texts_to_sequences(test_Discussion_preprocessed)
trans_X_test = pad_sequences(sequences=X_test_seq, maxlen=seq_len, padding='post')

In [16]:
# Load the Transformers model from its .h5 checkpoint.
# The path is assembled with os.path.join so it also resolves on non-Windows
# systems, where the previous hard-coded backslashes were treated as part of
# the file name instead of as directory separators.
# NOTE(review): the last recorded run of this cell ended with
# "TypeError: too many positional arguments" raised from load_model; possibly
# a Keras version mismatch or a custom layer that needs `custom_objects` --
# TODO confirm before relying on trans_model.
trans_model_path = os.path.join('Models', 'Transformers', 'Trans-m2-e9-a69.h5')
# Alternative checkpoint: os.path.join('Models', 'Transformers', 'Trans-m2-e15-a72.h5')
trans_model = load_model(trans_model_path)

TypeError: too many positional arguments

In [None]:
# Predict the labels for the test set with the Transformers model.
# This previously called gru_model.predict, which fed the Transformer-tokenized
# inputs (padded to a different length) to the GRU network instead.
trans_predictions = trans_model.predict(trans_X_test)

# Predicted class = index of the largest output in each row.
trans_Y_pred = np.argmax(trans_predictions, axis=1)

In [None]:
# Ask before writing: the Transformers predictions are saved only when the user types 0.
save_choice = input('Press 0 to save the predictions')
if save_choice == '0':
    save_csv(
        data=trans_Y_pred,
        file_name=f'{saves_dir}/Transformers',
        header=['SampleID', 'Category'],
        numbering=True,
    )

In [None]:
# Read the 'Category' column of a previously saved Transformers prediction file.
# os.path.join replaces the hard-coded backslash, which only acted as a
# directory separator on Windows.
trans_old_Y_pred = pd.read_csv(os.path.join('Saves', 'Trans-m2-e15.csv'))['Category']

In [None]:
# Compare the current Transformers predictions with the previously saved ones.
# Both arguments are model predictions, so the score is the fraction of
# samples on which the two runs agree -- it is not an accuracy on training data,
# which is what the old "Train Accuracy" label claimed.
prediction_agreement = accuracy_score(trans_Y_pred, trans_old_Y_pred)
train_accuracy = prediction_agreement  # old name kept for any later cell that still reads it
print(f"Agreement with previous predictions: {prediction_agreement}")