In [24]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics import accuracy_score
from keras.models import load_model
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tkinter import filedialog

In [27]:
from Saves.HelperFunctions import *
from Preprocessing import preprocess_text

In [39]:
# Load the test set from the CSV at the relative path 'Data/test.csv'
# (resolved against the kernel's current working directory).
data = pd.read_csv('Data/test.csv')

In [40]:
# Take the 'Discussion' column and cast every value to str, so that missing
# values become the literal string 'nan' instead of a float NaN.
X = data['Discussion'].astype(str)

In [41]:
# Preview the first two discussions as a quick sanity check of the load.
X.head(2)

0    Managing cash flow effectively is crucial for ...
1    Civic engagement plays a key role in a democra...
Name: Discussion, dtype: object

In [50]:
# Sub-directory name used in two places below: under 'Saves/' when reading the
# pickled vectorizer/tokenizer, and as the prefix of the file names handed to
# save_csv.
# NOTE(review): 'Delivaries' looks like a misspelling of 'Deliveries', but it
# has to match the directory that exists on disk — rename both together or not
# at all.
saves_dir = 'Delivaries'

## Helper Functions

# FFNN

In [None]:
def ffnn_preprocess(test_data, pre_method):
    """Turn the raw test discussions into features for the FFNN model.

    Each entry of ``test_data['Discussion']`` is coerced to ``str``, cleaned
    with ``preprocess_text`` and then transformed by the vectorizer that is
    unpickled from ``Saves/{saves_dir}/tfidf_vectorizer.pkl``.

    Args:
        test_data: Object indexable by ``'Discussion'`` (e.g. the DataFrame
            read from the test CSV) that yields an iterable of discussion
            texts.
        pre_method: Forwarded unchanged to ``preprocess_text``; its meaning is
            defined by that helper.

    Returns:
        Whatever the unpickled vectorizer's ``transform`` returns for the
        preprocessed texts (presumably a TF-IDF feature matrix — confirm
        against the notebook that fitted and saved the vectorizer).

    Raises:
        FileNotFoundError: If the pickled vectorizer file does not exist.
    """
    print('start preprocessing...')
    # Coerce every entry to str before cleaning. The GRU path receives the
    # column already cast with `.astype(str)`; without the same coercion here a
    # missing discussion would reach preprocess_text as a float NaN.
    test_Discussion_preprocessed = [
        preprocess_text(str(discussion), pre_method)
        for discussion in test_data['Discussion']
    ]
    print('TF-IDF...')
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only load vectorizer files produced by this project.
    with open(f'Saves/{saves_dir}/tfidf_vectorizer.pkl', 'rb') as file:
        vectorizer = pickle.load(file)
        print('vectorizer loaded successfully...')

    # transform (not fit_transform): reuse the already-fitted vectorizer as is.
    X_test = vectorizer.transform(test_Discussion_preprocessed)

    return X_test


In [None]:
# Build the FFNN features from the test DataFrame loaded into `data` above.
# (The name `test_data` is never defined in this notebook, so passing it raised
# NameError on a fresh kernel.)
ffnn_X_test = ffnn_preprocess(test_data=data, pre_method=2)

In [None]:
# Example of a hard-coded path that can be used instead of the dialog:
# ffnn_model_path = 'FFNN-m4-e2-a92.h5'
# Let the user pick the trained FFNN model (.h5) through a file dialog.
ffnn_model_path = filedialog.askopenfilename(filetypes=[("h5 Files", "*.h5")])
# A cancelled dialog returns an empty value; stop with a clear message rather
# than letting load_model fail on an empty path.
if not ffnn_model_path:
    raise FileNotFoundError('No FFNN model file was selected.')
ffnn_model = load_model(ffnn_model_path)

In [None]:
# Run the loaded FFNN model on the vectorized test discussions.
ffnn_predictions = ffnn_model.predict(ffnn_X_test)

# Take the index of the largest score along axis 1 as the predicted class of
# each sample (assumes predictions are shaped (n_samples, n_classes) — TODO confirm).
ffnn_Y_pred = np.argmax(ffnn_predictions, axis=1)

In [None]:
# Ask for confirmation before writing anything: only the exact input '0' saves.
if input('Press 0 to save the predictions') == '0':
    # save_csv is not defined in this notebook (presumably it comes from the
    # star import of Saves.HelperFunctions). The meaning of `header` and
    # `numbering` is defined by that helper — confirm there.
    save_csv(data=ffnn_Y_pred, file_name=f'{saves_dir}/FFNN', header=['SampleID', 'Category'], numbering=True)

# GRU

In [32]:
def gru_preprocess(X, pre_method, seq_length=100):
    """Turn discussion texts into fixed-length integer sequences for the GRU model.

    Each text is cleaned with ``preprocess_text``, converted to a sequence by
    the tokenizer unpickled from ``Saves/{saves_dir}/tokenizer.pkl`` and then
    padded to ``seq_length``.

    Args:
        X: Iterable of discussion texts.
        pre_method: Forwarded unchanged to ``preprocess_text``; its meaning is
            defined by that helper.
        seq_length: Length every sequence is padded/truncated to. Defaults to
            100, the value previously hard-coded here; it has to match the
            input length the model was trained with.

    Returns:
        The result of ``pad_sequences``: one row per text, ``seq_length``
        entries per row.

    Raises:
        FileNotFoundError: If the pickled tokenizer file does not exist.
    """
    print('start preprocessing...')
    test_Discussion_preprocessed = [preprocess_text(discussion, pre_method) for discussion in X]
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only load tokenizer files produced by this project.
    with open(f'Saves/{saves_dir}/tokenizer.pkl', 'rb') as file:
        tokenizer = pickle.load(file)
        print('tokenizer loaded successfully...')

    X_test_seq = tokenizer.texts_to_sequences(test_Discussion_preprocessed)

    # padding='post' appends zeros to short sequences. `truncating` is left at
    # its default ('pre'), so sequences longer than seq_length lose their
    # leading tokens.
    X_test_padded = pad_sequences(X_test_seq, maxlen=seq_length, padding='post')

    return X_test_padded


In [42]:
# Preprocess, tokenize and pad the string-cast discussions for the GRU model.
gru_X_test_padded = gru_preprocess(X=X, pre_method=2)

start preprocessing...


In [25]:
# Examples of hard-coded paths that can be used instead of the dialog:
# gru_model_path = 'Models\\RNN\\GRU-e10-a81.h5'
# gru_model_path = 'Models\\RNN\\GRU-m2-e9-a85'
# Let the user pick the trained GRU model (.h5) through a file dialog.
# `file_path` is bound to the same value and kept in case later cells read it.
gru_model_path = file_path = filedialog.askopenfilename(title="Select a file", filetypes=[("h5 files", "*.h5")])
# A cancelled dialog returns an empty value; stop with a clear message rather
# than letting load_model fail on an empty path.
if not gru_model_path:
    raise FileNotFoundError('No GRU model file was selected.')
gru_model = load_model(gru_model_path)



In [43]:
# Run the loaded GRU model on the padded test sequences.
gru_predictions = gru_model.predict(gru_X_test_padded)

# Take the index of the largest score along axis 1 as the predicted class of
# each sample (assumes predictions are shaped (n_samples, n_classes) — TODO confirm).
gru_Y_pred = np.argmax(gru_predictions, axis=1)

[1m330/330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step


In [51]:
# Ask for confirmation before writing anything: only the exact input '0' saves.
if input('Press 0 to save the predictions') == '0':
    # save_csv is not defined in this notebook (presumably it comes from the
    # star import of Saves.HelperFunctions). The recorded output of this cell
    # reports the file as 'Saves/Delivaries/GRU.csv'. The meaning of `header`
    # and `numbering` is defined by that helper — confirm there.
    save_csv(data=gru_Y_pred, file_name=f'{saves_dir}/GRU', header=['SampleID', 'Category'], numbering=True)

Data saved to Saves/Delivaries/GRU.csv


# Transformers