# Import Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
from statistics import mode
from collections import Counter

# Compiled Metrics Results

In [60]:
# Compile classification metrics (accuracy and weighted precision / recall / F1)
# on the test split of every model and target day, and write them to one CSV.
modes = ['LSTM' ,'GRU','RNN']  # NOTE(review): not referenced anywhere in this cell
EMITEN = 'INCO'
TARGET_DAYS = [1, 5, 10, 20, 50]
MODEL_LIST = ['Price Prediction', 'Deep Learning', 'Machine Learning']

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
ensemble_results_dir = f'Ensemble Result/{EMITEN}'
os.makedirs(ensemble_results_dir, exist_ok=True)

result_file_path = f'{ensemble_results_dir}/{EMITEN}_Results.csv'
header_results = 'emiten,model_type,target_day,accuracy,precision,recall,f1_score'

# Shared keyword arguments for the class-averaged metrics.
weighted_kwargs = dict(average='weighted', zero_division=0)

# Test-split predictions, appended model-major: every target day of the first
# model, then every target day of the second model, and so on.
all_predictions = []

# The file is truncated and opened once; rows are written as they are computed.
with open(result_file_path, 'w') as writer:
    writer.write(f'{header_results}\n')

    for MODEL in MODEL_LIST:
        for TARGET_DAY in TARGET_DAYS:
            # The price-prediction results carry an extra "LSTM_" file prefix.
            if MODEL == 'Price Prediction':
                data_path = f'Result {MODEL}/{EMITEN}/LSTM_{EMITEN}_Target_{TARGET_DAY}.csv'
            else:
                data_path = f'Result {MODEL}/{EMITEN}/{EMITEN}_Target_{TARGET_DAY}.csv'

            result_df = pd.read_csv(data_path)
            test_df = result_df[result_df['type'] == 'test']
            testCategoryPredict = test_df['prediction'].values
            testCatY = test_df['ground_truth'].values

            test_acc = round(accuracy_score(testCatY, testCategoryPredict), 4)
            test_prec = round(precision_score(testCatY, testCategoryPredict, **weighted_kwargs), 4)
            test_rec = round(recall_score(testCatY, testCategoryPredict, **weighted_kwargs), 4)
            test_f1 = round(f1_score(testCatY, testCategoryPredict, **weighted_kwargs), 4)

            test_results = f'{EMITEN},{MODEL},{TARGET_DAY},{test_acc},{test_prec},{test_rec},{test_f1}'
            writer.write(f'{test_results}\n')

            all_predictions.append(testCategoryPredict)
            print(f'{MODEL} Target {TARGET_DAY} Done')

Price Prediction Target 1 Done
Price Prediction Target 5 Done
Price Prediction Target 10 Done
Price Prediction Target 20 Done
Price Prediction Target 50 Done
Deep Learning Target 1 Done
Deep Learning Target 5 Done
Deep Learning Target 10 Done
Deep Learning Target 20 Done
Deep Learning Target 50 Done
Machine Learning Target 1 Done
Machine Learning Target 5 Done
Machine Learning Target 10 Done
Machine Learning Target 20 Done
Machine Learning Target 50 Done


In [61]:
# Majority-vote ensemble: for every target day, merge the predictions of all
# models in MODEL_LIST into a single label per test sample.
total_ensemble = len(TARGET_DAYS)  # one ensemble column per target day
n_models = len(MODEL_LIST)         # number of voters per target day
FALLBACK_LABEL = 1                 # label used when no two models agree

# all_predictions must hold one array per (model, target day) pair, model-major.
assert len(all_predictions) == n_models * total_ensemble, (
    f'expected {n_models * total_ensemble} prediction arrays, got {len(all_predictions)}'
)

# Shape (n_samples, n_models * total_ensemble); column k * total_ensemble + i
# holds the predictions appended for model k and TARGET_DAYS[i].
np_all_prediction = np.column_stack(all_predictions)

ensemble_preds = np.zeros([np_all_prediction.shape[0], total_ensemble])

for day_idx in range(total_ensemble):
    # Every total_ensemble-th column starting at day_idx belongs to the same
    # target day, one column per model.
    day_votes = np_all_prediction[:, day_idx::total_ensemble]

    for row_idx, votes in enumerate(day_votes):
        # most_common(1) returns the first-encountered label among equal counts.
        winner, n_votes = Counter(votes.tolist()).most_common(1)[0]
        if n_votes == 1 and n_models > 1:
            # No label was predicted twice: fall back to the default class.
            winner = FALLBACK_LABEL
        ensemble_preds[row_idx, day_idx] = winner

ensemble_preds[:5]

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 2.],
       [0., 2., 2., 2., 2.],
       [1., 1., 1., 1., 1.]])

In [62]:
# Score the majority-vote ensemble for every target day and append the rows to
# the results CSV.
# NOTE: the file is opened in append mode, so re-running this cell without
# re-running the cell that creates the file adds duplicate "Ensemble Model" rows.
with open(result_file_path, 'a') as writer:
    for i in range(total_ensemble):
        # Load the ground truth that belongs to THIS target day. Previously the
        # path left over from the last iteration of the metrics loop (last
        # model, last target day) was reused for every horizon.
        # Assumes 'ground_truth' is identical across models for a given target
        # day, so the last model's file is used -- TODO confirm.
        ground_truth_path = f'Result {MODEL_LIST[-1]}/{EMITEN}/{EMITEN}_Target_{TARGET_DAYS[i]}.csv'
        result_df = pd.read_csv(ground_truth_path)
        test_df = result_df[result_df['type'] == 'test']
        testCatY = test_df['ground_truth'].values
        testCategoryPredict = ensemble_preds[:, i]

        test_acc = round(accuracy_score(testCatY, testCategoryPredict), 4)
        test_prec = round(precision_score(testCatY, testCategoryPredict, average='weighted', zero_division=0), 4)
        test_rec = round(recall_score(testCatY, testCategoryPredict, average='weighted', zero_division=0), 4)
        test_f1 = round(f1_score(testCatY, testCategoryPredict, average='weighted', zero_division=0), 4)

        test_results = f'{EMITEN},Ensemble Model,{TARGET_DAYS[i]},{test_acc},{test_prec},{test_rec},{test_f1}'
        writer.write(f'{test_results}\n')

In [63]:
# Reload the compiled results CSV from disk and display it as the cell output.
tmp = pd.read_csv(result_file_path)
tmp

Unnamed: 0,emiten,model_type,target_day,accuracy,precision,recall,f1_score
0,INCO,Price Prediction,1,0.4877,0.4694,0.4877,0.3369
1,INCO,Price Prediction,5,0.4098,0.3816,0.4098,0.3872
2,INCO,Price Prediction,10,0.5574,0.4487,0.5574,0.4345
3,INCO,Price Prediction,20,0.6311,0.6085,0.6311,0.5832
4,INCO,Price Prediction,50,0.7582,0.7825,0.7582,0.7572
5,INCO,Deep Learning,1,0.4795,0.3573,0.4795,0.3643
6,INCO,Deep Learning,5,0.6189,0.6427,0.6189,0.6218
7,INCO,Deep Learning,10,0.8156,0.8188,0.8156,0.8158
8,INCO,Deep Learning,20,0.8975,0.9002,0.8975,0.8971
9,INCO,Deep Learning,50,0.9139,0.9146,0.9139,0.9138
