Copy all the files located in https://github.com/crapher/medium/tree/main/13.TrendIntraday/result to your Colab folder.

In [1]:
import os
import glob
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

from sklearn.metrics import confusion_matrix, classification_report

In [2]:
# Return a dataset built from all the level 1 results
def generate_features_targets(files_pattern):
    """Build a feature matrix and a target vector from a set of result files.

    Every file matching ``files_pattern`` is read with ``pd.read_csv`` and must
    hold exactly two columns: the first one becomes a feature column (named
    after the file's base name up to its first dot) and the second one is
    treated as the target. Only the target of the first file (in sorted path
    order) is returned.

    Parameters
    ----------
    files_pattern : str
        Glob pattern selecting the files to load.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y)`` where ``x`` has one column per matched file and ``y`` is the
        target column of the first file.

    Raises
    ------
    FileNotFoundError
        If no file matches ``files_pattern``.
    """

    # glob returns paths in arbitrary order; sorting keeps the column order
    # (and the file the target is taken from) stable across runs and splits.
    files = sorted(glob.glob(files_pattern))
    if not files:
        raise FileNotFoundError(f'No files match the pattern: {files_pattern!r}')

    features = {}
    y = None
    for file in files:
        col_name = os.path.basename(file).split('.')[0]
        tmp_df = pd.read_csv(file)
        tmp_df.columns = [col_name, 'target']

        if y is None:
            y = tmp_df['target']

        features[col_name] = tmp_df[col_name]

    # Build the frame once instead of assigning into a slice of another frame;
    # every column is aligned to the index of the first file, as before.
    x = pd.DataFrame(features, index=y.index)

    return x.values, y.values

In [3]:
# Show the result of the operation
def show_result(target, pred, ds_type='TEST'):
    """Print a banner, the confusion matrix and the classification report.

    Parameters
    ----------
    target : array-like
        Real labels.
    pred : array-like
        Predicted labels.
    ds_type : str, optional
        Label shown (upper-cased) in the banner line. Defaults to ``'TEST'``.
    """

    y_true = np.array(target)
    y_pred = np.array(pred)

    # Banner line: the title centered in a 56-character row of asterisks
    banner = f' RESULT {ds_type.upper()} '
    print(banner.center(56, '*'))

    print('* Confusion Matrix (Top: Predicted - Left: Real)')
    matrix = confusion_matrix(y_true, y_pred)
    print(matrix)

    print('* Classification Report')
    report = classification_report(y_true, y_pred)
    print(report)

In [4]:
# Return the predicted values
def get_predicted_values(features):
    """Combine the columns of ``features`` into one 0/1 prediction per row.

    A row is predicted as 1 when its row sum is strictly greater than half the
    number of columns, and as 0 otherwise (a sum of exactly half yields 0).
    """

    votes_per_row = features.sum(axis=1)
    majority_threshold = features.shape[1] / 2

    return np.where(votes_per_row > majority_threshold, 1, 0)

In [5]:
# Get the datasets to be used in the tests.
# Each call loads every file in the current directory matching the pattern and
# returns (features, targets); the names bound here are used by the cells below.
train_x, train_y = generate_features_targets('./*train.csv.gz')
val_x, val_y = generate_features_targets('./*val.csv.gz')
test_x, test_y = generate_features_targets('./*test.csv.gz')

In [6]:
# Predict and show train values:
# combine the train feature columns into one prediction per row, then print
# the confusion matrix and classification report against the train targets
pred_y = get_predicted_values(train_x)
show_result(train_y, pred_y, 'TRAIN')

********************* RESULT TRAIN *********************
* Confusion Matrix (Top: Predicted - Left: Real)
[[ 832  477]
 [ 534 1015]]
* Classification Report
              precision    recall  f1-score   support

           0       0.61      0.64      0.62      1309
           1       0.68      0.66      0.67      1549

    accuracy                           0.65      2858
   macro avg       0.64      0.65      0.64      2858
weighted avg       0.65      0.65      0.65      2858



In [7]:
# Predict and show validation values:
# combine the validation feature columns into one prediction per row, then
# print the confusion matrix and classification report against the validation targets
pred_y = get_predicted_values(val_x)
show_result(val_y, pred_y, 'VALIDATION')

****************** RESULT VALIDATION *******************
* Confusion Matrix (Top: Predicted - Left: Real)
[[125  84]
 [ 97 181]]
* Classification Report
              precision    recall  f1-score   support

           0       0.56      0.60      0.58       209
           1       0.68      0.65      0.67       278

    accuracy                           0.63       487
   macro avg       0.62      0.62      0.62       487
weighted avg       0.63      0.63      0.63       487



In [8]:
# Predict and show test values:
# combine the test feature columns into one prediction per row, then print the
# confusion matrix and classification report (ds_type is left at its 'TEST' default)
pred_y = get_predicted_values(test_x)
show_result(test_y, pred_y)

********************* RESULT TEST **********************
* Confusion Matrix (Top: Predicted - Left: Real)
[[142 107]
 [ 94 186]]
* Classification Report
              precision    recall  f1-score   support

           0       0.60      0.57      0.59       249
           1       0.63      0.66      0.65       280

    accuracy                           0.62       529
   macro avg       0.62      0.62      0.62       529
weighted avg       0.62      0.62      0.62       529

