In [None]:
import pickle
import json
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

## Load dataset

In [None]:
# All turnover dataset artifacts live in one directory; keep the prefix in one place.
DATASET_DIR = '../../../../resources/dataset/turnover'


def load_split(file_name):
    """Read one tab-separated split from DATASET_DIR.

    Missing values in every column are replaced with the empty string, and the
    resulting new frame is returned (nothing is mutated in place).
    """
    return pd.read_csv(DATASET_DIR + '/' + file_name, sep='\t').fillna('')


train_df = load_split('cleared_train.tsv')
test_df = load_split('cleared_test.tsv')
original_test_df = load_split('original_test.tsv')

# NOTE(security): pickle.load can execute arbitrary code while deserializing.
# Only load this encoder from a trusted, project-controlled location.
with open(DATASET_DIR + '/label_encoder.pkl', 'rb') as fin:
    le = pickle.load(fin)  # presumably a fitted label encoder -- confirm against the notebook that wrote it

# Replace the raw `turnover` values with whatever `le.transform` maps them to,
# so all three splits share the same label encoding.
train_df['turnover'] = le.transform(train_df['turnover'])
test_df['turnover'] = le.transform(test_df['turnover'])
original_test_df['turnover'] = le.transform(original_test_df['turnover'])

train_df.head()

## Train (random baseline: labels sampled from the training class distribution)

In [None]:
# Random baseline: every test row gets a label drawn from the empirical
# class distribution of the training set.
np.random.seed(42)  # fixed seed so the sampled predictions are repeatable

# Relative frequency of each turnover label in the training split.
class_freq = train_df.turnover.value_counts(normalize=True)


def sample_labels(n_rows):
    """Draw `n_rows` labels, each with probability equal to its training frequency."""
    return np.random.choice(class_freq.index, size=n_rows, p=class_freq)


# Order matters: both draws consume the same global NumPy random stream.
cleared_test_pred = sample_labels(len(test_df))
original_test_pred = sample_labels(len(original_test_df))

## Evaluation

In [None]:
def get_report(y_pred, test_df):
    """Summarize how well `y_pred` matches the `turnover` column of `test_df`.

    Args:
        y_pred: predicted labels, one per row of `test_df`.
        test_df: frame whose `turnover` column holds the true labels.

    Returns:
        A dict with three keys:
          - 'accuracy': result of `accuracy_score` on true vs. predicted labels.
          - 'log_loss': always the placeholder -1; this function does not
            compute a log loss.
          - 'confusion_matrix': result of `confusion_matrix`, converted to
            nested lists via `.tolist()`.
    """
    true_labels = test_df.turnover.to_numpy()

    return {
        'accuracy': accuracy_score(true_labels, y_pred),
        'log_loss': -1,  # placeholder value, not a computed metric
        'confusion_matrix': confusion_matrix(true_labels, y_pred).tolist()
    }

In [None]:
# Score each set of baseline predictions against the split it was sampled for.
evaluation_inputs = [
    ('cleared_test', cleared_test_pred, test_df),
    ('original_test', original_test_pred, original_test_df),
]
report = {
    split_name: get_report(predictions, frame)
    for split_name, predictions, frame in evaluation_inputs
}

# Persist the metrics next to the notebook as JSON.
with open('report.json', 'w') as fout:
    json.dump(report, fout)