In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix

# Not referenced by any cell visible here -- presumably used further down; verify before removing.
radius = 30
# Default size for every matplotlib figure: 30 wide by 4 tall (a wide, short canvas).
plt.rcParams['figure.figsize'] = 30, 4

In [2]:
# Fixed seed so that Restart & Run All regenerates exactly the same bit sequence.
SEED = 42
# Draw from a dedicated generator instead of the global np.random state, so
# seeding the data does not alter any other consumer of the global state.
arr = np.random.RandomState(SEED).randint(low=0, high=2, size=100_000)
# Let's not make arr too large -- to simulate the situation of financial market data.
print(arr)
print(len(arr))

[0 1 0 ... 1 1 0]
100000


In [3]:
def task(model, arr, col_count=128):
    """Fit ``model`` to predict each element of ``arr`` from the ``col_count`` values before it.

    Row ``i`` of the feature matrix holds ``arr[i] .. arr[i + col_count - 1]``
    in columns ``x0 .. x{col_count - 1}``; its label is ``arr[i + col_count] > 0``.
    The rows are split with ``train_test_split`` (called with its default
    settings), ``model`` is fitted on the training part, and a one-line
    summary of the confusion-matrix metrics on the test part is printed.

    Parameters
    ----------
    model : estimator
        Must expose ``fit(X, y)`` returning the fitted estimator and
        ``predict(X)``.
    arr : 1-D array-like
        The sequence to learn from; needs more than ``col_count`` elements.
    col_count : int, default 128
        Number of preceding values used as features for each label.

    Returns
    -------
    None
        The metrics are only printed.

    Raises
    ------
    ValueError
        If ``col_count`` is not positive, ``arr`` is not one-dimensional, or
        ``arr`` is too short to produce a single (features, label) row.
    """
    arr = np.asarray(arr)
    if col_count < 1:
        raise ValueError(f"col_count must be >= 1, got {col_count}")
    if arr.ndim != 1:
        raise ValueError(f"arr must be one-dimensional, got {arr.ndim} dimensions")
    n_rows = arr.shape[0] - col_count
    if n_rows <= 0:
        raise ValueError(
            f"arr needs more than col_count={col_count} elements, got {arr.shape[0]}"
        )

    # Build all sliding windows at once: column j is the sequence shifted by j,
    # so windows[i, j] == arr[i + j].  This replaces a per-row DataFrame.append,
    # which copied the whole frame on every iteration (quadratic) and no longer
    # exists in pandas >= 2.0.
    windows = np.column_stack([arr[j:j + n_rows] for j in range(col_count)])
    X = pd.DataFrame(windows, columns=[f'x{j}' for j in range(col_count)])
    # Label of row i is the value right after its window, as a boolean.
    y = pd.Series(arr[col_count:] > 0, name='y')

    X_train, X_test, y_train, y_test = train_test_split(X, y)
    clf = model.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Pinning the label order guarantees a 2x2 matrix (hence four values to
    # unpack) even when the test split happens to contain a single class.
    tn, fp, fn, tp = confusion_matrix(
        y_true=y_test, y_pred=y_pred, labels=[False, True]
    ).ravel()
    accuracy = (tn + tp) / (tn + fp + fn + tp)
    tpr = tp / (tp + fn)
    fpr = fp / (fp + tn)
    tnr = tn / (tn + fp)
    balanced_accuracy = (tpr + tnr) / 2
    print(f"pos={tp + fn}, neg={tn + fp}, tpr={tpr*100:2.1f}%, fpr={fpr*100:2.1f}%, tnr={tnr*100:2.1f}%, accuracy={accuracy*100:.1f}%, balanced_accuracy={balanced_accuracy*100:.1f}%")

In [4]:
# Baseline: random forest built with no constructor arguments, evaluated on
# its own copy of the bit sequence.
from sklearn.ensemble import RandomForestClassifier as rfc
task(model=rfc(), arr=arr.copy())

pos=12526, neg=12442, tpr=44.7%, fpr=44.3%, tnr=55.7%, accuracy=50.2%, balanced_accuracy=50.2%


In [5]:
# Baseline: logistic regression built with no constructor arguments, evaluated
# on its own copy of the bit sequence.
from sklearn.linear_model import LogisticRegression as lr
task(model=lr(), arr=arr.copy())

pos=12425, neg=12543, tpr=49.0%, fpr=49.0%, tnr=51.0%, accuracy=50.0%, balanced_accuracy=50.0%


In [6]:
# Baseline: support-vector classifier built with no constructor arguments,
# evaluated on its own copy of the bit sequence.
from sklearn.svm import SVC
task(model=SVC(), arr=arr.copy())

pos=12407, neg=12561, tpr=49.9%, fpr=49.7%, tnr=50.3%, accuracy=50.1%, balanced_accuracy=50.1%


In [7]:
# Baseline: multi-layer perceptron with five hidden layers of 400 units each,
# evaluated on its own copy of the bit sequence.
from sklearn.neural_network import MLPClassifier as mlpc
task(model=mlpc(hidden_layer_sizes=(400,) * 5), arr=arr.copy())

pos=12523, neg=12445, tpr=50.9%, fpr=51.3%, tnr=48.7%, accuracy=49.8%, balanced_accuracy=49.8%
