In [20]:
import pandas as pd
import numpy as np
import os
from sklearn.dummy import DummyClassifier
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

seed = 42

In [21]:
#Load data
df = pd.read_csv(os.path.abspath("../data/labeled_training_data.csv"))

#Select all columns except the last
X = df.iloc[:, :-1]
#Select label column
y = df['increase_stock']

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, stratify=y, random_state=seed)

In [25]:
baseline = DummyClassifier(strategy='uniform', constant=0, random_state=seed)

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('dummy', baseline)])


pipeline.fit(X_train,y_train)

y_preds = baseline.predict(X_test)

print(f"F1 Score: {f1_score(y_preds ,y_test, zero_division=np.nan)}")
print(f"Accuracy Score: {accuracy_score(y_preds ,y_test)}")
print(f"Recall Score: {recall_score(y_preds ,y_test, zero_division=np.nan)}")
print(f"Precision Score: {precision_score(y_preds ,y_test, zero_division=np.nan)}")
try:
    print(f"ROC AUC Score: {roc_auc_score(y_preds ,y_test)}")
except ValueError:
    print(f"ROC AUC Score: {np.nan}")

F1 Score: 0.23963133640552994
Accuracy Score: 0.484375
Recall Score: 0.16352201257861634
Precision Score: 0.4482758620689655
ROC AUC Score: 0.48238212430173055
