In [13]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report


# Base ratio column names; together with their '_relative' variants below
# they are the columns selected as model features.
ratioKeys = [
    'psRatio',
    'peRatio',
    'priceToBook',
    'evToEbitda',
    'evToEbit',
    'priceToFreeCashFlow',
    'evToSales',
    'evToGrossProfit',
    'priceToGrossProfit',
]

# Candidate target columns, grouped by the horizon encoded in their names.
yKeys = [
    'yAdjustedPriceSpinoffExcl1Year',
    'yAdjustedPrice1Year',
    'yPrice1Year',
]
y2Keys = [
    'yAdjustedPriceSpinoffExcl2Year',
    'yAdjustedPrice2Year',
    'yPrice2Year',
]
yAlpha = [
    'alpha1Year',
    'adjustedAlpha1Year',
    'equalAlpha1Year',
    'equalAdjustedAlpha1Year',
    'alpha2Year',
    'adjustedAlpha2Year',
    'equalAlpha2Year',
    'equalAdjustedAlpha2Year',
]

# Ratios followed by the 1-year and 2-year price targets, in that order.
numericalKeys = [*ratioKeys, *yKeys, *y2Keys]
# One '<ratio>_relative' column name per base ratio, in the same order.
relativeRatioKeys = [f'{key}_relative' for key in ratioKeys]

In [14]:
# Load the combined dataset and select the feature columns: every base ratio
# plus its '_relative' counterpart.
df = pd.read_csv('../data/combined_inner.csv')
X = df[ratioKeys + relativeRatioKeys]
# yLabels = yKeys + y2Keys + yAlpha

# Hyper-parameter grid, built once because it does not depend on the target.
# It is split per kernel: SVC ignores `gamma` for the linear kernel, so crossing
# gamma with 'linear' would only refit identical models. As a consequence,
# best_params_ carries no 'gamma' entry when the linear kernel wins.
param_grid = [
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': ['scale', 'auto']},
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
]

for yLabel in yAlpha:
    yContinuous = df[yLabel]
    # `NaN <= 0` evaluates to False, so a missing target would silently be
    # labelled 1; keep only the rows whose target is actually known.
    hasTarget = yContinuous.notna()
    # Binary label: 0 when the target is <= 0, 1 when it is > 0.
    y = np.where(yContinuous[hasTarget] <= 0, 0, 1)

    # Fixed random_state keeps the 80/20 split reproducible across runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X[hasTarget], y, test_size=0.2, random_state=1
    )

    # Scaling statistics are learned from the training split only and then
    # applied unchanged to the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # 3-fold cross-validated search over the SVC grid defined above.
    grid = GridSearchCV(SVC(), param_grid, cv=3)
    grid.fit(X_train, y_train)

    y_pred = grid.predict(X_test)
    print("Best parameters:", grid.best_params_)
    print("Classification Report:\n", classification_report(y_test, y_pred))
    # TODO: Do I train this for each y column?
    # Until that is decided, only the first entry of yAlpha is trained.
    break

Best parameters: {'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}
Classification Report:
               precision    recall  f1-score   support

           0       0.63      0.56      0.59      3081
           1       0.59      0.65      0.62      2972

    accuracy                           0.61      6053
   macro avg       0.61      0.61      0.61      6053
weighted avg       0.61      0.61      0.61      6053
