In [1]:
import numpy as np
import pandas as pd

In [14]:
from sklearn.model_selection import train_test_split, ParameterGrid
from scipy.stats import spearmanr
from sklearn.metrics import accuracy_score

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [4]:
train = pd.read_csv("train.csv", index_col = 0)
validation = pd.read_csv("validation.csv", index_col = 0)
test = pd.read_csv("test.csv", index_col = 0)

In [7]:
X_train, y_train = train.drop(["Date", "Identifier", "Dep_Var"], axis = 1), train["Dep_Var"]
X_val, y_val = validation.drop(["Date", "Identifier", "Dep_Var"], axis = 1), validation["Dep_Var"]
X_test, y_test = test.drop(["Date", "Identifier", "Dep_Var"], axis = 1), test["Dep_Var"]

#### Random Forest :

In [10]:
params = {"n_estimators" : [100],
          "max_depth" : [2, 5, 10, 20, 50],
          "min_samples_split" : [5, 10, 50, 100, 200]}

In [11]:
grid = ParameterGrid(params)

In [17]:
results_rf = []
for param in grid :
    md, ms, ne = param['max_depth'], param['min_samples_split'], param['n_estimators']
    model = RandomForestClassifier(n_estimators=ne, max_depth=md, min_samples_split=ms)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    acc, spear = accuracy_score(y_val, preds), spearmanr(y_val, preds)
    print(f"\r{ne}, {md}, {ms}, {acc}, {spear[0]}", end = "")
    results_rf.append([ne, md, ms, acc, spear[0]])

100, 50, 200, 0.09016987360115933, 0.04508650861243464

In [20]:
res = pd.DataFrame(results_rf, columns = ["n_estimators", "max_depth", "min_samples_split", "accuracy", "spearman"])
res.sort_values(by = "spearman", ascending = False).head()

Unnamed: 0,n_estimators,max_depth,min_samples_split,accuracy,spearman
2,100,2,50,0.095644,0.075744
0,100,2,5,0.09178,0.065749
4,100,2,200,0.093229,0.054696
9,100,5,200,0.091539,0.053645
3,100,2,100,0.09017,0.053188


#### SVM :

In [25]:
params = {"C" : [0.01, 0.1, 1, 10]}
grid = ParameterGrid(params)

In [26]:
results_svm = []
for param in grid :
    c = param["C"]
    model = SVC(C = c, gamma = "auto")
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    acc, spear = accuracy_score(y_val, preds), spearmanr(y_val, preds)
    print(f"\r{c}, {acc}, {spear[0]}", end = "")
    results_svm.append([c, acc, abs(spear[0])])

10, 0.07841558650672249, 0.054134502158857234656

In [28]:
res_svm = pd.DataFrame(results_svm, columns = ["c", "accuracy", "spearman"])
res_svm.sort_values(by = "spearman", ascending = False).head()

Unnamed: 0,c,accuracy,spearman
0,0.01,0.074471,0.061881
3,10.0,0.078416,0.054135
2,1.0,0.087916,0.045933
1,0.1,0.088077,0.025643
