In [None]:
import os
import logging

import pandas as pd
from tsfresh import select_features

from paddel.preprocessing.features import get_data, clean_data

# Point paddel at the raw-video and cache directories (relative to this notebook).
# NOTE(review): these are assigned after `paddel` has been imported above; if the
# package reads them at import time this is too late — confirm.
os.environ.update(
    {
        "PADDEL_VIDEOS_DIR": "../data/raw",
        "PADDEL_CACHE_DIR": "../data/cache",
    }
)

# Emit INFO-level log records in the notebook output.
logging.basicConfig(level=logging.INFO)

In [None]:
# Load the target and the three feature sets returned by paddel's get_data().
(
    y,
    misc_features,
    classic_features,
    fresh_features,
) = get_data()

In [None]:
# Clean the target and the feature sets.
# The return value is not captured, so the cleaning presumably mutates the
# passed objects in place — TODO confirm against clean_data's implementation.
clean_data(
    y,
    misc_features,
    classic_features,
    fresh_features,
)

In [None]:
# Combine the three feature sets column-wise by joining on their indexes.
# pd.merge defaults to an inner join, so only index labels present in all
# three inputs are kept.
all_features = pd.merge(
    left=pd.merge(
        left=misc_features,
        right=classic_features,
        left_index=True,
        right_index=True,
    ),
    right=fresh_features,
    left_index=True,
    right_index=True,
)

In [None]:
# Filter the merged feature matrix with tsfresh's select_features against y.
# NOTE(review): this selection sees every label in y, so any cross-validation
# score computed on features_filtered is optimistically biased.
features_filtered = select_features(X=all_features, y=y)

In [None]:
from sklearn.svm import SVC
from sklearn.preprocessing import PolynomialFeatures, QuantileTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from tsfresh.transformers import FeatureSelector

# Model pipeline evaluated with 10-fold cross-validation.
#
# Supervised feature selection is the first pipeline step, so it is re-fitted
# on the training part of every CV fold. Running select_features() over all rows
# beforehand lets the held-out labels influence which columns survive, which
# leaks information into each validation fold and inflates the CV scores.
clf = Pipeline([
    # Same tsfresh relevance tests as select_features(), with default settings.
    # n_jobs=0 is meant to keep tsfresh from spawning its own worker pool,
    # since GridSearchCV below already parallelises over folds.
    ("select", FeatureSelector(n_jobs=0)),
    # Add squared terms and pairwise interactions of the selected features.
    ("features", PolynomialFeatures(degree=2)),
    # Map every expanded feature through its empirical quantiles (20 landmarks).
    ("scale", QuantileTransformer(n_quantiles=20)),
    ("model", SVC()),
])

# Single-point grid: the lists are placeholders that can be widened to search.
grid = GridSearchCV(
    estimator=clf,
    param_grid={
        "model__C": [1],
        "model__kernel": ["poly"],
        "model__degree": [3],
        "model__gamma": ["scale"],
        # Weight class 0 more heavily than class 1.
        "model__class_weight": [{0: weight, 1: 1} for weight in [1.5]]
    },
    cv=10,
    n_jobs=-1,
    verbose=1,
)
# Fit on the unfiltered feature matrix; selection happens per fold inside `clf`.
# Assumes y is a pandas Series sharing all_features' index — TODO confirm.
grid.fit(all_features, y)

In [None]:
# Show the grid-search cross-validation results as a table
# (one column per key of grid.cv_results_).
pd.DataFrame.from_dict(grid.cv_results_)