In [None]:
%load_ext autoreload
%autoreload 2

import logging

logging.basicConfig(level=logging.INFO)

In [None]:
from pathlib import Path

import pandas as pd

from paddel.preprocessing import get_data

# Input and cache locations, relative to the notebook's working directory.
# NOTE(review): presumably get_data reads raw files from the first path and
# stores intermediate results under the second -- confirm in
# paddel.preprocessing.
raw_dir = Path("../data/raw")
cache_dir = Path("../data/cache")

# Three feature tables plus the target `y`; the tables are joined column-wise
# in the next cell and `y` is the target passed to `pipe.fit`.
misc_df, classic_df, fresh_df, y = get_data(raw_dir, cache_dir)

In [None]:
# Feature-count values attached to the variants that include `fresh_df`.
# NOTE(review): presumably a search grid for FeatureSelector(n_features=...);
# nothing in the visible cells consumes it -- confirm against the tuning code.
n_features_grid = [10, 20, 40, 80, 160, 240, 320, 400, 480, 560, 640, 720]

# Named feature-set variants. Each entry holds the feature table under "data"
# and a dict of per-variant parameter values under "params". Tables are joined
# column-wise (axis=1); "basic" reuses `misc_df` itself rather than a copy.
datasets = {
    "basic": dict(data=misc_df, params={}),
    "classic": dict(
        data=pd.concat([misc_df, classic_df], axis=1),
        params={},
    ),
    "fresh": dict(
        data=pd.concat([misc_df, fresh_df], axis=1),
        # list(...) gives every variant its own list object.
        params={"n_features": list(n_features_grid)},
    ),
    "full": dict(
        data=pd.concat([misc_df, classic_df, fresh_df], axis=1),
        params={"n_features": list(n_features_grid)},
    ),
}

In [None]:
from sklearn.svm import SVC
from paddel.preprocessing.transformer import FeatureSelector
from sklearn.preprocessing import QuantileTransformer
from sklearn.pipeline import Pipeline

# Seed for the only stochastic step in the pipeline. QuantileTransformer
# subsamples rows at random when the input has more rows than its `subsample`
# limit, so an unseeded transformer can yield a different fitted model on each
# run; pinning the seed keeps the persisted model reproducible.
RANDOM_STATE = 0

# Three-stage estimator: quantile scaling -> feature selection -> polynomial SVM.
pipe = Pipeline([
    # set_output(transform="pandas") makes the scaler emit a DataFrame instead
    # of a NumPy array.
    # NOTE(review): presumably FeatureSelector relies on column labels -- confirm.
    (
        "scaler",
        QuantileTransformer(n_quantiles=20, random_state=RANDOM_STATE).set_output(transform="pandas"),
    ),
    # Keeps 320 features, one of the values listed in the datasets' "n_features" lists.
    ("select", FeatureSelector(n_features=320)),
    ("model", SVC(C=0.5, coef0=2, degree=8, gamma="auto", kernel="poly")),
])

In [None]:
# Train on the "full" variant (misc + classic + fresh features side by side).
# The fit call stays the last expression so the cell displays the fitted
# estimator.
full_features = datasets["full"]["data"]
pipe.fit(full_features, y)

In [None]:
import pickle

# Persist the fitted pipeline next to the data directories.
model_path = Path("../data/model.pkl")
with model_path.open("wb") as sink:
    pickle.dump(pipe, sink)