In [21]:
from SeparatePredictor import SeparatePredictor
from train_tools import LSTMRegressor
import pandas as pd
import torch
import numpy as np

from sklearn.metrics import mean_squared_error, r2_score
from flaml import AutoML
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer

import warnings
warnings.simplefilter('ignore')

In [2]:
# Run on the first CUDA device when one is available; otherwise use the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda:0'
else:
    DEVICE = 'cpu'

# Model hyper-parameters handed to SeparatePredictor as `params`.
params = dict(input_size=3, hidden_dim=64, n_layers=2, device=DEVICE)

df = pd.read_csv('train.csv')
df_t = pd.read_csv('test.csv')

# Build the predictor around LSTMRegressor, prepare its data and train it.
predictor = SeparatePredictor(models=[LSTMRegressor], params=params, data=df)
predictor.prepare()
predictor.train(device=DEVICE, epochs=4500, learning_rate=1e-4)

# Predict on the test frame and compare the two shapes as a sanity check.
preds = predictor.predict(df_t, device=DEVICE)
print(df_t.shape)
print(preds.shape)

TRAINING fcoin_1
cuda:0


Train Loss: 2.66; Val Loss: 6.09: 100%|██████████| 4500/4500 [01:36<00:00, 46.61it/s]


Total r2 score 0.02825482486012776
Total mse score 6.408326148986816
TRAINING fcoin_2
cuda:0


Train Loss: 2.64; Val Loss: 2.50: 100%|██████████| 4500/4500 [01:44<00:00, 43.01it/s]


Total r2 score -0.030839449482138903
Total mse score 2.2637577056884766
TRAINING fcoin_3
cuda:0


Train Loss: 2.60; Val Loss: 4.60: 100%|██████████| 4500/4500 [01:36<00:00, 46.54it/s]


Total r2 score -0.05088303594706489
Total mse score 4.698777675628662
TRAINING fcoin_4
cuda:0


Train Loss: 3.27; Val Loss: 4.45: 100%|██████████| 4500/4500 [01:37<00:00, 46.38it/s]


Total r2 score -0.13625975815084534
Total mse score 4.6782684326171875
TRAINING fcoin_5
cuda:0


Train Loss: 2.42; Val Loss: 2.68: 100%|██████████| 4500/4500 [01:36<00:00, 46.49it/s]


Total r2 score -0.06372646147231897
Total mse score 2.7348034381866455
TRAINING fcoin_6
cuda:0


Train Loss: 2.64; Val Loss: 2.82:  12%|█▏        | 519/4500 [00:11<01:27, 45.71it/s]


KeyboardInterrupt: 

In [None]:

# Show the device of every parameter tensor of the 'coin_1' model.
coin_1_model = predictor.models['coin_1']
for param in coin_1_model.parameters():
    print(param.device)

In [None]:
# Instantiate one LSTMRegressor from `params`, move it to DEVICE and display it.
model = LSTMRegressor(**params).to(device=DEVICE)
model

In [22]:
class Pipeline_autoML:
    """
    Preprocessing pipeline for the feature frame of a single coin, with the
    irrelevant features already dropped.

    Missing values are imputed first, then every column is min-max scaled.

    :param impute_strategy: strategy forwarded to ``SimpleImputer``.
    """
    def __init__(self, impute_strategy='mean'):
        self.imputer = SimpleImputer(strategy=impute_strategy)
        self.scaler = MinMaxScaler()

    def fit(self, X_raw: pd.DataFrame, y_raw: pd.Series = None):
        """
        Fit the imputer on ``X_raw`` and the scaler on the imputed data.

        :param X_raw: raw feature frame.
        :param y_raw: ignored; accepted so ``fit`` and ``transform`` can be
            called with the same arguments.
        :return: ``self``, so calls can be chained.
        """
        self.imputer.fit(X_raw)
        # The scaler must learn its ranges from exactly what ``transform``
        # will feed it. Fitting it on the raw frame can disagree with the
        # imputed data (e.g. a constant fill value outside the observed range,
        # or columns the imputer drops because they are entirely missing).
        self.scaler.fit(self.imputer.transform(X_raw))
        return self

    def transform(self, X_raw: pd.DataFrame, y_raw=None):
        """
        Impute and scale ``X_raw`` with the already fitted steps.

        :param X_raw: raw feature frame.
        :param y_raw: optional target; passed through untouched.
        :return: the transformed features, or ``(features, y_raw)`` when
            ``y_raw`` is given.
        """
        res = self.imputer.transform(X_raw)
        res = self.scaler.transform(res)

        if y_raw is not None:
            return res, y_raw
        return res

    def fit_transform(self, *args, **kwargs):
        """
        Fit on the given data and return its transformed version.

        Accepts the same positional/keyword arguments as ``fit`` and
        ``transform``; both receive all of them.
        """
        # Forward keyword arguments to ``fit`` as well, so that
        # ``fit_transform(X_raw=...)`` works instead of raising TypeError.
        self.fit(*args, **kwargs)
        return self.transform(*args, **kwargs)

In [79]:
class AutoMLpred(SeparatePredictor):
    """
    SeparatePredictor variant that fits one sklearn-style model (``fit`` /
    ``predict``) per class and preprocesses each class with its own
    ``Pipeline_autoML``.

    Relies on attributes and helpers supplied by ``SeparatePredictor``
    (``raw_data``, ``y``, ``models``, ``separate_data``, ``train_test_split``,
    ``fit_preprocess``, ``prepare_data``, ``feat_devide``), which are defined
    outside this notebook.
    """
    def prepare(self):
        """Split the data per class, set up preprocessing and build the train/test sets."""
        self.devided_data, self.y_devided, self.class_labels = self.separate_data(self.raw_data, self.y)
        self.models = dict(zip(self.class_labels, self.models))
        self.set_up_pipeline()

        self.train_test_split()

        self.fit_preprocess()
        self.X_train = self.prepare_data(self.X_train)
        self.X_test = self.prepare_data(self.X_test)

    def set_up_pipeline(self):
        """
        Define the preprocessing pipeline for every class.

        The training/validation callbacks are unused here and set to ``None``.
        :return:
        """
        # One independent pipeline per class label.
        self.preprocess_p = {label: Pipeline_autoML() for label in self.class_labels}
        self.train_f = None
        self.validate_f = None

    def _report_scores(self, cls, split, X, y):
        """Print r2 and mse of the ``cls`` model on ``(X, y)``, labelled with ``split``."""
        preds = self.models[cls].predict(X)
        r2 = r2_score(y, preds)
        mse = mean_squared_error(y, preds)

        print(f"Total {split} r2 score {r2}")
        print(f"Total {split} mse score {mse}")

    def train(self, train_params=None):
        """
        Fit every per-class model and print its train and test scores.

        :param train_params: keyword arguments forwarded to each model's
            ``fit``; ``None`` means no extra arguments.
        """
        # Avoid a shared mutable default argument.
        fit_kwargs = {} if train_params is None else train_params
        for cls in self.class_labels:
            print(f"TRAINING f{cls}")
            self.models[cls].fit(self.X_train[cls], self.y_train[cls], **fit_kwargs)

            self._report_scores(cls, "train", self.X_train[cls], self.y_train[cls])
            self._report_scores(cls, "test", self.X_test[cls], self.y_test[cls])

    def predict(self, X: pd.DataFrame):
        """
        Predict for every row of ``X`` with the model of that row's class.

        :param X: raw frame containing the ``self.feat_devide`` column.
        :return: single-column DataFrame (column ``0``) indexed like ``X``;
            rows whose class is not returned by ``separate_data`` keep the
            initial value 0.
        """
        res = pd.DataFrame(index=X.index, data=np.zeros(X.shape[0]))

        X_separated, classes = self.separate_data(X)
        X_ds = self.prepare_data(X_separated)
        for cls in classes:
            cls_preds = self.models[cls].predict(X_ds[cls])
            res.loc[X[self.feat_devide] == cls] = cls_preds.reshape(-1, 1)

        return res

    def evaluate(self):
        """
        Print r2 and mse over the held-out test data of all classes together.

        Uses the per-class prepared test sets (``self.X_test[cls]`` /
        ``self.y_test[cls]``), the same containers ``train`` scores against.
        After ``prepare`` ``self.X_test`` is no longer a raw DataFrame, so it
        cannot be routed through ``predict``.
        """
        y_true = np.concatenate(
            [np.asarray(self.y_test[cls]).ravel() for cls in self.class_labels]
        )
        y_pred = np.concatenate(
            [np.asarray(self.models[cls].predict(self.X_test[cls])).ravel()
             for cls in self.class_labels]
        )
        r2 = r2_score(y_true, y_pred)
        mse = mean_squared_error(y_true, y_pred)

        print(f"Total r2 score {r2}")
        print(f"Total mse score {mse}")

In [80]:
# List the columns of the training frame to pick features from.
df.columns

Index(['Unnamed: 0', 'Id', 'date', 'coin_id', 'fwd_ret_3d', 'feat_1', 'feat_2',
       'feat_3', 'feat_4', 'feat_5', 'feat_6', 'feat_7', 'feat_8', 'feat_9',
       'feat_10'],
      dtype='object')

In [92]:
# Feature columns given to the models (feat_7 is left out) and the coins used.
features = [f'feat_{i}' for i in (1, 2, 3, 4, 5, 6, 8, 9, 10)]
coins = [f'coin_{i}' for i in range(1, 5)]

# Keyword arguments forwarded to every model's fit() by AutoMLpred.train.
train_params = dict(task="regression", time_budget=600, metric='mse', verbose=0)

# Despite its name, df_test is the subset of the training frame `df`
# restricted to the selected coins.
df_test = df[df['coin_id'].isin(coins)]

auto_ml = AutoMLpred(models=[AutoML], data=df_test, relevant_features=features)
auto_ml.prepare()
auto_ml.train(train_params)

TRAINING fcoin_1
Total train r2 score 0.8771032717042755
Total train mse score 0.3608563388783512
Total test r2 score 0.06225745126906734
Total test mse score 6.184090429286921
TRAINING fcoin_2


KeyboardInterrupt: 

In [69]:
# Predict for the whole training frame `df` and display the result.
preds = auto_ml.predict(df)
preds

Unnamed: 0,0
0,-0.155895
1,-0.198345
2,-2.101173
3,2.303399
4,0.553546
...,...
36720,0.085418
36721,-0.111529
36722,0.007720
36723,-0.437889


In [73]:
# Score the predictions on the rows from position 32000 onward.
tail_targets = df['fwd_ret_3d'][32000:].values
tail_preds = preds[32000:]

r2 = r2_score(tail_targets, tail_preds)
mse = mean_squared_error(tail_targets, tail_preds)

print(r2)
print(mse)

-0.23205342813902097
3.6489872056279213


In [83]:
# preds = auto_ml.predict(df_t)
# NOTE(review): `preds` must hold predictions for `df_t` here; with the line
# above commented out it comes from whichever cell assigned it last — confirm.
preds["Id"] = df_t['Id']
preds.index = df_t['Id']
preds["Predicted"] = preds[0]
# drop() returns a new frame; rebind it so the raw prediction column `0`
# is actually removed from the submission.
preds = preds.drop(0, axis=1)
# "Id" is already a regular column, so do not also write the index
# (which is "Id" as well) into the file.
preds.to_csv('./submission.csv', index=False)

In [93]:
# Rewrite the submission file without the index column.
preds.to_csv('./submission.csv', index=False)