In [None]:
import os
import sys
sys.path.append('..')

import pandas as pd

from src.constants import get_constants
from src.features.config import CYEConfigPreProcessor, CYEConfigTransformer
from src.features.preprocessing import CYEPreProcessor, CYETargetTransformer
from src.features.great.features.unprocessing import CYEGReaTProcessor

from sklearn import metrics
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import StratifiedKFold

# Project-level constants object; this notebook reads `target_column`,
# `file_data_test` and `path_submissions` from it.
cst = get_constants()

In [None]:
# Preprocessor that is fitted on the training features and then re-applied,
# unchanged, to the test features.
# NOTE(review): `delna_thr=0.5` is presumably a missing-value ratio threshold
# for dropping columns — confirm against CYEConfigPreProcessor.
config = CYEConfigPreProcessor(delna_thr=0.5)
processor = CYEPreProcessor(config=config)

In [None]:
# --- Training data: synthetic rows produced by the GReaT generator ----------
generated_file = 'TrainGenerated-50000.csv'
great_processor = CYEGReaTProcessor()
df_train = great_processor.transform_merge(generated_file)

# Keep only the rows whose target value is strictly below 5000.
target_below_cap = df_train[cst.target_column] < 5000
df_train = df_train[target_below_cap]

# Separate the target from the features, then fit the preprocessor on the
# training features only.
y_train = df_train[cst.target_column]
X_train = processor.fit_transform(df_train.drop(columns=cst.target_column))

# --- Test data: transformed with the preprocessor fitted above --------------
X_test = processor.transform(
    pd.read_csv(os.path.join(cst.file_data_test), index_col='ID')
)

In [None]:
def compute_score(estimator1, n_splits=5, random_state=42):
    """Estimate the out-of-fold RMSE of a regressor on the training set.

    The continuous target is cut into (at most) ten quantile bins so that
    ``StratifiedKFold`` can stratify the folds on the target distribution.
    Out-of-fold predictions are obtained with ``cross_val_predict`` and
    compared against the notebook-level ``y_train``.

    Parameters
    ----------
    estimator1 : estimator object
        Unfitted regressor handed to ``cross_val_predict``.
    n_splits : int, default=5
        Number of cross-validation folds.
    random_state : int or None, default=42
        Seed for the fold shuffling, so that repeated runs report the same
        score. Pass ``None`` to get unseeded shuffling.

    Returns
    -------
    float
        Root mean squared error of the out-of-fold predictions. The value is
        also printed.
    """
    # Quantile bins of the target are used only as stratification labels.
    bins_train = pd.qcut(y_train, q=10, duplicates='drop', labels=False)
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    cv = skf.split(X=X_train, y=bins_train)

    y_pred = cross_val_predict(
        estimator=estimator1,
        X=X_train.to_numpy(),
        y=y_train.to_numpy(),
        cv=cv,
    )

    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # taking the square root of the MSE works on every version.
    rmse = metrics.mean_squared_error(y_true=y_train, y_pred=y_pred) ** 0.5
    print(rmse)

    return rmse


def submit(estimator2, score, model):
    """Fit an estimator on the full training set and write a submission CSV.

    The estimator is fitted on the notebook-level ``X_train`` / ``y_train``,
    used to predict ``X_test``, and the predictions are saved as a single
    ``Yield`` column indexed like ``X_test``.

    Parameters
    ----------
    estimator2 : estimator object
        Unfitted regressor exposing ``fit`` and ``predict``; it is fitted
        in place.
    score : float
        Validation score, formatted with three decimals into the file name.
    model : str
        Model label used as the file-name prefix.

    Returns
    -------
    None
        The submission is written to
        ``../<cst.path_submissions>/<model>_<score>.csv``.
    """
    estimator2.fit(X=X_train.to_numpy(), y=y_train.to_numpy())

    predictions = estimator2.predict(X_test.to_numpy())
    submission = pd.Series(predictions, index=X_test.index, name='Yield')

    dir_submissions = os.path.join('..', cst.path_submissions)
    # Create the output directory here so the function does not depend on
    # another notebook cell having been executed first.
    os.makedirs(dir_submissions, exist_ok=True)

    name_submission = f'{model}_{score:.3f}'
    file_submission = os.path.join(dir_submissions, f'{name_submission}.csv')

    submission.to_csv(file_submission, index=True)

In [None]:
# Ensure the submissions directory exists; submit() writes its CSV files
# under this same path.
os.makedirs(os.path.join('..', cst.path_submissions), exist_ok=True)

In [None]:
from xgboost import XGBRegressor

# XGBoost with default hyperparameters: compute the cross-validated RMSE,
# which is later embedded in the submission file name.
estimator1 = XGBRegressor()
score = compute_score(estimator1)

# A separate, unfitted instance is passed to submit(), which fits it on the
# full training set and writes the predictions for the test set.
estimator2 = XGBRegressor()
submit(estimator2, score, 'xgboost')

In [None]:
from lightgbm import LGBMRegressor

# LightGBM with default hyperparameters (verbosity=-1 is passed to keep its
# log output down): compute the cross-validated RMSE.
estimator1 = LGBMRegressor(verbosity=-1)
score = compute_score(estimator1)

# A separate, unfitted instance is passed to submit(), which fits it on the
# full training set and writes the predictions for the test set.
estimator2 = LGBMRegressor(verbosity=-1)
submit(estimator2, score, 'lightgbm')

In [None]:
from catboost import CatBoostRegressor

# CatBoost with default hyperparameters (verbose=0 is passed to keep its
# training output down): compute the cross-validated RMSE.
estimator1 = CatBoostRegressor(verbose=0)
score = compute_score(estimator1)

# A separate, unfitted instance is passed to submit(), which fits it on the
# full training set and writes the predictions for the test set.
estimator2 = CatBoostRegressor(verbose=0)
submit(estimator2, score, 'catboost')