In [None]:
# Core third-party / stdlib modules used throughout the notebook.
import pandas as pd, wandb, warnings, optuna, joblib
# Suppress every warning category for the rest of the kernel session.
warnings.filterwarnings('ignore')
# Project-local helpers: feature generation, preprocessing and Optuna wrapper.
from features.extractor import FeatureExtractor
from features.final_processing import CustomColumnTransformer
from tuning.optuna_tuning import OptunaTuner
from configs import utils
# Project helper executed at import time; presumably authenticates this
# session with Weights & Biases — confirm in configs.utils.
utils.login_wandb()
# scikit-learn pieces used for the baseline model and its evaluation.
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import Pipeline

In [None]:
# Load the customers / sales tables, preferring the joblib cache written by a
# previous run and falling back to the source Excel workbook.
try:
    customers = joblib.load('customers.joblib')
    sales = joblib.load('sales.joblib')
except Exception:
    # Cache is missing or unreadable: rebuild it from the workbook.
    # `except Exception` (rather than a bare `except:`) keeps this best-effort
    # fallback but no longer swallows KeyboardInterrupt / SystemExit.
    # Passing a list of sheet names parses the workbook once and returns a
    # dict of DataFrames keyed by sheet name.
    sheets = pd.read_excel(
        'ucy_eko_data.xlsx',
        sheet_name=['smile_customers', 'smile_sales']
    )
    customers, sales = sheets['smile_customers'], sheets['smile_sales']
    joblib.dump(customers, 'customers.joblib')
    joblib.dump(sales, 'sales.joblib')

In [None]:
# Settings handed to the project's FeatureExtractor; the meaning of each
# option is defined in features.extractor (not visible from this notebook).
extractor_options = dict(
    sales=sales,
    customers=customers,
    target_month=3,
    perform_split=True,
    generation_type='continuous',
    filtering_set='sales',
    period=60,
    subperiod=15,
)
fe = FeatureExtractor(**extractor_options)

# transform() yields four objects, unpacked here as train/test features and labels.
X_train, X_test, y_train, y_test = fe.transform()

In [None]:
# Every training column whose name contains 'qty' is routed to the scaler.
qty_cols = list(filter(lambda name: 'qty' in name, X_train.columns))

# Only scaling is configured; the one-hot, winsorising and skip lists are
# explicitly left unset.
col_transform = CustomColumnTransformer(
    scaling_algo=RobustScaler(),
    cols_for_scaling=qty_cols,
    cols_for_ohe=None,
    cols_for_winsor=None,
    cols_to_skip=None,
)

In [None]:
# Baseline model: preprocessing followed by an untuned random forest that
# considers all features at each split (max_features=None).
baseline_forest = RandomForestClassifier(max_features=None, random_state=571, n_jobs=7)
pipe = Pipeline(
    steps=[
        ('column_transformer', col_transform),
        ('rf', baseline_forest),
    ]
)

In [None]:
# Fit the baseline pipeline on the training split and score both splits.
pipe.fit(X_train, y_train)
train_preds, test_preds = pipe.predict(X_train), pipe.predict(X_test)

# Print one classification report per split, train first.
for title, truth, preds in (
    ('Train data', y_train, train_preds),
    ('Test data', y_test, test_preds),
):
    print(title)
    print(classification_report(truth, preds))

In [None]:
# Pre-transform both splits once so the tuner works on ready-made matrices.
# The transformer is fitted on the training split ONLY and then applied to the
# test split: re-fitting on X_test would leak test-set statistics into the
# scaling and scale the two splits with different parameters.
# NOTE: this overwrites X_train / X_test in place, so re-running the cell
# would transform already-transformed data; re-run the feature-extraction
# cell first if this cell needs to be executed again.
X_train = col_transform.fit_transform(X_train, y_train)
X_test = col_transform.transform(X_test)

In [None]:
# Hyperparameter search space, one tuple per parameter:
# (name, kind, low, high[, extra options]).
search_space = [
    ('n_estimators', 'int', 10, 200),
    ('max_depth', 'int', 20, 40),
    ('max_features', 'float', 0.05, 1.0, {'step': 0.05}),
    ('max_samples', 'float', 0.05, 1.0, {'step': 0.05}),
    ('min_samples_leaf', 'float', 1e-4, 1e-2, {'log': True}),
    ('min_samples_split', 'float', 1e-4, 1e-2, {'log': True}),
]

# Maximise accuracy for a random forest with a fixed seed.
rf_op = OptunaTuner(
    RandomForestClassifier,
    accuracy_score,
    direction='maximize',
    random_state=571,
    n_jobs=7,
)

# NOTE(review): the first argument (200) is presumably the number of trials,
# and X_test / y_test appear to serve as the evaluation set during tuning —
# if so, the test metrics reported later are optimistic; confirm in
# tuning.optuna_tuning and consider a separate validation split.
rf_op.fit(200, X_train, y_train, X_test, y_test, *search_space)

In [None]:
# Display the parameters of the estimator held by the tuner after the study
# (presumably the best-scoring configuration — confirm in OptunaTuner).
rf_op.model.get_params()

In [None]:
# Plot the objective value of each trial in the tuner's Optuna study.
optuna.visualization.plot_optimization_history(rf_op.study)

In [None]:
# Rebuild a fresh forest from the tuner's parameters and log its train / test
# performance to Weights & Biases as one run per split.
config = rf_op.model.get_params()
rf = RandomForestClassifier(**config)


def start_tuning_run(job_type):
    """Open a W&B run for this experiment.

    Every argument except ``job_type`` is shared between the train and test
    runs, so it is fixed here once instead of being repeated per split.

    Parameters
    ----------
    job_type : str
        Value forwarded as ``job_type`` to ``utils.init_wandb_run``.

    Returns
    -------
    Whatever ``utils.init_wandb_run`` returns; it is used as a context manager.
    """
    return utils.init_wandb_run(
        name='continuous_features_optuna_subperiod_15',
        model=RandomForestClassifier,
        config=config,
        target_month=fe.target_month,
        group='parameters_tuning',
        job_type=job_type
    )


def log_split_report(run, split, y_true, y_pred):
    """Log a classification report for one data split as a W&B artifact.

    Parameters
    ----------
    run
        Active run object; must expose ``name`` and ``log_artifact``.
    split : str
        Split label used to build the artifact name ``report_<split>``.
    y_true, y_pred
        True and predicted labels for the split.

    Returns
    -------
    The parsed report produced by ``utils.parse_classification_report``.
    """
    rep = utils.parse_classification_report(
        classification_report(y_true, y_pred, output_dict=True)
    )

    metadata = {
        'experiment': {
            'name': run.name,
        },
        'performance_report': rep,
        'config': config
    }

    artifact = wandb.Artifact(
        name=f'report_{split}',
        type='performance_metric',
        metadata=metadata
    )
    run.log_artifact(artifact)
    return rep


# Create w&b run for the training set (the model is fitted inside the run).
with start_tuning_run('tuning_train') as run:
    rf.fit(X_train, y_train)
    train_preds = rf.predict(X_train)
    rep = log_split_report(run, 'train', y_train, train_preds)
    # NOTE(review): explicit finish kept from the original; likely redundant
    # if the context manager already finishes the run — confirm in configs.utils.
    run.finish()

# Create w&b run for the test set (reuses the model fitted above).
with start_tuning_run('tuning_test') as run:
    test_preds = rf.predict(X_test)
    rep = log_split_report(run, 'test', y_test, test_preds)
    run.finish()