In [1]:
import os
import pandas as pd
import numpy as np
import datetime

In [2]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import ttest_ind

In [3]:
from config import input_data_xlsx_filepath, input_data_csv_filepath, date_column, date_prefix_list, trained_models_folderpath, \
date_column, ad_platform_list, target_dict, train_size, feature_list, model_dict, save_val_end_points, n_trials_optuna, all_predictions_csv_filepath, \
all_validation_metrics_filepath, all_ranked_products_filepath, all_actual_products_selected_filepath, all_pred_products_selected_filepath, all_ab_test_results_filepath
from data_load import load_data
from data_preprocessing import preprocess_data
from feature_engineering import engineer_features
from model_training import set_train_test_val_size, train_model
from metrics_prediction import predict_metric
from model_revalidation import compute_metrics_by_category
from product_scoring import rank_products
from AB_testing import ab_test_selection

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175402 entries, 0 to 175401
Data columns (total 27 columns):
 #   Column                         Non-Null Count   Dtype  
---  ------                         --------------   -----  
 0   date                           175402 non-null  object 
 1   product_id                     175402 non-null  object 
 2   category                       175402 non-null  object 
 3   price                          175402 non-null  float64
 4   sale_price                     46399 non-null   float64
 5   discount_rate                  175402 non-null  float64
 6   product_age                    175402 non-null  int64  
 7   product_status                 175402 non-null  object 
 8   pct_product_variants_in_stock  175402 non-null  float64
 9   meta_product_revenue           175402 non-null  float64
 10  meta_item_quantity_sold        175402 non-null  int64  
 11  meta_product_detail_views      175402 non-null  int64  
 12  meta_quantity_added_to_cart   

In [4]:
# End-to-end run: load -> preprocess -> feature engineering -> per-target model
# training and prediction -> validation metrics -> product ranking -> A/B selection.
#
# Disabled step kept for reference (presumably regenerates the CSV from the
# xlsx export -- TODO confirm against data_load.load_data):
# load_data(input_data_xlsx_filepath, input_data_csv_filepath)

# Read the input CSV and index the rows by product id.
input_df = pd.read_csv(input_data_csv_filepath).set_index('product_id')

print('Preprocessing data...')
preprocessed_df = preprocess_data(input_df, date_column)

print('Feature engineering...')
featured_df = engineer_features(preprocessed_df, date_column, date_prefix_list, ad_platform_list)

# The helper receives a copy, so featured_df itself is not handed over.
val_first_day, val_last_day, val_last_day_str = save_val_end_points(featured_df.copy())

# Every column whose name starts with one of the configured date prefixes is
# appended to the configured feature list.
date_prefixes = tuple(date_prefix_list)
dummy_columns = [col for col in featured_df.columns if col.startswith(date_prefixes)]
features = feature_list + dummy_columns

# One prediction frame is collected per (ad platform, target) pair.
predictions_df_list = []
for ad_platform in ad_platform_list:
    for target in target_dict[ad_platform]:
        print('Model training...')

        # Positional arguments shared by both split calls below; only the
        # `encoding` flag differs between them.
        split_args = (
            featured_df, val_first_day, val_last_day, train_size, target, features,
            date_column, date_prefix_list, dummy_columns,
        )

        # encoding=False: only the validation pair is returned here; it is kept
        # so the un-encoded validation features can be passed to predict_metric.
        orig_X_val, orig_y_val = set_train_test_val_size(*split_args, encoding=False)

        # encoding=True: train/test/validation splits plus the encoders and scalers.
        (
            X_train, X_test, X_val,
            y_train, y_test, y_val,
            encoders, scalers,
        ) = set_train_test_val_size(*split_args, encoding=True)

        # Return value is not used; presumably the fitted models are stored
        # under trained_models_folderpath -- TODO confirm in model_training.
        train_model(
            featured_df, ad_platform, target, features, val_last_day_str,
            trained_models_folderpath,
            X_train, X_test, y_train, y_test, X_val, y_val,
            encoders, scalers, model_dict, n_trials_optuna,
        )

        print('Metrics prediction...')
        prediction_df = predict_metric(
            trained_models_folderpath, val_last_day_str,
            X_val, y_val, orig_X_val,
            target, ad_platform, model_dict,
        )
        predictions_df_list.append(prediction_df)

# Stack all per-target predictions and write them out (index included).
all_predictions_df = pd.concat(predictions_df_list)
all_predictions_df.to_csv(all_predictions_csv_filepath, index=True)

print('Model Validation...')
metrics_df = compute_metrics_by_category(all_predictions_df, all_validation_metrics_filepath)

print('Product Scoring...')
all_ranked_products_df = rank_products(all_predictions_df, metrics_df, all_ranked_products_filepath)

print('AB testing...')
(
    all_actual_products_selected_df,
    all_pred_products_selected_df,
    all_ab_test_results_df,
) = ab_test_selection(
    all_ranked_products_df,
    all_predictions_df,
    all_actual_products_selected_filepath,
    all_pred_products_selected_filepath,
    all_ab_test_results_filepath,
)

Preprocessing data...
Feature engineering...
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
Model training...
<class 'pandas.core.frame.DataFrame'>
Index: 23798 entries, 000d864b-c092-4160-8eda-e9efe2f6f639 to fff05380-fae8-44b2-b2ea-f62969ac5706
Data columns (total 7 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   category                       23798 non-null  object        
 1   sale_price                     23798 non-null  float64       
 2   discount_rate                  23798 non-null  float64       
 3   product_age                    23798 non-null  int64         
 4   product_status                 23798 non-null  object        
 5   pct_product_variants_in_stock  23798 non-null  float64       
 6   date                           23798 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(3), int64(1), object(2)
memory usage: 1.5+ MB
None


[I 2025-06-24 11:07:25,080] A new study created in memory with name: no-name-de1c1473-fd88-4cda-a2ea-73ff6bf32a0c
[I 2025-06-24 11:07:25,268] Trial 0 finished with value: 5.73082728421788e-09 and parameters: {}. Best is trial 0 with value: 5.73082728421788e-09.


Optimizing LinearRegression...


[I 2025-06-24 11:07:25,427] A new study created in memory with name: no-name-9a84de09-76e0-4fb5-b2ee-50ce93e4db8b


LinearRegression - Best MSE: 5.73082728421788e-09, R2: 1.0, Best Params: {}
Optimizing DecisionTree...


[I 2025-06-24 11:07:26,105] Trial 0 finished with value: 1.6996160845005282e+20 and parameters: {'max_depth': 10}. Best is trial 0 with value: 1.6996160845005282e+20.
[I 2025-06-24 11:07:26,858] A new study created in memory with name: no-name-47086705-e5bf-4858-af14-e2c55bb7398a


DecisionTree - Best MSE: 1.7001352567248663e+20, R2: 0.8505876071743472, Best Params: {'max_depth': 10}
Optimizing KNeighbors...


[I 2025-06-24 11:07:38,250] Trial 0 finished with value: 1.6299967413243965e+19 and parameters: {'n_neighbors': 6}. Best is trial 0 with value: 1.6299967413243965e+19.


KNeighbors - Best MSE: 1.6299967413243965e+19, R2: 0.9856751565820444, Best Params: {'n_neighbors': 6}

Top 3 Models:
LinearRegression: MSE = 5.73082728421788e-09, R2 = 1.0
KNeighbors: MSE = 1.6299967413243965e+19, R2 = 0.9856751565820444
DecisionTree: MSE = 1.7001352567248663e+20, R2 = 0.8505876071743472

Ensemble Weights:
LinearRegression: 0.3525766416210267
KNeighbors: 0.3475260364369769
DecisionTree: 0.2998973219419965
Metrics prediction...
Model training...
<class 'pandas.core.frame.DataFrame'>
Index: 23798 entries, 000d864b-c092-4160-8eda-e9efe2f6f639 to fff05380-fae8-44b2-b2ea-f62969ac5706
Data columns (total 7 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   category                       23798 non-null  object        
 1   sale_price                     23798 non-null  float64       
 2   discount_rate                  23798 non-null  float64       
 3   product_age             

[I 2025-06-24 11:08:12,440] A new study created in memory with name: no-name-e2e03589-fa37-439e-ac09-60d39bcc6869
[I 2025-06-24 11:08:12,588] Trial 0 finished with value: 8.859090715106326e+21 and parameters: {}. Best is trial 0 with value: 8.859090715106326e+21.


Optimizing LinearRegression...


[I 2025-06-24 11:08:12,759] A new study created in memory with name: no-name-bf3bf6e0-8af3-4eb6-9d9d-e4a8a755e536


LinearRegression - Best MSE: 8.859090715106326e+21, R2: 0.022229087677207215, Best Params: {}
Optimizing DecisionTree...


[I 2025-06-24 11:08:13,505] Trial 0 finished with value: 1.004765922203691e+22 and parameters: {'max_depth': 10}. Best is trial 0 with value: 1.004765922203691e+22.
[I 2025-06-24 11:08:14,212] A new study created in memory with name: no-name-b292961e-31ff-425e-b02d-a522ff882fc6


DecisionTree - Best MSE: 9.647756981944092e+21, R2: -0.06481539127019231, Best Params: {'max_depth': 10}
Optimizing KNeighbors...


[I 2025-06-24 11:08:24,872] Trial 0 finished with value: 1.0796936629881609e+22 and parameters: {'n_neighbors': 3}. Best is trial 0 with value: 1.0796936629881609e+22.


KNeighbors - Best MSE: 1.0796936629881609e+22, R2: -0.1916494500828707, Best Params: {'n_neighbors': 3}

Top 3 Models:
LinearRegression: MSE = 8.859090715106326e+21, R2 = 0.022229087677207215
DecisionTree: MSE = 9.647756981944092e+21, R2 = -0.06481539127019231
KNeighbors: MSE = 1.0796936629881609e+22, R2 = -0.1916494500828707

Ensemble Weights:
LinearRegression: -0.09490048947851343
DecisionTree: 0.2767100677545849
KNeighbors: 0.8181904217239285
Metrics prediction...
Model Validation...
Product Scoring...
AB testing


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  res = hypotest_fun_out(*samples, **kwds)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  res = hypotest_fun_out(*samples, **kwds)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


In [9]:
# Drop exact duplicate rows from both selection tables, then write each table
# to its CSV path without the index column.
# NOTE(review): these are the same paths passed to ab_test_selection in cell
# In [4]; presumably this overwrites files that call already produced -- confirm.
all_actual_products_selected_df, all_pred_products_selected_df = (
    selected_df.drop_duplicates()
    for selected_df in (all_actual_products_selected_df, all_pred_products_selected_df)
)

for selected_df, selected_csv_path in (
    (all_actual_products_selected_df, all_actual_products_selected_filepath),
    (all_pred_products_selected_df, all_pred_products_selected_filepath),
):
    selected_df.to_csv(selected_csv_path, index=False)

In [5]:
# metrics_df = compute_metrics_by_category(all_predictions_df, all_validation_metrics_filepath)

In [6]:
# metrics_df

In [7]:
# all_ranked_products_df = rank_products(all_predictions_df, metrics_df, all_ranked_products_filepath)

In [8]:
# all_actual_products_selected_df, all_pred_products_selected_df, all_ab_test_results_df = ab_test_selection(all_ranked_products_df, all_predictions_df,\
#                                                                                                            all_actual_products_selected_filepath, \
#                                                                                                            all_pred_products_selected_filepath, \
#                                                                                                            all_ab_test_results_filepath)