In [15]:
from google.cloud import storage
import pickle
import pandas as pd
import io
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [None]:
# Point the Google client libraries at a service-account key file.
# `setdefault` keeps credentials that are already configured in the environment,
# so the notebook is not tied to this developer-specific absolute path; the path
# below is only the fallback when nothing has been configured.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/Users/harshsingh/Documents/Loblaw-case-study/key.json",
)

# Cloud Storage layout used by the download helpers in this notebook.
bucket_name = "loblaw-bucket"
pickle_folder_name = "models"             # prefix under which the pickled models live
data_folder_name = "processed/feature"    # prefix of the feature table
data_file_name = "sales-feature.parquet"  # feature table file name

# One pickled model file per product; each is evaluated in turn further down.
product_model_files = [
    "27in_4K_Gaming_Monitor_sales_model.pkl",
    "Google_Phone_sales_model.pkl",
    "Macbook_Pro_Laptop_sales_model.pkl",
    "ThinkPad_Laptop_sales_model.pkl",
    "iPhone_sales_model.pkl",
]

# Shared client and bucket handle used by `download_pkl` and `download_blob`.
client = storage.Client()
bucket = client.get_bucket(bucket_name)

def download_pkl(file_name):
    """Download a pickled object from the bucket's model folder and unpickle it.

    Args:
        file_name: Name of the pickle file located under ``pickle_folder_name``
            in the module-level ``bucket``.

    Returns:
        The object obtained by unpickling the downloaded bytes.
    """
    object_path = f"{pickle_folder_name}/{file_name}"
    blob = bucket.blob(object_path)

    # Stage the download in memory; the context manager releases the buffer as
    # soon as the object has been deserialized.
    with io.BytesIO() as buffer:
        blob.download_to_file(buffer)
        buffer.seek(0)  # rewind so unpickling starts at the first byte
        # SECURITY: pickle.load can execute arbitrary code embedded in the file.
        # Only load objects from a bucket whose contents are fully trusted.
        model = pickle.load(buffer)

    return model


In [17]:
def download_blob(folder_name, file_name):
    """Fetch a parquet object from the module-level ``bucket`` as a DataFrame.

    Args:
        folder_name: Prefix (folder) of the object inside the bucket.
        file_name: Name of the parquet file under that prefix.

    Returns:
        The result of ``pd.read_parquet`` on the downloaded bytes.
    """
    parquet_blob = bucket.blob(f'{folder_name}/{file_name}')

    # Download into memory, then rewind so the reader starts at the first byte.
    stream = io.BytesIO()
    parquet_blob.download_to_file(stream)
    stream.seek(0)

    return pd.read_parquet(stream)

In [18]:

# Function to evaluate model
def evaluate_model(model, test_data, target_col="daily_quantity", feature_cols=None):
    """Score a fitted model on a feature table and return regression metrics.

    Args:
        model: Object exposing ``predict(X)``.
        test_data: DataFrame containing the feature columns and ``target_col``.
        target_col: Name of the column holding the true values.
        feature_cols: Iterable of column names passed to ``model.predict``.
            Defaults to the calendar, lag and rolling-mean columns listed below.

    Returns:
        dict with keys ``"RMSE"``, ``"MAE"`` and ``"R2"``.
    """
    if feature_cols is None:
        feature_cols = ['day_of_week', 'month', 'day', 'lag_1_quantity',
                        'lag_7_quantity', 'lag_30_quantity', 'rolling_7d_mean',
                        'rolling_30d_mean']

    # Coerce to a list: pandas would treat a tuple as a single column key.
    X_test = test_data[list(feature_cols)]
    y_test = test_data[target_col]

    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = mse ** 0.5  # root of the MSE, in the same units as the target
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return {"RMSE": rmse, "MAE": mae, "R2": r2}


In [None]:

# Load the feature table that every model below is scored against.
# NOTE(review): the same unfiltered frame is passed to each product-specific
# model; presumably it should be restricted to that product's rows and to a
# held-out period before scoring — confirm against the training notebook.
test_data = download_blob(data_folder_name, data_file_name)

# Maps each model file name to its metrics dict ({"RMSE", "MAE", "R2"}).
model_results = {}
for file_name in product_model_files:
    print(f"Evaluating model: {file_name}")
    model = download_pkl(file_name)
    
    evaluation_metrics = evaluate_model(model, test_data)
    model_results[file_name] = evaluation_metrics

# Print one summary line per model, with metrics rounded to two decimals.
for model_name, metrics in model_results.items():
    print(f"{model_name} -> RMSE: {metrics['RMSE']:.2f}, MAE: {metrics['MAE']:.2f}, R2: {metrics['R2']:.2f}")


Evaluating model: 27in_4K_Gaming_Monitor_sales_model.pkl
Evaluating model: Google_Phone_sales_model.pkl
Evaluating model: Macbook_Pro_Laptop_sales_model.pkl
Evaluating model: ThinkPad_Laptop_sales_model.pkl
Evaluating model: iPhone_sales_model.pkl
27in_4K_Gaming_Monitor_sales_model.pkl -> RMSE: 0.50, MAE: 0.21, R2: 0.08
Google_Phone_sales_model.pkl -> RMSE: 0.51, MAE: 0.22, R2: 0.04
Macbook_Pro_Laptop_sales_model.pkl -> RMSE: 0.49, MAE: 0.23, R2: 0.09
ThinkPad_Laptop_sales_model.pkl -> RMSE: 0.52, MAE: 0.20, R2: -0.01
iPhone_sales_model.pkl -> RMSE: 0.53, MAE: 0.24, R2: -0.03


The R² scores on the test data are low (close to zero, and slightly negative for the ThinkPad and iPhone models), most likely because the data has very little variance. More features are needed to improve the overall performance of the models.