In [None]:
# Cell 1: Imports and Data Loading
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Read the augmented bakery dataset (path is relative to the working directory)
data = pd.read_csv('augmented_bakery_data.csv')

# One encoder per categorical column; both stay available as top-level names
le_product, le_storage = LabelEncoder(), LabelEncoder()

# Fit each encoder on its raw column and store the integer codes next to it
for encoder, raw_col, encoded_col in (
    (le_product, 'Product_Type', 'Product_Type_encoded'),
    (le_storage, 'Storage_Condition', 'Storage_Condition_encoded'),
):
    data[encoded_col] = encoder.fit_transform(data[raw_col])

# Report the label -> code mapping of each encoder; the position of a label
# in `classes_` is printed as its code
for heading, encoder in (
    ("Product Type Encoding:", le_product),
    ("\nStorage Condition Encoding:", le_storage),
):
    print(heading)
    for i, label in enumerate(encoder.classes_):
        print(f"{label}: {i}")

In [None]:
# Cell 2: Data Preparation Function
def prepare_data(data, target_type='product', include_other_label=False):
    """
    Prepare data for classification

    Parameters:
    - data: DataFrame containing the data; every column whose name starts
      with 'gain_' or 'phase_' is used as a feature, and the columns
      'Product_Type_encoded' / 'Storage_Condition_encoded' supply the labels
    - target_type: 'product' or 'storage'
    - include_other_label: whether to include the other label as a feature
      (added as 'storage_condition' when predicting the product, or as
      'product_type' when predicting the storage condition)

    Returns:
    - X: features (a copy, so `data` itself is never modified)
    - y: target variable

    Raises:
    - ValueError: if target_type is neither 'product' nor 'storage'
    """
    # Reject unknown targets up front: previously any other value (e.g. a
    # typo such as 'Product') silently selected the storage target.
    if target_type not in ('product', 'storage'):
        raise ValueError(
            f"target_type must be 'product' or 'storage', got {target_type!r}"
        )
    predicting_product = target_type == 'product'

    # Get base features (gains and phases)
    feature_cols = [col for col in data.columns if col.startswith(('gain_', 'phase_'))]
    X = data[feature_cols].copy()

    # Add the label that is NOT being predicted as an extra feature if requested
    if include_other_label:
        if predicting_product:
            X['storage_condition'] = data['Storage_Condition_encoded']
        else:
            X['product_type'] = data['Product_Type_encoded']

    # Select target
    y = data['Product_Type_encoded'] if predicting_product else data['Storage_Condition_encoded']

    return X, y