In [5]:
# Google Colab Notebook: Packaging Strength Recommendation per Layer

# ----------------------------------------
# Cell 1: Install Dependencies
# ----------------------------------------
!pip install --quiet pandas numpy scikit-learn xgboost matplotlib

# ----------------------------------------
# Cell 2: Imports
# ----------------------------------------
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, accuracy_score
import xgboost as xgb
import pickle

# ----------------------------------------
# Cell 3: Synthetic Data Generation
# ----------------------------------------
def generate_synthetic_data(n_samples=10000000, n_layers=4, random_state=42):
    np.random.seed(random_state)
    rows = []
    # Map for ECT
    ECT_map = {'corrugated': 3, 'plastic': 2, 'wood': 4}
    for _ in range(n_samples):
        layer = np.random.randint(0, n_layers)
        num_boxes = np.random.randint(5, 20)
        avg_box_weight = np.random.uniform(2, 20)  # kg
        total_weight_above = layer * num_boxes * avg_box_weight
        box_material = np.random.choice(list(ECT_map.keys()))
        box_thickness = np.random.uniform(1, 10)  # mm
        fragile = np.random.choice([0, 1], p=[0.8, 0.2])
        perimeter = num_boxes * box_thickness
        depth = np.random.uniform(10, 100)
        k = 5.87
        C = k * ECT_map[box_material] * np.sqrt(perimeter * depth)
        # Bin C into strength levels
        if C < 500:
            strength = 'Light'
        elif C < 1000:
            strength = 'Medium'
        else:
            strength = 'Heavy'
        rows.append({
            'layer': layer,
            'num_boxes': num_boxes,
            'avg_box_weight': avg_box_weight,
            'total_weight_above': total_weight_above,
            'box_material': box_material,
            'box_thickness': box_thickness,
            'fragile': fragile,
            'strength': strength
        })
    return pd.DataFrame(rows)

# Generate data
df = generate_synthetic_data()
df.head()

# ----------------------------------------
# Cell 4: Preprocessing & Feature Pipeline
# ----------------------------------------
# Features and target
X = df.drop('strength', axis=1)
y = df['strength']

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # Light=1, Medium=2, Heavy=0 (example)

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Preprocessing for features
numeric_features = ['layer', 'num_boxes', 'avg_box_weight', 'total_weight_above', 'box_thickness']
categorical_features = ['box_material', 'fragile']

numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])
cat_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', cat_transformer, categorical_features)
    ]
)

# ----------------------------------------
# Cell 5: Model Pipeline & Training
# ----------------------------------------
model = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=3,
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    use_label_encoder=False,
    eval_metric='mlogloss'
)

clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', model)
])

# Train model
clf.fit(X_train, y_train)

# ----------------------------------------
# Cell 6: Evaluation
# ----------------------------------------
# Predict
y_pred = clf.predict(X_test)

# Decode labels for readability
y_test_labels = label_encoder.inverse_transform(y_test)
y_pred_labels = label_encoder.inverse_transform(y_pred)

# Metrics
print("Accuracy:", accuracy_score(y_test_labels, y_pred_labels))
print("Classification Report:\n", classification_report(y_test_labels, y_pred_labels))

# ----------------------------------------
# Cell 7: Save Model and Label Encoder
# ----------------------------------------
with open('packaging_strength_model.pkl', 'wb') as f:
    pickle.dump(clf, f)
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)
print("Model pipeline and label encoder saved.")

# ----------------------------------------
# Cell 8: Prediction Function
# ----------------------------------------
def recommend_strength_for_layers(layer_data, model_path='packaging_strength_model.pkl', le_path='label_encoder.pkl'):
    """
    layer_data: List of dicts, each with keys:
      layer, num_boxes, avg_box_weight, total_weight_above,
      box_material, box_thickness, fragile
    """
    df_layers = pd.DataFrame(layer_data)
    # Load model and encoder
    with open(model_path, 'rb') as f:
        pipeline = pickle.load(f)
    with open(le_path, 'rb') as f:
        le = pickle.load(f)
    preds_encoded = pipeline.predict(df_layers)
    preds = le.inverse_transform(preds_encoded)
    return [{'layer': ld['layer'], 'suggested_strength': p}
            for ld, p in zip(layer_data, preds)]

# Example usage
example = [
    {'layer': 0, 'num_boxes': 10, 'avg_box_weight': 5.0, 'total_weight_above': 0,
     'box_material': 'corrugated', 'box_thickness': 5, 'fragile': 0},
    {'layer': 1, 'num_boxes': 10, 'avg_box_weight': 5.0, 'total_weight_above': 50,
     'box_material': 'corrugated', 'box_thickness': 5, 'fragile': 0}
]
print(recommend_strength_for_layers(example))

# ----------------------------------------
# Cell 9: Next Steps & Hyperparameter Tuning
# ----------------------------------------
# - Use GridSearchCV or Optuna to tune hyperparameters.
# - Replace synthetic data with real test data or sensor logs.
# - Integrate into FastAPI or Streamlit for vendor interface.


Parameters: { "use_label_encoder" } are not used.



Accuracy: 0.7184085
Classification Report:
               precision    recall  f1-score   support

       Heavy       0.75      0.84      0.80    808380
       Light       0.78      0.48      0.60    338703
      Medium       0.67      0.69      0.68    852917

    accuracy                           0.72   2000000
   macro avg       0.73      0.67      0.69   2000000
weighted avg       0.72      0.72      0.71   2000000

Model pipeline and label encoder saved.
[{'layer': 0, 'suggested_strength': 'Medium'}, {'layer': 1, 'suggested_strength': 'Medium'}]
