In [2]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder, LabelEncoder
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load dataset
data = pd.read_csv('all_combinations_with_ranked_products.csv')  # Update with your file path
print("Dataset Loaded")

# Inputs: scale, location, storage_capacity, quality_preference, product_category, sustainability_focus
# Outputs: top_product, second_product, third_product

input_columns = [
    'scale', 'location', 'storage_capacity', 'quality_preference', 
    'product_category', 'sustainability_focus'
]
output_columns = ['top_product', 'second_product', 'third_product']

# Define columns for preprocessing
ordinal_columns = ['scale', 'storage_capacity', 'quality_preference']
categorical_columns = ['location', 'product_category']
numerical_columns = ['sustainability_focus']

# Define pipelines for each feature type
ordinal_pipeline = Pipeline(steps=[('ordinal', OrdinalEncoder())])
categorical_pipeline = Pipeline(steps=[('onehot', OneHotEncoder(drop='first', sparse_output=False))])
numerical_pipeline = Pipeline(steps=[('scaler', StandardScaler())])

# Combine preprocessing steps into a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('ord', ordinal_pipeline, ordinal_columns),
        ('cat', categorical_pipeline, categorical_columns),
        ('num', numerical_pipeline, numerical_columns),
    ]
)

# Apply preprocessing pipeline to features
X = data[input_columns]
y = data[output_columns]

# Preprocess the features
X_transformed = preprocessor.fit_transform(X)

# Convert preprocessed features into a DataFrame
preprocessed_columns = (
    ordinal_columns + 
    list(preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_columns)) + 
    numerical_columns
)
X_processed = pd.DataFrame(X_transformed, columns=preprocessed_columns, index=X.index)

# Label encode output products
output_encoders = {col: LabelEncoder() for col in output_columns}
for col in output_columns:
    y[col] = output_encoders[col].fit_transform(y[col])

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

# Multi-label classification with Random Forest
rf_model = RandomForestClassifier(random_state=42)
multi_output_model = MultiOutputClassifier(rf_model)

# Train the model
multi_output_model.fit(X_train, y_train)
print("Model Trained")

# Predictions
y_pred = multi_output_model.predict(X_test)

# Decode predictions back to original labels
decoded_predictions = pd.DataFrame(y_pred, columns=output_columns)
for col in output_columns:
    decoded_predictions[col] = output_encoders[col].inverse_transform(decoded_predictions[col])

# Evaluation
print("\nClassification Report:")
for i, col in enumerate(output_columns):
    print(f"\nProduct: {col}")
    print(classification_report(y_test[col], y_pred[:, i]))

# Display sample predictions
print("\nSample Predictions:")
print(decoded_predictions.head())


Dataset Loaded


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[col] = output_encoders[col].fit_transform(y[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[col] = output_encoders[col].fit_transform(y[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y[col] = output_encoders[col].fit_transform(y[col])


Model Trained

Classification Report:

Product: top_product
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        29
           1       1.00      1.00      1.00        35
           2       1.00      1.00      1.00        27
           3       1.00      1.00      1.00        27
           4       1.00      1.00      1.00        39
           5       1.00      1.00      1.00        41
           6       1.00      1.00      1.00        31
           7       1.00      1.00      1.00        31

    accuracy                           1.00       260
   macro avg       1.00      1.00      1.00       260
weighted avg       1.00      1.00      1.00       260


Product: second_product
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        29
           1       1.00      1.00      1.00        27
           2       1.00      1.00      1.00        41
           3       1.00      1.00      1.00    