In [34]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report


In [42]:

# Load the dataset
data = pd.read_csv('food_wastage_dataset_with_output.csv')

# Select only the necessary features
selected_features = ['Day of Week', 'Breakfast Dish', 'Dish1_Name', 'Dish2_Name', 
                     'Dish3_Name', 'Staples1', 'Dessert', 'Expected Diners', 'Meal Type']
data = data[selected_features + ['Output']]  # Keep only the selected features and target column

# Define feature set and target variable
X = data[selected_features]
y = data['Output']

# Identify numerical and categorical features
numerical_features = ['Expected Diners']  # Only 'Expected Diners' is assumed to be numerical
categorical_features = [
    'Day of Week', 'Breakfast Dish', 'Dish1_Name', 'Dish2_Name', 'Dish3_Name', 
    'Staples1', 'Dessert', 'Meal Type'
]

# Define the numeric and categorical transformers
numeric_transform = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', MinMaxScaler())
])

categorical_transform = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Create the preprocessor
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transform, numerical_features),
    ('cat', categorical_transform, categorical_features)
])

# Define the model
model = RandomForestClassifier(random_state=42)

# Create the pipeline
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', model)
])

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the pipeline
pipeline.fit(x_train, y_train)

# Make predictions and evaluate
y_pred = pipeline.predict(x_test)
print(classification_report(y_test, y_pred))

SyntaxError: invalid non-printable character U+00A0 (2433865383.py, line 54)

In [36]:
# Save the model to a pickle file
with open('food_wastage_model.pkl', 'wb') as model_file:
    pickle.dump(pipeline, model_file)

# -----------------------------------------------------------------------------------------------------------------------

# information

In [24]:
data.columns

Index(['Day of Week', 'Breakfast Dish', 'Breakfast Quantity Made (kg)',
       'Breakfast Quantity Consumed (kg)', 'Breakfast Quantity Wasted (kg)',
       'Dish1_Name', 'Dish1_Quantity_Made (kg)',
       'Dish1_Quantity_Consumed (kg)', 'Dish1_Quantity_Wasted (kg)',
       'Dish2_Name', 'Dish2_Quantity_Made (kg)',
       'Dish2_Quantity_Consumed (kg)', 'Dish2_Quantity_Wasted (kg)',
       'Dish3_Name', 'Dish3_Quantity_Made (kg)',
       'Dish3_Quantity_Consumed (kg)', 'Dish3_Quantity_Wasted (kg)',
       'Staples1', 'Staples1_Quantity_Made (kg)',
       'Staples1_Quantity_Consumed (kg)', 'Staples1_Quantity_Wasted (kg)',
       'Dessert', 'Expected Diners', 'Avg Portion (kg)', 'Satisfaction Score',
       'Historical_Trend', 'Seasonal_Preference', 'Preferred Dish',
       'Meal Type', 'Lunch/Dinner Expected Diners',
       'Avg Portion Lunch/Dinner (kg)', 'Quantity Made Lunch/Dinner (kg)',
       'Adjusted Quantity Made Lunch/Dinner (kg)',
       'Quantity Consumed Lunch/Dinner (kg)',
 

In [19]:
data['Breakfast Dish'].unique()

array(['Dosa', 'Baingan Bharta', 'Idli', 'Paneer Butter Masala', 'Rajma',
       'Pav Bhaji', 'Kheer', 'Dal Tadka', 'Aloo Gobi', 'Roti', 'Naan',
       'Chole', 'Paratha', 'Gulab Jamun', 'Jalebi'], dtype=object)

In [21]:
data['Dish1_Name'].unique()

array(['Pav Bhaji', 'Idli', 'Kheer', 'Aloo Gobi', 'Naan', 'Gulab Jamun',
       'Roti', 'Dosa', 'Rajma', 'Dal Tadka', 'Baingan Bharta', 'Jalebi',
       'Paneer Butter Masala', 'Paratha', 'Chole'], dtype=object)

In [22]:
data['Dish2_Name'].unique()

array(['Naan', 'Kheer', 'Gulab Jamun', 'Rajma', 'Pav Bhaji', 'Roti',
       'Paneer Butter Masala', 'Chole', 'Idli', 'Paratha', 'Dosa',
       'Jalebi', 'Dal Tadka', 'Baingan Bharta', 'Aloo Gobi'], dtype=object)

In [23]:
data['Dish3_Name'].unique()

array(['Naan', 'Paneer Butter Masala', 'Paratha', 'Idli', 'Dosa', 'Rajma',
       'Roti', 'Kheer', 'Gulab Jamun', 'Dal Tadka', 'Aloo Gobi', 'Chole',
       'Jalebi', 'Baingan Bharta', 'Pav Bhaji'], dtype=object)

In [25]:
data['Staples1'].unique()

array(['Dal Tadka', 'Chole', 'Baingan Bharta', 'Jalebi', 'Naan', 'Rajma',
       'Pav Bhaji', 'Roti', 'Aloo Gobi', 'Dosa', 'Idli', 'Paratha',
       'Gulab Jamun', 'Kheer', 'Paneer Butter Masala'], dtype=object)

In [27]:
data['Dessert'].unique()

array(['Chole', 'Dosa', 'Roti', 'Naan', 'Aloo Gobi', 'Baingan Bharta',
       'Dal Tadka', 'Rajma', 'Idli', 'Jalebi', 'Pav Bhaji', 'Kheer',
       'Paratha', 'Paneer Butter Masala', 'Gulab Jamun'], dtype=object)

In [28]:
data['Meal Type'].unique()


array(['Lunch', 'Breakfast', 'Dinner'], dtype=object)