In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, confusion_matrix, average_precision_score

# Load the raw restaurant/event records and preview the first rows.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file exists.
restaurants_csv_path = r"C:\Users\ahmed\Desktop\restaurants_3000.csv"
restaurant_df = pd.read_csv(restaurants_csv_path)
restaurant_df.head()

Unnamed: 0,type_of_food,number_of_guests,event_type,quantity_of_food,storage_conditions,purchase_history,seasonality,preparation_method,geographical_location,pricing,wastage_food_amount
0,Meat,62,Wedding,300,Refrigerated,Regular,All Seasons,Plated,Upper Egypt,High,16
1,Bakery,266,Corporate,80,Refrigerated,Regular,Winter,Plated,Delta,Low,76
2,Vegetables,416,Festival,275,Room Temperature,Occasional,All Seasons,Buffet,Upper Egypt,Moderate,48
3,Fruits,129,Birthday,394,Refrigerated,Regular,Spring,Buffet,Alexandria,Moderate,82
4,Fruits,463,Corporate,520,Frozen,Regular,Spring,Buffet,Delta,Moderate,85


In [2]:
# Binary target: 1 when the wasted amount exceeds 25% of the prepared
# quantity, 0 otherwise (a NaN ratio compares as False and therefore maps to 0).
waste_ratio = restaurant_df["wastage_food_amount"].div(restaurant_df["quantity_of_food"])
restaurant_df["will_waste"] = waste_ratio.gt(0.25).astype(int)

In [3]:
# Food quantity per guest, truncated to a whole number.
# A guest count of 0 is replaced by 1 so the division never divides by zero.
guest_counts = restaurant_df["number_of_guests"].replace(0, 1).astype(int)
per_guest_ratio = restaurant_df["quantity_of_food"] / guest_counts
restaurant_df["per_guest_quantity"] = per_guest_ratio.astype(int)

In [4]:
# Perishability score per food type. Food types that are missing from the map
# (or are NaN) fall back to 2, the same score as the highest entries below.
perish_map = dict(Meat=2, Fruits=2, Dairy=2, Bakery=1, Vegetables=1)
food_type_scores = restaurant_df["type_of_food"].map(perish_map)
restaurant_df["perishability_score"] = food_type_scores.fillna(2)

# 1 when the preparation method mentions "Buffet" (case-insensitive); missing values count as 0.
prep_method = restaurant_df["preparation_method"]
restaurant_df["is_buffet"] = prep_method.str.contains("Buffet", case=False, na=False).astype(int)

# 1 when the storage condition mentions Frozen or Refrigerated (case-insensitive); missing values count as 0.
storage = restaurant_df["storage_conditions"]
restaurant_df["cold_chain_flag"] = storage.str.contains("Frozen|Refrigerated", case=False, na=False).astype(int)

In [5]:
# Columns removed before modelling:
#   * quantity_of_food / wastage_food_amount define the will_waste target, so
#     keeping them would leak the label into the features;
#   * storage_conditions / preparation_method are already summarised by
#     cold_chain_flag / is_buffet;
#   * the remaining columns are simply not used as features here.
# The frame is re-assigned (no inplace) and errors="ignore" is used so that
# re-running this cell after the columns are already gone does not raise.
columns_to_drop = [
    "event_type", "storage_conditions", "seasonality", "pricing",
    "preparation_method", "purchase_history", "quantity_of_food", "wastage_food_amount",
]
restaurant_df = restaurant_df.drop(columns=columns_to_drop, errors="ignore")

# Feature matrix and binary target.
X = restaurant_df.drop(columns=["will_waste"])
y = restaurant_df["will_waste"]

# Stratified 80/20 split so both sets keep the same class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


# Select numeric columns by kind rather than by exact width: the engineered
# features are created with .astype(int), which is int32 on some platforms,
# and an explicit ["int64", "float64"] filter would skip them. Columns listed
# in neither group are dropped by ColumnTransformer (remainder defaults to
# "drop"), so everything non-numeric is routed to the categorical branch.
num_cols = X.select_dtypes(include="number").columns.tolist()
cat_cols = X.select_dtypes(exclude="number").columns.tolist()


# Numeric branch: fill gaps with the median, then standardise.
numeric_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])

# Categorical branch: fill gaps with the mode, then one-hot encode.
# handle_unknown="ignore" lets prediction cope with categories unseen in training.
categorical_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore"))
])

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, num_cols),
        ("cat", categorical_transformer, cat_cols)
    ]
)

In [6]:
# Baseline classifier: shared preprocessing followed by logistic regression.
log_steps = [
    ("preprocess", preprocessor),
    ("clf", LogisticRegression(max_iter=1000)),
]
log_model = Pipeline(steps=log_steps)
log_model.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred_log = log_model.predict(X_test)
log_report = classification_report(y_test, y_pred_log)
log_confusion = confusion_matrix(y_test, y_pred_log)

print("Logistic Regression Report:")
print(log_report)
print("Confusion Matrix:")
print(log_confusion)

Logistic Regression Report:
              precision    recall  f1-score   support

           0       0.79      0.90      0.84       419
           1       0.66      0.43      0.52       181

    accuracy                           0.76       600
   macro avg       0.72      0.67      0.68       600
weighted avg       0.75      0.76      0.74       600

Confusion Matrix:
[[378  41]
 [103  78]]


In [7]:
# Random forest with balanced class weights, behind the same preprocessing
# step that the logistic-regression pipeline uses.
rf_classifier = RandomForestClassifier(
    n_estimators=200,
    max_depth=20,
    random_state=42,
    class_weight="balanced",
)
rf_model = Pipeline(steps=[("preprocess", preprocessor), ("clf", rf_classifier)])
rf_model.fit(X_train, y_train)

# Hard labels for the report, positive-class probabilities for PR-AUC.
y_pred_rf = rf_model.predict(X_test)
y_prob_rf = rf_model.predict_proba(X_test)[:, 1]
rf_report = classification_report(y_test, y_pred_rf)
rf_confusion = confusion_matrix(y_test, y_pred_rf)
rf_pr_auc = average_precision_score(y_test, y_prob_rf)

print("Random Forest Report:")
print(rf_report)
print("Confusion Matrix:")
print(rf_confusion)
print("Average Precision (PR-AUC):", rf_pr_auc)

Random Forest Report:
              precision    recall  f1-score   support

           0       0.80      0.82      0.81       419
           1       0.55      0.52      0.54       181

    accuracy                           0.73       600
   macro avg       0.68      0.67      0.67       600
weighted avg       0.72      0.73      0.73       600

Confusion Matrix:
[[343  76]
 [ 87  94]]
Average Precision (PR-AUC): 0.5403902072358792


In [8]:

# Pair every restaurant row labelled will_waste == 1 with one charity that
# accepts the same food type, preferring a charity in the same location.
# NOTE(review): absolute, machine-specific path; the cell only runs where this
# exact file exists.
charities_df = pd.read_csv(r"C:\Users\ahmed\Desktop\charities.csv")
waste_restaurants = restaurant_df[restaurant_df["will_waste"] == 1]
matches = []

for idx, rest in waste_restaurants.iterrows():
    # Only charities that accept this restaurant's food type are candidates.
    candidate_charities = charities_df[charities_df["food_type"] == rest["type_of_food"]].copy()
    if candidate_charities.empty:
        continue

    # Score 1 for a charity in the same location as the restaurant, else 0.
    candidate_charities["score"] = (
        candidate_charities["location"] == rest["geographical_location"]
    ).astype(int)

    # A stable sort keeps the charities' file order among equal scores, so the
    # chosen charity is deterministic when several candidates tie.
    best_charity = candidate_charities.sort_values(
        "score", ascending=False, kind="stable"
    ).iloc[0]

    # Rough estimate of a reasonable quantity, capped at what the charity needs.
    # NOTE(review): the fixed multiplier 10 differs from the single-restaurant
    # cell, which multiplies by number_of_guests - confirm which is intended.
    distribute_qty = min(rest.get("per_guest_quantity", 0) * 10, best_charity.get("needed_quantity", 0))

    matches.append({
        # Row label of the restaurant in restaurant_df (previously this field
        # was filled with the location string, which does not identify a row).
        "restaurant_id": idx,
        "restaurant_food": rest["type_of_food"],
        "charity_name": best_charity["NGO_name"],
        "charity_location": best_charity["location"],
        "quantity_to_send": distribute_qty,
        "contact": best_charity["contact"]
    })

# Materialise the matches so the loop's result is actually inspectable.
matches_df = pd.DataFrame(matches)
matches_df.head()

In [9]:
# A single hypothetical event, described with the same feature columns the
# models were trained on, used below to demonstrate prediction and matching.
new_restaurant_features = {
    "number_of_guests": 50,
    "type_of_food": "Meat",
    "per_guest_quantity": 1,
    "perishability_score": 2,
    "is_buffet": 1,
    "cold_chain_flag": 1,
    "geographical_location": "Cairo",
}
new_restaurant = pd.DataFrame.from_records([new_restaurant_features])

new_restaurant

Unnamed: 0,number_of_guests,type_of_food,per_guest_quantity,perishability_score,is_buffet,cold_chain_flag,geographical_location
0,50,Meat,1,2,1,1,Cairo


In [10]:
# Score the sample event with the random-forest pipeline: positive-class
# probability first, then the hard 0/1 label.
predicted_prob = rf_model.predict_proba(new_restaurant)[:, 1]
predicted_waste = rf_model.predict(new_restaurant)

waste_label = predicted_waste[0]
waste_probability = predicted_prob[0]
print("Predicted will_waste (0=no, 1=yes):", waste_label)
print("Probability of wasting:", waste_probability)


Predicted will_waste (0=no, 1=yes): 1
Probability of wasting: 0.76


In [11]:
# Find one charity for the sample event: same food type required, same
# location preferred.
candidate_charities = charities_df[charities_df["food_type"] == new_restaurant.loc[0, "type_of_food"]].copy()

# Bind the result up front so later cells can read `match` even when no
# charity accepts this food type (it then stays None instead of being undefined).
match = None

if not candidate_charities.empty:
    # Score 1 for a charity in the same location as the event, else 0.
    candidate_charities["score"] = (
        candidate_charities["location"] == new_restaurant.loc[0, "geographical_location"]
    ).astype(int)

    # A stable sort keeps the charities' file order among equal scores, so the
    # chosen charity is deterministic when several candidates tie.
    best_charity = candidate_charities.sort_values(
        "score", ascending=False, kind="stable"
    ).iloc[0]

    # Offer per-guest quantity times guest count, capped at what the charity needs.
    distribute_qty = min(new_restaurant.loc[0, "per_guest_quantity"] * new_restaurant.loc[0, "number_of_guests"],
                         best_charity.get("needed_quantity", 0))

    match = {
        "restaurant_food": new_restaurant.loc[0, "type_of_food"],
        "charity_name": best_charity["NGO_name"],
        "charity_location": best_charity["location"],
        "quantity_to_send": distribute_qty,
        "contact": best_charity["contact"]
    }

In [12]:
# Display the charity match assembled in the previous cell.
match

{'restaurant_food': 'Meat',
 'charity_name': 'Charity_722',
 'charity_location': 'Cairo',
 'quantity_to_send': np.int64(50),
 'contact': '+20-1589620796'}

In [13]:
import joblib

# Persist the fitted random-forest pipeline to the working directory, then
# load it back to obtain a round-tripped copy.
model_file = "rf_waste_model.pkl"
joblib.dump(rf_model, model_file)
loaded_model = joblib.load(model_file)
