In [None]:
import pandas as pd

# Load the raw order export.
df = pd.read_csv("food_orders_new_delhi.csv")

# A blanket dropna() here would discard every order whose "Discounts and Offers"
# cell is missing. pandas reads the literal text "None" as NaN by default, so a
# missing value in that column most likely means "no promotion applied"
# (assumption -- confirm against the raw CSV). Record those as a zero discount
# so the orders stay in the dataset instead of being silently removed.
df["Discounts and Offers"] = df["Discounts and Offers"].fillna("0")

# Rows that are still missing any other field are incomplete and are dropped.
df = df.dropna()
df.head()


Unnamed: 0,Order ID,Customer ID,Restaurant ID,Order Date and Time,Delivery Date and Time,Order Value,Delivery Fee,Payment Method,Discounts and Offers,Commission Fee,Payment Processing Fee,Refunds/Chargebacks
0,1,C8270,R2924,2024-02-01 01:11:52,2024-02-01 02:39:52,1914,0,Credit Card,5% on App,150,47,0
1,2,C1860,R2054,2024-02-02 22:11:04,2024-02-02 22:46:04,986,40,Digital Wallet,10%,198,23,0
2,3,C6390,R2870,2024-01-31 05:54:35,2024-01-31 06:52:35,937,30,Cash on Delivery,15% New User,195,45,0
4,5,C6734,R2799,2024-01-29 01:19:30,2024-01-29 02:48:30,1992,30,Cash on Delivery,50 off Promo,130,50,0
5,6,C7265,R2777,2024-01-25 04:36:52,2024-01-25 05:27:52,439,20,Cash on Delivery,10%,92,27,150


In [None]:
# Show the column labels of the loaded frame; later cells select columns by these exact names.
df.columns

Index(['Order ID', 'Customer ID', 'Restaurant ID', 'Order Date and Time',
       'Delivery Date and Time', 'Order Value', 'Delivery Fee',
       'Payment Method', 'Discounts and Offers', 'Commission Fee',
       'Payment Processing Fee', 'Refunds/Chargebacks'],
      dtype='object')

In [None]:
# Columns that must be numeric for the profit calculation and the model.
numeric_cols = [
    "Order Value", "Delivery Fee", "Discounts and Offers",
    "Commission Fee", "Payment Processing Fee", "Refunds/Chargebacks"
]

# Plain money columns: strip everything except digits and the decimal point,
# then convert. Values that still cannot be parsed become NaN.
money_cols = [col for col in numeric_cols if col != "Discounts and Offers"]
for col in money_cols:
    df[col] = df[col].astype(str).str.replace(r"[^\d.]", "", regex=True)  # remove currency symbols
    df[col] = pd.to_numeric(df[col], errors='coerce')

# "Discounts and Offers" holds offer descriptions such as "5% on App", "10%",
# "15% New User" and "50 off Promo". Keeping only the digits would turn "5%"
# into 5 currency units, which the profit formula would then subtract as if it
# were a flat amount. Convert percentage offers into an amount instead
# (assumes the percentage applies to "Order Value" -- confirm with the data
# owner) and keep flat offers as they are.
offer_text = df["Discounts and Offers"].astype(str)
offer_number = pd.to_numeric(
    offer_text.str.extract(r"(\d+(?:\.\d+)?)", expand=False),
    errors="coerce",
)
is_percentage = offer_text.str.contains("%", regex=False)

# Re-running this cell is safe: once converted, the column no longer contains
# a "%" sign, so the values are read back as flat amounts and left unchanged.
df["Discounts and Offers"] = offer_number.where(
    ~is_percentage, df["Order Value"] * offer_number / 100
)


In [None]:
# Report the frame size before any rows are filtered out.
shape_before = df.shape
print("Before dropna:", shape_before)

# Coerce every numeric column (unparseable entries become NaN), then keep only
# the rows where all of those columns hold a value.
df[numeric_cols] = df[numeric_cols].apply(
    lambda column: pd.to_numeric(column, errors="coerce")
)
df = df.dropna(subset=numeric_cols, how="any")

shape_after = df.shape
print("After dropna:", shape_after)


Before dropna: (815, 12)
After dropna: (815, 12)


In [None]:
# Drop any row that lacks a value in one of the numeric columns and report the
# resulting size of the cleaned frame.
df = df.dropna(axis=0, subset=numeric_cols)
cleaned_shape = df.shape
print("Cleaned data shape:", cleaned_shape)


Cleaned data shape: (815, 12)


In [None]:
# Net profit = order value + delivery fee - all expenses
running_total = df["Order Value"] + df["Delivery Fee"]
for expense_col in (
    "Discounts and Offers",
    "Commission Fee",
    "Payment Processing Fee",
    "Refunds/Chargebacks",
):
    # Subtract the expense columns one at a time, in this fixed order.
    running_total = running_total - df[expense_col]
df["Net Profit"] = running_total

# Class label: "Profit" when net profit is strictly positive, otherwise "Loss"
# (a net profit of exactly zero is labelled "Loss").
is_profitable = df["Net Profit"].gt(0)
df["ProfitLabel"] = is_profitable.map({True: "Profit", False: "Loss"})


In [None]:
# Feature matrix: the six monetary columns of each order.
feature_columns = [
    "Order Value",
    "Delivery Fee",
    "Discounts and Offers",
    "Commission Fee",
    "Payment Processing Fee",
    "Refunds/Chargebacks",
]
X = df.loc[:, feature_columns]

# Target: the Profit / Loss class label.
y = df.loc[:, "ProfitLabel"]


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import pickle

# The classes are heavily imbalanced (very few "Loss" rows), so an unstratified
# split can leave the test set with almost no minority samples. Stratify on the
# label whenever every class has at least two rows, which scikit-learn requires
# for a stratified split; otherwise fall back to a plain random split.
stratify_labels = y if y.value_counts().min() >= 2 else None

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_labels
)

# Fix the forest's seed so that Restart & Run All reproduces the same model
# and the same report.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out 20% of the orders.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

        Loss       1.00      1.00      1.00         2
      Profit       1.00      1.00      1.00       161

    accuracy                           1.00       163
   macro avg       1.00      1.00      1.00       163
weighted avg       1.00      1.00      1.00       163



In [None]:
# Persist the fitted classifier. The context manager closes the file handle
# (and flushes the data to disk) even if pickling raises; the bare open(...)
# call left the handle open until garbage collection.
# NOTE: only load this file back from a trusted location -- pickle.load can
# execute arbitrary code.
with open("profit_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)