<a href="https://colab.research.google.com/github/berkyyd/Projects/blob/main/food_delivery_uyg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
from datetime import date
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, StandardScaler, RobustScaler
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso, ElasticNet, BayesianRidge
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import lightgbm as lgb

In [None]:
import veri_bilimi as vb
import warnings
warnings.filterwarnings("ignore")

In [None]:
def load():
  df = pd.read_csv("Food_Delivery_Times.csv")
  return df
df = load()

In [None]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Order_ID,1000.0,500.5,288.819436,1.0,250.75,500.5,750.25,1000.0
Distance_km,1000.0,10.05997,5.696656,0.59,5.105,10.19,15.0175,19.99
Preparation_Time_min,1000.0,16.982,7.204553,5.0,11.0,17.0,23.0,29.0
Courier_Experience_yrs,970.0,4.579381,2.914394,0.0,2.0,5.0,7.0,9.0
Delivery_Time_min,1000.0,56.732,22.070915,8.0,41.0,55.5,71.0,153.0


In [None]:
df.head()

Unnamed: 0,Order_ID,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
0,522,7.93,Windy,Low,Afternoon,Scooter,12,1.0,43
1,738,16.42,Clear,Medium,Evening,Bike,20,2.0,84
2,741,9.52,Foggy,Low,Night,Scooter,28,1.0,59
3,661,7.44,Rainy,Medium,Afternoon,Scooter,5,1.0,37
4,412,19.03,Clear,Low,Morning,Bike,16,5.0,68


In [None]:
vb.grab_col_names(df)

Observations: 1000
Variables: 9
cat_cols: 4
num_cols: 5
cat_but_car: 0
num_but_cat: 0


(['Weather', 'Traffic_Level', 'Time_of_Day', 'Vehicle_Type'],
 ['Order_ID',
  'Distance_km',
  'Preparation_Time_min',
  'Courier_Experience_yrs',
  'Delivery_Time_min'],
 [])

In [None]:
cat_cols, num_cols, cat_but_car = vb.grab_col_names(df)

Observations: 1000
Variables: 9
cat_cols: 4
num_cols: 5
cat_but_car: 0
num_but_cat: 0


In [None]:
def outlier_thresholds(dataframe, col_name, q1=0.05, q3=0.95):
    quartile1 = dataframe[col_name].quantile(q1)
    quartile3 = dataframe[col_name].quantile(q3)
    interquartile_range = quartile3 - quartile1
    up_limit = quartile3 + 1.5 * interquartile_range
    low_limit = quartile1 - 1.5 * interquartile_range
    return low_limit, up_limit

In [None]:
def check_outlier(dataframe, col_name):
  low_limit, up_limit = outlier_thresholds(dataframe, col_name)
  if dataframe[(dataframe[col_name] > up_limit) | (dataframe[col_name] < low_limit)].any(axis=None):
    return True
  else:
    return False

In [None]:
for col in num_cols:
  print(col, check_outlier(df, col))

Order_ID False
Distance_km False
Preparation_Time_min False
Courier_Experience_yrs False
Delivery_Time_min False


In [None]:
def missing_values_table(dataframe, na_name=False):
  na_columns = [col for col in dataframe.columns if dataframe[col].isnull().sum()>0]
  n_miss = dataframe[na_columns].isnull().sum().sort_values(ascending=False)
  ratio = (dataframe[na_columns].isnull().sum() / dataframe.shape[0] * 100).sort_values(ascending=False)
  missing_df = pd.concat([n_miss, np.round(ratio, 2)], axis=1, keys=['n_miss', 'ratio'])
  print(missing_df, end="\n")
  if na_name:
    return na_columns

In [None]:
missing_values_table(df)

                        n_miss  ratio
Weather                     30    3.0
Traffic_Level               30    3.0
Time_of_Day                 30    3.0
Courier_Experience_yrs      30    3.0


In [None]:
for col in cat_cols:
  df[col].fillna(df[col].mode()[0], inplace=True)

for col in num_cols:
  df[col].fillna(df[col].mean(), inplace=True)

In [None]:
missing_values_table(df)

Empty DataFrame
Columns: [n_miss, ratio]
Index: []


In [None]:
df.head()

Unnamed: 0,Order_ID,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
0,522,7.93,Windy,Low,Afternoon,Scooter,12,1.0,43
1,738,16.42,Clear,Medium,Evening,Bike,20,2.0,84
2,741,9.52,Foggy,Low,Night,Scooter,28,1.0,59
3,661,7.44,Rainy,Medium,Afternoon,Scooter,5,1.0,37
4,412,19.03,Clear,Low,Morning,Bike,16,5.0,68


In [None]:
df.tail()

Unnamed: 0,Order_ID,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
995,107,8.5,Clear,High,Evening,Car,13,3.0,54
996,271,16.28,Rainy,Low,Morning,Scooter,8,9.0,71
997,861,15.62,Snowy,High,Evening,Scooter,26,2.0,81
998,436,14.17,Clear,Low,Afternoon,Bike,8,0.0,55
999,103,6.63,Foggy,Low,Night,Scooter,24,3.0,58


In [None]:
df["Pre_Distance"] = df["Distance_km"] * df["Preparation_Time_min"]

In [None]:
df.head()

Unnamed: 0,Order_ID,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min,Pre_Distance
0,522,7.93,Windy,Low,Afternoon,Scooter,12,1.0,43,95.16
1,738,16.42,Clear,Medium,Evening,Bike,20,2.0,84,328.4
2,741,9.52,Foggy,Low,Night,Scooter,28,1.0,59,266.56
3,661,7.44,Rainy,Medium,Afternoon,Scooter,5,1.0,37,37.2
4,412,19.03,Clear,Low,Morning,Bike,16,5.0,68,304.48


In [None]:
def one_hot_encoder(dataframe, categorical_cols, drop_first=True, dtype="int"):
  dataframe = pd.get_dummies(dataframe, columns=categorical_cols, drop_first=drop_first, dtype=dtype)
  return dataframe

In [None]:
df = one_hot_encoder(df, cat_cols)
df.head()

Unnamed: 0,Order_ID,Distance_km,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min,Pre_Distance,Weather_Foggy,Weather_Rainy,Weather_Snowy,Weather_Windy,Traffic_Level_Low,Traffic_Level_Medium,Time_of_Day_Evening,Time_of_Day_Morning,Time_of_Day_Night,Vehicle_Type_Car,Vehicle_Type_Scooter
0,522,7.93,12,1.0,43,95.16,0,0,0,1,1,0,0,0,0,0,1
1,738,16.42,20,2.0,84,328.4,0,0,0,0,0,1,1,0,0,0,0
2,741,9.52,28,1.0,59,266.56,1,0,0,0,1,0,0,0,1,0,1
3,661,7.44,5,1.0,37,37.2,0,1,0,0,0,1,0,0,0,0,1
4,412,19.03,16,5.0,68,304.48,0,0,0,0,1,0,0,1,0,0,0


In [None]:
cat_cols, num_cols, cat_but_car = vb.grab_col_names(df)
cat_cols, num_cols, cat_but_car

Observations: 1000
Variables: 17
cat_cols: 11
num_cols: 6
cat_but_car: 0
num_but_cat: 11


(['Weather_Foggy',
  'Weather_Rainy',
  'Weather_Snowy',
  'Weather_Windy',
  'Traffic_Level_Low',
  'Traffic_Level_Medium',
  'Time_of_Day_Evening',
  'Time_of_Day_Morning',
  'Time_of_Day_Night',
  'Vehicle_Type_Car',
  'Vehicle_Type_Scooter'],
 ['Order_ID',
  'Distance_km',
  'Preparation_Time_min',
  'Courier_Experience_yrs',
  'Delivery_Time_min',
  'Pre_Distance'],
 [])

In [None]:
scale = StandardScaler()
df[num_cols] = scale.fit_transform(df[num_cols])
df.head()

Unnamed: 0,Order_ID,Distance_km,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min,Pre_Distance,Weather_Foggy,Weather_Rainy,Weather_Snowy,Weather_Windy,Traffic_Level_Low,Traffic_Level_Medium,Time_of_Day_Evening,Time_of_Day_Morning,Time_of_Day_Night,Vehicle_Type_Car,Vehicle_Type_Scooter
0,0.074478,-0.374085,-0.691853,-1.247665,-0.622488,-0.585512,0,0,0,1,1,0,0,0,0,0,1
1,0.822725,1.117008,0.419111,-0.899095,1.23609,1.227908,0,0,0,0,0,1,1,0,0,0,0
2,0.833117,-0.094835,1.530076,-1.247665,0.102811,0.747108,1,0,0,0,1,0,0,0,1,0,1
3,0.555989,-0.460144,-1.663947,-1.247665,-0.894475,-1.036146,0,1,0,0,0,1,0,0,0,0,1
4,-0.306573,1.575401,-0.136371,0.146615,0.510792,1.041932,0,0,0,0,1,0,0,1,0,0,0


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
X = df.drop(["Order_ID", "Delivery_Time_min"], axis=1)
y = df["Delivery_Time_min"]

In [None]:
warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
import lightgbm as lgb

def evaluate_regressors(X, y):
    # Veri setini eğitim ve test olarak ayır
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Kullanılacak modeller
    regressors = {
        'Linear Regression': LinearRegression(),
        'Ridge Regression': Ridge(),
        'Lasso Regression': Lasso(),
        'ElasticNet Regression': ElasticNet(),
        'Decision Tree': DecisionTreeRegressor(),
        'Random Forest': RandomForestRegressor(),
        'Gradient Boosting': GradientBoostingRegressor(),
        'SVR': SVR(kernel='rbf'),
        'XGBoost': XGBRegressor()
        #'LightGBM': lgb.LGBMRegressor()
    }

    # Sonuçları saklamak için liste
    results = []

    for name, model in regressors.items():
        print(f"Model: {name}")
        try:
            # Modeli eğit
            model.fit(X_train, y_train)

            # Tahmin yap
            y_pred = model.predict(X_test)

            # Regresyon metriklerini hesapla
            mse = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)

            # Sonuçları sakla
            results.append({
                'Model': name,
                'MSE': mse,
                'MAE': mae,
                'R2 Score': r2
            })

        except Exception as e:
            print(f"Model {name} çalıştırılamadı. Hata: {e}")
            continue

    # Sonuçları DataFrame olarak döndür
    results_df = pd.DataFrame(results)
    return results_df



# Fonksiyonu çağır ve sonuçları yazdır
results = evaluate_regressors(X, y)
print(results)


Model: Linear Regression
Model: Ridge Regression
Model: Lasso Regression
Model: ElasticNet Regression
Model: Decision Tree
Model: Random Forest
Model: Gradient Boosting
Model: SVR
Model: XGBoost
                   Model       MSE       MAE  R2 Score
0      Linear Regression  0.160202  0.268126  0.826069
1       Ridge Regression  0.160885  0.268941  0.825328
2       Lasso Regression  0.926387  0.793361 -0.005774
3  ElasticNet Regression  0.610571  0.628630  0.337105
4          Decision Tree  0.377827  0.453538  0.589795
5          Random Forest  0.228377  0.325922  0.752052
6      Gradient Boosting  0.197193  0.305754  0.785909
7                    SVR  0.183356  0.279002  0.800932
8                XGBoost  0.246790  0.345997  0.732062


In [None]:
regressors = {
        'Linear Regression': LinearRegression(),
        'Ridge Regression': Ridge(),
        'Lasso Regression': Lasso(),
        'ElasticNet Regression': ElasticNet(),
        'Decision Tree': DecisionTreeRegressor(),
        'Random Forest': RandomForestRegressor(),
        'Gradient Boosting': GradientBoostingRegressor(),
        'SVR': SVR(kernel='rbf'),
        'XGBoost': XGBRegressor()
        #'LightGBM': lgb.LGBMRegressor()
    }

In [None]:
def plot_importance(model, features, num=None, save=False):
  # Modelin feature_importances_ özelliğine sahip olup olmadığını kontrol et
  if hasattr(model, 'feature_importances_'):
    feature_img = pd.DataFrame({'Value': model.feature_importances_, 'Feature': features.columns})
    plt.figure(figsize=(10, 10))
    sns.set(font_scale=1)
    sns.barplot(x="Value", y="Feature", data=feature_img.sort_values(by="Value", ascending=False)[0:num])

    plt.title('Features')
    plt.tight_layout()
    plt.show()
    if save:
      plt.savefig('importances.png')  # plt.saveFig yerine plt.savefig kullanılmalı
  else:
    print(f"{model.__class__.__name__} modeli özellik önemi içermiyor.")

In [None]:
for name, model in regressors.items():
  plot_importance(model, X)

LinearRegression modeli özellik önemi içermiyor.
Ridge modeli özellik önemi içermiyor.
Lasso modeli özellik önemi içermiyor.
ElasticNet modeli özellik önemi içermiyor.
DecisionTreeRegressor modeli özellik önemi içermiyor.
RandomForestRegressor modeli özellik önemi içermiyor.
GradientBoostingRegressor modeli özellik önemi içermiyor.
SVR modeli özellik önemi içermiyor.
XGBRegressor modeli özellik önemi içermiyor.
