In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor

In [2]:
df = pd.read_csv("processed_data.csv")


In [3]:
df = df.drop(['Unnamed: 0', "Zip code", "State", "Completed Date"], axis = 1)

In [4]:
df_795 = df[df["Location ID"] == 795]

In [5]:
df_795 = df_795.dropna()

In [6]:
df_795

Unnamed: 0,Location ID,Tonnage,Latitude,Longitude,Days Since Last Pickup,Last Pickup Tonnage,Second Last Pickup Tonnage,Third Last Pickup Tonnage,Weekdays Since Last Pickup,Days Before Last Pickup,Days Before Second Last Pickup,Days Before Third Last Pickup,Month
43290,795,8.08,33.238248,-81.392282,23.0,6.17,7.62,6.25,17.0,22.0,18.0,2.0,4
43291,795,7.24,33.238248,-81.392282,24.0,8.08,6.17,7.62,16.0,23.0,22.0,18.0,5
43292,795,6.88,33.238248,-81.392282,16.0,7.24,8.08,6.17,12.0,24.0,23.0,22.0,5
43293,795,7.08,33.238248,-81.392282,34.0,6.88,7.24,8.08,24.0,16.0,24.0,23.0,6
43294,795,6.68,33.238248,-81.392282,6.0,7.08,6.88,7.24,4.0,34.0,16.0,24.0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
43370,795,8.03,33.238248,-81.392282,23.0,9.54,8.83,4.59,17.0,29.0,18.0,7.0,4
43371,795,8.30,33.238248,-81.392282,19.0,8.03,9.54,8.83,13.0,23.0,29.0,18.0,5
43372,795,9.01,33.238248,-81.392282,15.0,8.30,8.03,9.54,11.0,19.0,23.0,29.0,5
43373,795,9.40,33.238248,-81.392282,24.0,9.01,8.30,8.03,17.0,15.0,19.0,23.0,6


In [7]:
X = df_795.drop('Tonnage', axis=1)
y = df_795['Tonnage']

# Decide the split ratio
split_ratio = 0.8  # 80% for training, 20% for testing
split_index = int(len(df_795) * split_ratio)


In [8]:
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

In [9]:
# Initialize models
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(),
    'Random Forest': RandomForestRegressor(n_estimators=100)
}

# Train and evaluate models
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    results[name] = mse
    print(f'{name} MSE: {mse}')

# Optionally, you can also print R-squared scores or other metrics
for name, model in models.items():
    print(f'{name} R^2 Score: {model.score(X_test, y_test)}')

Linear Regression MSE: 8.977969921875
Decision Tree MSE: 16.334323529411762
Random Forest MSE: 8.826936179869278
Linear Regression R^2 Score: 0.12599552654577673
Decision Tree R^2 Score: -0.5901447609854473
Random Forest R^2 Score: 0.14069864621583705


BRENDAN'S SINGULAR STORE MODELS

In [10]:
features = ['Tonnage', 'Latitude', 'Longitude',	'Days Since Last Pickup', 'Last Pickup Tonnage', 'Second Last Pickup Tonnage', 'Third Last Pickup Tonnage', 'Weekdays Since Last Pickup', 'Days Before Last Pickup','Days Before Second Last Pickup', 'Days Before Third Last Pickup', 'Month']  
target = 'Days Since Last Pickup'  # replace with your actual target variable

X_supercenter = df_795[features]
y_supercenter = df_795[target]

In [11]:
X_train_super, X_test_super, y_train_super, y_test_super = train_test_split(X_supercenter, y_supercenter, test_size=0.2, random_state=42)

In [12]:

# Random Forest model
rf_model_super = RandomForestRegressor(random_state=42)
rf_model_super.fit(X_train_super, y_train_super)

# Nearest Neighbors model
knn_model_super = KNeighborsRegressor()
knn_model_super.fit(X_train_super, y_train_super)

# Evaluate Random Forest
rf_pred_super = rf_model_super.predict(X_test_super)
rf_mse_super = mean_squared_error(y_test_super, rf_pred_super)

# Evaluate KNN
knn_pred_super = knn_model_super.predict(X_test_super)
knn_mse_super = mean_squared_error(y_test_super, knn_pred_super)


print("RF MSE Supercenter:", rf_mse_super)
print("KNN MSE Supercenter:", knn_mse_super)

RF MSE Supercenter: 0.9605470588235296
KNN MSE Supercenter: 10.174117647058821
