
# ☕ Coffee Shop Sales Prediction
**Date:** 2025-05-28

This notebook explores how weather and time factors affect coffee shop sales, quantity sold, and product category using real transaction and weather data.

Three machine learning models are used:
- Regression to predict `total_sales`
- Regression to predict `transaction_qty`
- Classification to predict `product_category` (using its integer-encoded column, `category_encoded`, as the target)


In [None]:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score, classification_report, confusion_matrix


In [None]:

# Load the prepared coffee/weather dataset. The path is relative, so the CSV
# must be in the kernel's working directory.
df = pd.read_csv(filepath_or_buffer="coffee_weather_ml_ready.csv")

# Preview the first five rows as a sanity check on the load.
df.head(n=5)


In [None]:

# Predictor columns shared by all three models below.
# The order is kept fixed: the seeded random forests sample features by
# position, so reordering this list would change the fitted models.
features = [
    'avg_temp_c',
    'rain_mm',
    'is_rainy',
    'hour',
    'is_weekend',
    'temperature_bin_Cool',
    'temperature_bin_Mild',
    'temperature_bin_Warm',
    'time_of_day_Afternoon',
    'time_of_day_Evening',
]

# Feature matrix: every row of df, restricted to the predictor columns.
X = df.loc[:, features]


In [None]:

# Regression target: the `total_sales` column.
y = df['total_sales']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a random forest with default hyperparameters and a fixed seed.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Total Sales Prediction")
# RMSE is computed as sqrt(MSE) instead of mean_squared_error(..., squared=False):
# the `squared` keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
# where it raises TypeError. sqrt(MSE) gives the same value on every version.
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R²:", r2_score(y_test, y_pred))


In [None]:

# Regression target: the `transaction_qty` column.
y = df['transaction_qty']

# Same 80/20 split and seed as the other models, so all of them are
# evaluated on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a random forest with default hyperparameters and a fixed seed.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Transaction Quantity Prediction")
# RMSE is computed as sqrt(MSE) instead of mean_squared_error(..., squared=False):
# the `squared` keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
# where it raises TypeError. sqrt(MSE) gives the same value on every version.
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R²:", r2_score(y_test, y_pred))


In [None]:

# Classification target: the `category_encoded` column.
y = df['category_encoded']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a random forest classifier with default hyperparameters and a fixed seed.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Product Category Classification")
print(classification_report(y_test, y_pred))

# Sorted union of the labels seen in the true and predicted values. This is
# the same label set confusion_matrix infers by default, so the matrix values
# are unchanged; passing it explicitly lets the heatmap ticks show the actual
# encoded class labels instead of positional indices.
class_labels = np.union1d(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

# Explicit figure/axes so the heatmap and its labels target one known Axes.
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix for Product Category')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
fig.tight_layout()
plt.show()
