In [12]:
# ============================================
# Imports
# ============================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr

# Global plot styling for the whole notebook: seaborn's white-grid theme with
# slightly enlarged fonts, and a 10x6 inch default matplotlib figure size.
# (A stray bare name that raised NameError on every run was removed here.)
sns.set(style="whitegrid", font_scale=1.2)
plt.rcParams['figure.figsize'] = (10, 6)

ModuleNotFoundError: No module named 'pandas'

In [None]:
# ============================================
# Load and inspect dataset
# ============================================
# Read the sales export; the path is resolved relative to the notebook's
# working directory.
df = pd.read_csv("coffee_sales.csv")

# Quick overview
# 1) first five rows as a rich table
display(df.head(5))
# 2) dtypes and non-null counts (printed as a side effect)
df.info()
# 3) summary statistics for every column, numeric or not; being the cell's
#    last expression, this is the value the notebook renders
df.describe(include="all")


In [None]:
# ============================================
# Define milk + coffee proportions for each drink
# ============================================
# Recipe table: drink name -> (milk share, coffee share).
milk_map = {
    "Latte": (0.7, 0.3),
    "Cappuccino": (0.5, 0.5),
    "Flat White": (0.6, 0.4),
    "Hot Chocolate": (1.0, 0.0),
    "Cocoa": (1.0, 0.0),
    "Americano": (0.0, 1.0),
    "Americano with Milk": (0.2, 0.8),
    "Cortado": (0.4, 0.6),
}


def ratio(drink):
    """Return the milk fraction of ``drink`` as milk / (milk + coffee).

    Drinks that are not listed in ``milk_map`` are treated as pure coffee,
    i.e. shares of (0.0, 1.0), so they yield 0.0.
    """
    try:
        milk_share, coffee_share = milk_map[drink]
    except KeyError:
        # Unlisted drink: fall back to the "no milk" recipe.
        milk_share, coffee_share = 0.0, 1.0
    total = milk_share + coffee_share
    return milk_share / total

# Look up each sale's drink and store its milk fraction next to it.
milk_ratio_per_sale = df["coffee_name"].apply(ratio)
df["milk_ratio"] = milk_ratio_per_sale

# Spot-check the mapping on the first 15 sales.
preview_columns = ["coffee_name", "milk_ratio"]
display(df[preview_columns].head(15))


In [None]:
# ============================================
# Distribution of milk_ratio
# ============================================
# Histogram (20 bins) of the per-sale milk fraction with a KDE curve on top,
# drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=df, x="milk_ratio", bins=20, kde=True, color="#3498db", ax=ax)
ax.set_title("Distribution of Milk-to-Coffee Ratio")
ax.set_xlabel("Milk Ratio")
ax.set_ylabel("Count")
plt.show()


In [None]:
# ============================================
# Avg milk ratio per coffee type
# ============================================
# Mean milk fraction per drink, sorted ascending. The result is a Series
# indexed by coffee_name and is reused by a later cell.
ratio_by_coffee = (
    df.groupby("coffee_name")["milk_ratio"]
    .mean()
    .sort_values()
)

# Horizontal bars: one per drink, in the sorted order computed above.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=ratio_by_coffee.reset_index(),
    x="milk_ratio",
    y="coffee_name",
    palette="viridis",
    ax=ax,
)
ax.set_title("Average Milk Ratio by Coffee Type")
ax.set_xlabel("Milk Ratio")
ax.set_ylabel("Coffee")
plt.show()


In [None]:
# ============================================
# Scatter: money vs. milk_ratio
# ============================================
# One point per sale, coloured by drink.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x="milk_ratio",
    y="money",
    hue="coffee_name",
    s=80,
    alpha=0.7,
    ax=ax,
)
ax.set_title("Milk Ratio vs Sale Amount")
ax.set_xlabel("Milk Ratio")
ax.set_ylabel("Sale Amount")
# Anchor the legend outside the plotting area, to the right of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

# Correlation
# Linear association between milk fraction and sale amount, with its p-value.
milk_values = df["milk_ratio"]
sale_values = df["money"]
corr, p = pearsonr(milk_values, sale_values)
print(f"Pearson correlation: {corr:.4f}  (p={p:.4f})")


In [None]:
# ============================================
# Milk ratio by time of day
# ============================================
# Box plot: spread of the milk fraction within each Time_of_Day category.
fig, ax = plt.subplots(figsize=(10, 7))
sns.boxplot(data=df, x="Time_of_Day", y="milk_ratio", palette="coolwarm", ax=ax)
ax.set_title("Milk Ratio Patterns Across Time of Day")
ax.set_xlabel("Time of Day")
ax.set_ylabel("Milk Ratio")
plt.show()

# Violin plot of the same grouping, with quartile lines drawn inside each violin.
fig, ax = plt.subplots(figsize=(10, 7))
sns.violinplot(
    data=df,
    x="Time_of_Day",
    y="milk_ratio",
    inner="quartile",
    palette="rocket",
    ax=ax,
)
ax.set_title("Milk Ratio Violin Distribution by Time of Day")
ax.set_xlabel("Time of Day")
ax.set_ylabel("Milk Ratio")
plt.show()


In [None]:
# ============================================
# Average milk ratio by hour of day (line plot)
# ============================================
# Mean milk fraction of all sales in each hour; a Series indexed by hour_of_day.
hour_ratio = df.groupby("hour_of_day")["milk_ratio"].mean()

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=hour_ratio, marker="o", color="#8e44ad", ax=ax)
ax.set_title("Average Milk Ratio by Hour of Day")
ax.set_xlabel("Hour of Day")
ax.set_ylabel("Milk Ratio")
# Show a tick for every hour 0..23, whether or not sales occurred in it.
ax.set_xticks(range(0, 24))
plt.show()

# Sales frequency heatmap by hour and ratio bucket
# Bin the milk ratio into quarters of the [0, 1] range. `include_lowest=True`
# closes the first interval on the left, so sales with a ratio of exactly 0.0
# (e.g. "Americano", or any drink missing from milk_map) land in the first
# bucket instead of becoming NaN and silently dropping out of the counts below.
# pandas labels that first bucket (-0.001, 0.25] as a consequence.
df["ratio_bucket"] = pd.cut(
    df["milk_ratio"],
    bins=[0, 0.25, 0.5, 0.75, 1.0],
    include_lowest=True,
)

# Rows = hour of day, columns = ratio bucket, values = number of sales.
# `observed=False` is spelled out so every bucket keeps a column even when it
# is empty, independent of the pandas version's default for categorical keys.
heat = (
    df.groupby(["hour_of_day", "ratio_bucket"], observed=False)
    .size()
    .unstack(fill_value=0)
)

plt.figure(figsize=(12,6))
sns.heatmap(heat, cmap="YlGnBu")
plt.title("Sales Count Heatmap: Milk Ratio Bucket vs Hour")
plt.xlabel("Milk Ratio Bucket")
plt.ylabel("Hour of Day")
plt.show()


In [None]:
# ============================================
# Milk ratio by weekday
# ============================================
# Mean milk fraction per Weekday value; error bars are switched off.
# NOTE(review): `ci` appears to be deprecated in recent seaborn releases in
# favour of `errorbar=None` — confirm against the installed version before changing.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=df, x="Weekday", y="milk_ratio", ci=None, palette="magma", ax=ax)
ax.set_title("Average Milk Ratio Across Weekdays")
ax.set_xlabel("Weekday")
ax.set_ylabel("Milk Ratio")
plt.show()

# ============================================
# Milk ratio by month
# ============================================
# Mean milk fraction per Month_name value; error bars are switched off.
# NOTE(review): `ci` appears to be deprecated in recent seaborn releases in
# favour of `errorbar=None` — confirm against the installed version before changing.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=df, x="Month_name", y="milk_ratio", ci=None, palette="plasma", ax=ax)
ax.set_title("Average Milk Ratio Across Months")
ax.set_xlabel("Month")
ax.set_ylabel("Milk Ratio")
plt.show()


In [None]:
# ============================================
# Does milkiness influence popularity?
# ============================================
# One row per drink: the count of non-null `money` entries (left in a column
# still called "money") joined with the drink's average milk fraction.
# Relies on `ratio_by_coffee` from the earlier per-drink averages cell.
popularity = (
    df.groupby("coffee_name")["money"]
    .count()
    .reset_index()
    .merge(ratio_by_coffee.rename("avg_ratio"), on="coffee_name")
)

fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=popularity,
    x="avg_ratio",
    y="money",
    s=200,
    hue="coffee_name",
    legend=False,
    palette="rainbow",
    ax=ax,
)
# The legend is disabled above, so label each point with its drink name instead.
for x_pos, y_pos, label in zip(
    popularity["avg_ratio"], popularity["money"], popularity["coffee_name"]
):
    ax.text(x_pos, y_pos, label)

ax.set_title("Coffee Popularity vs Average Milk Ratio")
ax.set_xlabel("Average Milk Ratio")
ax.set_ylabel("Number of Sales")
plt.show()


In [None]:
# ============================================
# Time series: daily avg milk ratio
# ============================================
# Parse the Date column to datetimes (overwrites the column in place) so the
# grouped index below is a datetime index.
parsed_dates = pd.to_datetime(df["Date"])
df["Date"] = parsed_dates

# Mean milk fraction of all sales on each date.
daily_ratio = df.groupby("Date")["milk_ratio"].mean()

fig, ax = plt.subplots(figsize=(14, 6))
daily_ratio.plot(kind="line", marker="o", color="#2980b9", ax=ax)
ax.set_title("Trend of Milk Ratio Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Average Milk Ratio")
plt.show()
