In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd

# Read the five dairy CSV tables from the local "dataset" directory.
milkcow_facts = pd.read_csv("dataset/milkcow_facts.csv")
fluid_milk_sales = pd.read_csv("dataset/fluid_milk_sales.csv")
milk_products_facts = pd.read_csv("dataset/milk_products_facts.csv")
clean_cheese = pd.read_csv("dataset/clean_cheese.csv")
state_milk_production = pd.read_csv("dataset/state_milk_production.csv")

# Name -> DataFrame registry so the overview cells can loop over every table.
# The values are the same objects as the variables above, not copies.
datasets = dict(
    milkcow_facts=milkcow_facts,
    fluid_milk_sales=fluid_milk_sales,
    milk_products_facts=milk_products_facts,
    clean_cheese=clean_cheese,
    state_milk_production=state_milk_production,
)

# Basic Info

In [None]:
# Preview every table: its first rows, then its column/dtype/non-null summary.
for name, df in datasets.items():
    print(f"Dataset: {name}")
    display(df.head())
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would add a stray "None" line after each table.
    df.info()
    print("\n" + "="*80 + "\n")

# Per-column count of null entries for each table, followed by a ruler line.
ruler = "\n" + "=" * 80 + "\n"
for table_name, table in datasets.items():
    print(f"Missing values in {table_name}:")
    null_counts = table.isnull().sum()
    print(null_counts)
    print(ruler)

# Basic Statistical Summary

In [None]:
# Render describe() for each table, separating the tables with a ruler line.
divider = "\n" + "=" * 80 + "\n"
for label, frame in datasets.items():
    print(f"Summary statistics for {label}:")
    display(frame.describe())
    print(divider)

# Milk Production Trends Over Time

In [None]:
# Keep only the two columns of interest, then take the five rows with the
# largest milk_production_lbs values (largest first).
production_columns = ["year", "milk_production_lbs"]
top_milk_years = milkcow_facts[production_columns].nlargest(5, "milk_production_lbs")

print("Top 5 years with highest milk production:")
display(top_milk_years)

In [None]:
# Parse the year column into datetimes and order the rows chronologically.
# Both steps modify milkcow_facts itself, so every other reference to this
# frame sees the converted, sorted data.
parsed_years = pd.to_datetime(milkcow_facts["year"], format="%Y")
milkcow_facts["year"] = parsed_years
milkcow_facts.sort_values(by="year", inplace=True)

# Draw production against year on a fresh 10x5 figure.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(
    milkcow_facts["year"],
    milkcow_facts["milk_production_lbs"],
    marker="o",
    linestyle="-",
    label="Total Milk Production",
)

ax.set_xlabel("Year")
ax.set_ylabel("Milk Production (lbs)")
ax.set_title("Total Milk Production Over Time")

def format_yaxis(value, _):
    """Format a tick value as billions with one decimal place, e.g. 1.5e9 -> "1.5B".

    The second positional argument is accepted but ignored; it exists so the
    function matches the two-argument callable expected by FuncFormatter.
    """
    billions = value / 1e9
    return "{:.1f}B".format(billions)

# Label the y ticks of the active axes in billions, then add the legend and
# grid and render the figure.
current_axes = plt.gca()
billions_formatter = mticker.FuncFormatter(format_yaxis)
current_axes.yaxis.set_major_formatter(billions_formatter)

current_axes.legend()
current_axes.grid()
plt.show()

# Milk Price vs. Dairy Ration Price Trend

In [None]:
# Plot the milk price and the dairy ration price against year on shared axes.
fig, ax = plt.subplots(figsize=(10, 5))
years = milkcow_facts["year"]

ax.plot(years, milkcow_facts["avg_price_milk"], marker="o", label="Milk Price ($/lb)")
ax.plot(
    years,
    milkcow_facts["dairy_ration"],
    marker="o",
    linestyle="dashed",
    label="Dairy Ration Price ($/lb)",
)

ax.set_xlabel("Year")
ax.set_ylabel("Price ($ per pound)")
ax.set_title("Milk Price vs. Dairy Ration Price Over Time")
ax.legend()
ax.grid()
plt.show()

# State-wise Milk Production

In [None]:
# Sum milk_produced per state over all rows and order the totals largest first.
state_totals = state_milk_production.groupby("state")["milk_produced"].sum()
state_milk_production_grouped = state_totals.sort_values(ascending=False)

# Bar chart of the ten largest totals on a 12x6 figure.
fig, ax = plt.subplots(figsize=(12, 6))
state_milk_production_grouped.iloc[:10].plot.bar(ax=ax, color="skyblue")
ax.set_xlabel("State")
ax.set_ylabel("Total Milk Produced (lbs)")
ax.set_title("Top 10 States in Milk Production")
plt.xticks(rotation=45)  # tilt the state names
plt.show()

# Trends in Dairy Product Consumption (Cheese & Butter)

In [None]:
# Parse the year column into datetimes so matplotlib receives datetime x values.
milk_products_facts["year"] = pd.to_datetime(milk_products_facts["year"], format="%Y")

# (column, legend label, extra line styling) for each series drawn below;
# an empty dict leaves the line style at matplotlib's default.
consumption_series = [
    ("cheese_american", "American Cheese", {}),
    ("cheese_other", "Other Cheese", {"linestyle": "dashed"}),
    ("butter", "Butter", {"linestyle": "dotted"}),
]

fig, ax = plt.subplots(figsize=(12, 6))
for column, legend_label, extra_style in consumption_series:
    ax.plot(
        milk_products_facts["year"],
        milk_products_facts[column],
        marker="o",
        label=legend_label,
        **extra_style,
    )

ax.set_xlabel("Year")
ax.set_ylabel("Consumption per Person (lbs)")
ax.set_title("Cheese & Butter Consumption Trends")
ax.legend()
ax.grid()
plt.show()

# Cheese Consumption Trends

In [None]:
# Parse the Year column into datetimes so matplotlib receives datetime x values.
clean_cheese["Year"] = pd.to_datetime(clean_cheese["Year"], format="%Y")

# Each column name doubles as its legend label; the dict carries extra line
# styling, with an empty dict leaving the line style at matplotlib's default.
cheese_lines = [
    ("Cheddar", {}),
    ("Mozzarella", {"linestyle": "dashed"}),
    ("Swiss", {"linestyle": "dotted"}),
]

fig, ax = plt.subplots(figsize=(12, 6))
for variety, line_style in cheese_lines:
    ax.plot(
        clean_cheese["Year"],
        clean_cheese[variety],
        marker="o",
        label=variety,
        **line_style,
    )

ax.set_xlabel("Year")
ax.set_ylabel("Consumption per Person (lbs)")
ax.set_title("Trends in Cheese Consumption")
ax.legend()
ax.grid()
plt.show()