# Week 1 — Day 4: Plotly Express Dashboard (Code Version)

This notebook mirrors the Plotly Studio dashboard using **Plotly Express**.
If `Day4_dataset.csv` is not found, the notebook generates a small synthetic sample dataset with the required schema and saves it to that path, so later runs load the saved file.

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path

# Dataset location, resolved relative to the notebook's working directory.
CSV_PATH = Path("Day4_dataset.csv")

# Columns every dataset must provide; load_or_sample() rejects a CSV lacking any of them.
required_cols = [
    # grouping dimensions used by the charts
    "country",
    "grain_type",
    "probiotic_type",
    # numeric measures summarised by the KPIs
    "avg_daily_gain_kg",
    "feed_conversion_ratio",
    "roi_percent",
    "confidence",
    # where each row came from
    "source",
]

def load_or_sample(path: Path) -> pd.DataFrame:
    """Return the dashboard dataset, synthesising one when the CSV is absent.

    Parameters
    ----------
    path : Path
        Location of the CSV to read. When nothing exists there, the generated
        sample is written to this same location.

    Returns
    -------
    pd.DataFrame
        The CSV contents, or 200 synthetic rows drawn from a generator seeded
        with 42 (so the sample is the same on every run).

    Raises
    ------
    ValueError
        If the CSV exists but lacks any column named in ``required_cols``.
    """
    if path.exists():
        loaded = pd.read_csv(path)
        absent = [name for name in required_cols if name not in loaded.columns]
        if absent:
            raise ValueError(f"Missing columns in CSV: {absent}")
        return loaded

    # Fallback: small synthetic sample.
    generator = np.random.default_rng(42)
    country_pool = ["Brazil", "United States"]
    grain_pool = ["corn", "barley", "sorghum"]
    probiotic_pool = ["L. plantarum", "B. subtilis", "RumiBoost"]

    def draw_record() -> dict:
        # The draw order below fixes the random stream; do not reorder.
        country = generator.choice(country_pool)
        grain = generator.choice(grain_pool)
        probiotic = generator.choice(probiotic_pool)
        daily_gain = generator.normal(1.25, 0.2)

        conversion = generator.normal(6.9, 0.5)
        if probiotic in ("L. plantarum", "B. subtilis"):
            conversion = conversion - 0.15  # these two strains get a lower FCR

        roi_bonus = 1.5 if probiotic != "RumiBoost" else 0
        roi_raw = generator.normal(15, 4) + roi_bonus - 0.2 * (conversion - 6.8)
        roi_bounded = min(25, max(5, roi_raw))  # keep ROI inside [5, 25]

        certainty = float(np.clip(generator.normal(0.9, 0.03), 0.75, 0.98))
        return {
            "country": country,
            "grain_type": grain,
            "probiotic_type": probiotic,
            "avg_daily_gain_kg": round(float(daily_gain), 3),
            "feed_conversion_ratio": round(float(conversion), 3),
            "roi_percent": round(float(roi_bounded), 2),
            "confidence": certainty,
            "source": "Sample (Day4 fallback)",
        }

    sample = pd.DataFrame([draw_record() for _ in range(200)])
    sample.to_csv(path, index=False)
    print(f"Created sample dataset at {path.resolve()}")
    return sample

# Read the CSV when it exists; otherwise build (and save) the synthetic sample.
df = load_or_sample(path=CSV_PATH)
df.head()  # preview: head() shows five rows by default

In [None]:
# Filters — leave a value as None to keep every row for that dimension
FILTER_COUNTRY = None          # e.g., "Brazil" or "United States"
FILTER_PROBIOTIC = None        # e.g., "L. plantarum"
FILTER_GRAIN = None            # e.g., "corn"

# Each filterable column paired with its requested value, in application order.
_filters_by_column = {
    "country": FILTER_COUNTRY,
    "probiotic_type": FILTER_PROBIOTIC,
    "grain_type": FILTER_GRAIN,
}

query_df = df.copy()
for _column, _wanted in _filters_by_column.items():
    if _wanted:  # falsy values (None, "") mean "no filter on this column"
        query_df = query_df[query_df[_column] == _wanted]

print("Filtered rows:", len(query_df))
query_df.head(3)

In [None]:
# KPIs — mean of each headline metric over the filtered rows
_kpi_spec = [
    # (label, column, decimals)
    ("Avg FCR (↓ better)", "feed_conversion_ratio", 3),
    ("Avg ADG (kg)", "avg_daily_gain_kg", 3),
    ("Avg ROI (%)", "roi_percent", 2),
    ("Mean Confidence", "confidence", 3),
]
kpi = {
    label: round(query_df[column].mean(), decimals)
    for label, column, decimals in _kpi_spec
}
kpi

In [None]:
import plotly.express as px
import pandas as pd

# Long-format table for plotting: one row per KPI (label in "Metric", mean in "Value").
kpi_df = pd.DataFrame({"Metric": list(kpi.keys()), "Value": list(kpi.values())})

# Bars are labelled with their value above the bar; axis titles are removed
# because the tick labels already name each metric.
fig_kpi = (
    px.bar(kpi_df, x="Metric", y="Value", text="Value", title="KPI Summary")
    .update_traces(textposition="outside")
    .update_layout(xaxis_title=None, yaxis_title=None)
)
fig_kpi.show()

In [None]:
# Histogram — FCR distribution, one facet column per country
fig_hist = px.histogram(
    data_frame=query_df,
    x="feed_conversion_ratio",
    facet_col="country",
    nbins=20,
    title="Distribution of Feed Conversion Ratio (FCR) by Country",
).update_layout(bargap=0.05)  # small gap between neighbouring bars
fig_hist.show()

In [None]:
# Box — ADG per probiotic, one coloured box per country
fig_box = px.box(
    data_frame=query_df,
    x="probiotic_type",
    y="avg_daily_gain_kg",
    color="country",
    points="all",  # draw every observation alongside its box
    title="Average Daily Gain by Probiotic Type, colored by Country",
).update_layout(
    xaxis_title="Probiotic Type",
    yaxis_title="Avg Daily Gain (kg)",
)
fig_box.show()

In [None]:
# Scatter — FCR vs ROI (size = ADG, color = Probiotic)
hover_cols = [
    "country", "probiotic_type", "grain_type",
    "avg_daily_gain_kg", "feed_conversion_ratio",
    "roi_percent", "confidence", "source",
]

# Marker size is driven by ADG, and Plotly raises a ValueError for NaN or
# negative marker sizes. The loader only checks column names, so a user-supplied
# CSV can contain such rows; drop them up front (ge(0) is False for NaN) and
# report how many were skipped. Clean data passes through unchanged.
scatter_df = query_df[query_df["avg_daily_gain_kg"].ge(0)]
_skipped = len(query_df) - len(scatter_df)
if _skipped:
    print(f"Scatter: skipped {_skipped} row(s) with missing or negative avg_daily_gain_kg")

fig_scatter = px.scatter(
    scatter_df, x="feed_conversion_ratio", y="roi_percent",
    size="avg_daily_gain_kg", color="probiotic_type", hover_data=hover_cols,
    title="FCR vs ROI (size = ADG, color = Probiotic)"
)
fig_scatter.update_layout(xaxis_title="Feed Conversion Ratio (↓ better)", yaxis_title="ROI (%)")
fig_scatter.show()

In [None]:
# Bar — mean ROI for every (grain, country) pair, countries side by side
agg = (
    query_df
    .groupby(["grain_type", "country"], as_index=False)["roi_percent"]
    .mean()
)

fig_bar = (
    px.bar(
        data_frame=agg,
        x="grain_type",
        y="roi_percent",
        color="country",
        barmode="group",
        text="roi_percent",
        title="Mean ROI by Grain Type (grouped by Country)",
    )
    # Show each bar's mean to two decimals, above the bar.
    .update_traces(texttemplate="%{text:.2f}", textposition="outside")
    .update_layout(xaxis_title="Grain Type", yaxis_title="Mean ROI (%)")
)
fig_bar.show()

In [None]:
# Save all charts to HTML
from pathlib import Path
out_dir = Path("week1/exports")
out_dir.mkdir(parents=True, exist_ok=True)  # safe to re-run: existing folder is fine

# Output file name -> figure, in the order the charts appear in the notebook.
_exports = {
    "kpi_summary.html": fig_kpi,
    "hist_fcr_by_country.html": fig_hist,
    "box_adg_by_probiotic_country.html": fig_box,
    "scatter_fcr_vs_roi.html": fig_scatter,
    "bar_mean_roi_by_grain_country.html": fig_bar,
}
for _filename, _figure in _exports.items():
    # "cdn" references plotly.js from the CDN instead of embedding it in each file.
    _figure.write_html(out_dir / _filename, include_plotlyjs="cdn")

print("Saved HTML charts to:", out_dir.resolve())