In [7]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp

# --------------------------------------------------
# 1) Load CSVs produced by Hadoop
# --------------------------------------------------
year_stats = pd.read_csv("CSV Outputs/year_stats.csv")              # year, avg_mean_temp, total_precip
max_df = pd.read_csv("CSV Outputs/max_temp_per_year.csv")          # year, max_temp, max_temp_date
min_df = pd.read_csv("CSV Outputs/min_temp_per_year.csv")          # year, min_temp, min_temp_date

# Normalise every input frame BEFORE any column is referenced by name.
# Stripping header whitespace only after the merge is too late: a header such
# as " year" would already have raised KeyError in the lookups below.
for _frame in (year_stats, max_df, min_df):
    _frame.columns = [c.strip() for c in _frame.columns]
    # Make sure 'year' is numeric so the merge keys compare equal across files
    _frame["year"] = _frame["year"].astype(int)

# --------------------------------------------------
# 2) Merge into a single DataFrame
# --------------------------------------------------
# Default (inner) joins on 'year': only years present in all three files are
# kept. The *_date columns are intentionally left out of the merged frame.
df = year_stats.merge(max_df[["year", "max_temp"]], on="year")
df = df.merge(min_df[["year", "min_temp"]], on="year")

# Shorter names used by the plotting cells below
# ('avg_mean_temp' and 'total_precip' keep their original names)
df = df.rename(columns={
    "max_temp": "max_val",
    "min_temp": "min_val",
})

# Chronological order so line charts are drawn left to right
df = df.sort_values("year")

In [8]:
# --------------------------------------------------
# 3) Interactive line chart: yearly temperature trends
# --------------------------------------------------
# Wide-form input: each listed column becomes its own line over 'year'.
temperature_columns = ["avg_mean_temp", "max_val", "min_val"]
trend_axis_labels = {"value": "Temperature (°C)", "year": "Year"}

fig1 = px.line(
    df,
    x="year",
    y=temperature_columns,
    title="Interactive Temperature Trends per Year",
    labels=trend_axis_labels,
)
fig1.show()

In [9]:
# --------------------------------------------------
# 4) 2x2 interactive climate dashboard
# --------------------------------------------------
# One row per panel: (df column, legend name, grid row, grid col).
# The order matches the subplot titles and fixes the trace order in the figure.
dashboard_panels = [
    ("avg_mean_temp", "Avg Temp", 1, 1),
    ("max_val", "Max Temp", 1, 2),
    ("min_val", "Min Temp", 2, 1),
    ("total_precip", "Total Precip", 2, 2),
]

fig2 = sp.make_subplots(
    rows=2,
    cols=2,
    subplot_titles=(
        "Avg Temperature Trend",
        "Max Temp Trend",
        "Min Temp Trend",
        "Precipitation",
    ),
)

# Add one yearly series to each grid cell
for panel_column, panel_name, panel_row, panel_col in dashboard_panels:
    panel_trace = go.Scatter(x=df["year"], y=df[panel_column], name=panel_name)
    fig2.add_trace(panel_trace, row=panel_row, col=panel_col)

fig2.update_layout(height=700, width=1000, title="Interactive Climate Dashboard")
fig2.show()

In [10]:
# --------------------------------------------------
# 5) EXTRA: Temperature range band (min–max envelope)
# --------------------------------------------------
# The shaded area between the two lines is the min-to-max spread for each year.

# Lower edge of the band
min_line = go.Scatter(
    x=df["year"],
    y=df["min_val"],
    name="Min Temp",
    mode="lines",
)

# Upper edge; "tonexty" shades the area down to the previously added trace,
# so the min line must be added to the figure first.
max_line = go.Scatter(
    x=df["year"],
    y=df["max_val"],
    name="Max Temp",
    mode="lines",
    fill="tonexty",
)

fig3 = go.Figure()
fig3.add_trace(min_line)
fig3.add_trace(max_line)

fig3.update_layout(
    title="Temperature Range per Year (Min–Max Band)",
    xaxis_title="Year",
    yaxis_title="Temperature (°C)",
)
fig3.show()

In [11]:
# --------------------------------------------------
# 6) EXTRA: Dual-axis – Avg Temp vs Total Precipitation
# --------------------------------------------------
# Temperature is drawn against the primary y-axis and precipitation against
# the secondary one, so each series gets its own scale.
avg_temp_line = go.Scatter(
    x=df["year"],
    y=df["avg_mean_temp"],
    name="Avg Temp (°C)",
)
precip_bars = go.Bar(
    x=df["year"],
    y=df["total_precip"],
    name="Total Precip (mm)",
    opacity=0.5,  # semi-transparent so the temperature line stays visible
)

fig4 = sp.make_subplots(specs=[[{"secondary_y": True}]])
fig4.add_trace(avg_temp_line, secondary_y=False)
fig4.add_trace(precip_bars, secondary_y=True)

fig4.update_layout(title="Avg Temperature vs Total Precipitation per Year")

# Axis titles: shared x-axis, then one title per y-axis
fig4.update_xaxes(title_text="Year")
fig4.update_yaxes(title_text="Avg Temp (°C)", secondary_y=False)
fig4.update_yaxes(title_text="Total Precip (mm)", secondary_y=True)

fig4.show()

In [12]:
# --------------------------------------------------
# 7) EXTRA: Temperature anomaly (relative to long-term mean)
# --------------------------------------------------
# Baseline is the mean of the yearly averages over every year in df;
# the anomaly is each year's deviation from that baseline.
baseline = df["avg_mean_temp"].mean()
df["temp_anomaly"] = df["avg_mean_temp"] - baseline

anomaly_bars = go.Bar(x=df["year"], y=df["temp_anomaly"], name="Temp Anomaly")

fig5 = go.Figure()
fig5.add_trace(anomaly_bars)
fig5.add_hline(y=0)  # reference line separating above- and below-baseline years

fig5.update_layout(
    title=f"Temperature Anomaly per Year (baseline = {baseline:.2f} °C)",
    xaxis_title="Year",
    yaxis_title="Anomaly (°C)",
)
fig5.show()

In [13]:
# --------------------------------------------------
# 8) EXTRA: Scatter – Avg Temp vs Precipitation
# --------------------------------------------------
# One point per year, coloured by the 'year' column.
scatter_axis_labels = {
    "avg_mean_temp": "Avg Mean Temp (°C)",
    "total_precip": "Total Precip (mm)",
}

fig6 = px.scatter(
    df,
    x="avg_mean_temp",
    y="total_precip",
    color="year",
    title="Avg Temperature vs Total Precipitation",
    labels=scatter_axis_labels,
)
fig6.show()