In [1]:
# Mount Google Drive so files under /content/drive are readable from this runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
!pip install pandas numpy plotly




In [3]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import plotly.express as px
from scipy.stats import pearsonr
import glob
import plotly.express as px
import xarray as xr
import geopandas as gpd
import numpy as np
import plotly.graph_objects as go
from scipy import stats
import statsmodels.api as sm


In [19]:
# Load the heat-stroke deaths table and the heatwave summary table from Drive.
heat_stroke, heatwave_summary = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/drive/MyDrive/Impact_Scholars/heat-stroke-deaths.csv",
        "/content/drive/MyDrive/Impact_Scholars/heatwave_summary.csv",
    )
)

In [20]:
# Reshape the deaths table from one column per year to one row per (state, year),
# casting the year labels to integers so they can be used as a merge key.
heat_stroke_deaths_long = heat_stroke.melt(
    id_vars=["state_ut"],
    var_name="year",
    value_name="deaths",
).astype({"year": int})

# The summary table names its state column "st_nm"; align it with the deaths table.
heatwave_summary = heatwave_summary.rename(columns={"st_nm": "state_ut"})

# Keep only the (state, year) pairs present in both tables.
merged_df = heat_stroke_deaths_long.merge(
    heatwave_summary,
    how="inner",
    on=["state_ut", "year"],
)

# Preview the merged table.
merged_df.head()

Unnamed: 0,state_ut,year,deaths,heatwave_days,median_wbt
0,Andhra Pradesh,2001,46,20,22.679109
1,Assam,2001,7,7,19.547613
2,Bihar,2001,47,5,23.027893
3,Chhattisgarh,2001,5,22,22.91903
4,Goa,2001,6,3,19.366161


In [21]:

# Load the heat-stroke deaths table and the merged per-state heatwave table from Drive.
heat_stroke, heatwave = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/drive/MyDrive/Impact_Scholars/heat-stroke-deaths.csv",
        "/content/drive/MyDrive/Impact_Scholars/merged.csv",
    )
)

In [23]:
# Read the heat-stroke deaths table (one column per year).
heat_stroke = pd.read_csv("/content/drive/MyDrive/Impact_Scholars/heat-stroke-deaths.csv")

# One row per (state, year) instead of one column per year.
heat_stroke_long = pd.melt(
    heat_stroke,
    id_vars=['state_ut'],
    var_name='year',
    value_name='heat_deaths',
)

# Year labels come from column headers, so cast them to integers.
heat_stroke_long = heat_stroke_long.assign(year=heat_stroke_long['year'].astype(int))

# Total deaths across all states for each year.
heat_deaths_per_year = heat_stroke_long.groupby('year', as_index=False)['heat_deaths'].sum()


In [24]:
# Scatter plot of yearly heat deaths with an OLS trendline drawn by plotly express.
fig = px.scatter(heat_deaths_per_year, x='year', y='heat_deaths',
                 title="Heat-Related Deaths in India (2001-2022)",
                 labels={'year': 'Year', 'heat_deaths': 'Total Heat Deaths'},
                 trendline="ols")  # OLS regression line

# Fit the same regression directly with statsmodels so the slope and intercept
# are available for the annotation without going through px.get_trendline_results.
X = sm.add_constant(heat_deaths_per_year['year'])  # Add intercept column named 'const'
y = heat_deaths_per_year['heat_deaths']
ols_model = sm.OLS(y, X).fit()

# Extract slope & intercept by name from the fitted parameters
slope, intercept = ols_model.params['year'], ols_model.params['const']

# Build the equation label. The sign is rendered separately so a negative
# intercept reads "y = ax - b" instead of "y = ax + -b".
intercept_sign = "+" if intercept >= 0 else "-"
equation_text = f"y = {slope:.2f}x {intercept_sign} {abs(intercept):.2f}"

# Place the equation in the top-right corner of the data range
fig.add_annotation(
    x=heat_deaths_per_year['year'].max(),
    y=heat_deaths_per_year['heat_deaths'].max(),
    text=equation_text,
    showarrow=False,
    font=dict(size=12, color="white"),
    xanchor="right",
    yanchor="top",
    bgcolor="rgba(50, 50, 50, 0.7)"
)

# Add a line connecting the actual data points
fig.add_trace(go.Scatter(x=heat_deaths_per_year['year'],
                         y=heat_deaths_per_year['heat_deaths'],
                         mode='lines+markers',
                         name='Annual Heat Related Deaths'))

# Customize layout: angled year ticks, unified hover, horizontal legend above the plot
fig.update_layout(
    xaxis=dict(tickangle=-45, showgrid=True),
    yaxis=dict(showgrid=True),
    hovermode="x unified",
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.1,
        xanchor="center",
        x=0.5
    ),
    #template="plotly_dark"
)

fig.show()


In [25]:
# Bivariate correlation (heatwave days vs. deaths) over the rows of `heatwave`.
# Rows missing either value are dropped first so they cannot affect the statistic.
paired = heatwave[["heatwave_days", "deaths"]].dropna()
correlation, p_value = pearsonr(paired["heatwave_days"], paired["deaths"])
print(f"Bivariate Correlation between Heatwave Days and Heat-Related Deaths: {correlation:.3f}")
# Scientific notation: a fixed five-decimal format rounds small p-values to 0.00000.
print(f"P-value: {p_value:.3e}")

# Aggregate analysis of heatwave days & deaths
# ("global" in the names below means summed over all rows of a given year).
global_trend = heatwave.groupby("year")[["heatwave_days", "deaths"]].sum().reset_index()

# Interactive line plot of both yearly totals on a shared axis
fig = px.line(
    global_trend,
    x="year",
    y=["heatwave_days", "deaths"],
    markers=True,
    title="Global Heatwave Days & Heat-Related Deaths Over Years",
    labels={"value": "Count", "year": "Year"},
    #template="plotly_dark"
)
fig.show()

Bivariate Correlation between Heatwave Days and Heat-Related Deaths: 0.221
P-value: 0.00000


In [26]:
# Yearly population density table; show the first three rows as a sanity check.
POP_DENSITY_CSV = "/content/drive/MyDrive/Impact_Scholars/india_PD_2001_2022.csv"
pop_density = pd.read_csv(POP_DENSITY_CSV)
pop_density.head(n=3)

Unnamed: 0,year,pop_density
0,2001,363.0
1,2002,369.0
2,2003,376.0


In [27]:
# Give the density column a descriptive name ("year" is already named as needed).
pop_density = pop_density.rename(columns={"pop_density": "population_density"})

# Attach the yearly density value to every heatwave row of the same year.
merged_df = pd.merge(heatwave, pop_density, how="left", on="year")


In [28]:
# Preview the first ten rows of the merged table.
merged_df.iloc[:10]

Unnamed: 0.1,Unnamed: 0,state_ut,year,deaths,heatwave_days,median_wbt,population_density
0,0,Andhra Pradesh,2001,46,20,22.679109,363.0
1,1,Assam,2001,7,7,19.547613,363.0
2,2,Bihar,2001,47,5,23.027893,363.0
3,3,Chhattisgarh,2001,5,22,22.91903,363.0
4,4,Goa,2001,6,3,19.366161,363.0
5,5,Gujarat,2001,24,12,23.625233,363.0
6,6,Haryana,2001,1,20,23.656543,363.0
7,7,Himachal Pradesh,2001,2,26,11.529877,363.0
8,8,Jharkhand,2001,2,10,22.315289,363.0
9,9,Karnataka,2001,2,16,20.665346,363.0


In [30]:
# Yearly totals of deaths and heatwave days, summed over all rows of merged_df.
yearly_totals = merged_df.groupby("year", as_index=False)[["deaths", "heatwave_days"]].sum()

# Attach the population density columns for each year.
global_data = pd.merge(yearly_totals, pop_density, how="left", on="year")

# Yearly totals computed directly from the heatwave table
# (same aggregation, different column order).
global_trend = heatwave.groupby("year", as_index=False)[["heatwave_days", "deaths"]].sum()

In [31]:

# Scatter of yearly deaths against population density with an OLS trendline.
fig = px.scatter(
    global_data,
    x="population_density",
    y="deaths",
    trendline="ols",
    opacity=0.6,
    title="Correlation Between Population Density & Heat-Related Deaths",
    labels={"population_density": "Population Density (per sq km)", "deaths": "Heat-Related Deaths"}
)

# Retrieve the fitted statsmodels results that plotly express used for the trendline
trend_results = px.get_trendline_results(fig)

# Only draw the confidence band when a fit is actually available
if not trend_results.empty:
    ols_model = trend_results.iloc[0]["px_fit_results"]

    # Evenly spaced x values spanning the observed density range
    x_range = np.linspace(global_data["population_density"].min(), global_data["population_density"].max(), 100)

    # Confidence interval of the fitted values. has_constant="add" forces the
    # intercept column even if x_range happens to be constant, keeping the
    # design matrix shape consistent with the fitted model.
    predictions = ols_model.get_prediction(sm.add_constant(x_range, has_constant="add"))
    conf_int = predictions.conf_int()
    y_lower, y_upper = conf_int[:, 0], conf_int[:, 1]

    # Add confidence interval as a shaded area (invisible bounds + filled polygon)
    fig.add_traces([
        go.Scatter(x=x_range, y=y_lower, mode='lines', line=dict(color="lightblue", width=0), showlegend=False),
        go.Scatter(x=x_range, y=y_upper, mode='lines', line=dict(color="lightblue", width=0), showlegend=False),
        go.Scatter(
            x=np.concatenate([x_range, x_range[::-1]]),
            y=np.concatenate([y_lower, y_upper[::-1]]),
            fill="toself",
            fillcolor="rgba(173, 216, 230, 0.3)",  # Light blue transparent
            line=dict(color="rgba(255,255,255,0)"),
            name="Confidence Interval"
        )
    ])

# Restyle the trendline. Plotly express leaves the trace name empty when the
# figure has no colour/symbol grouping, so matching on the name alone never
# finds it; the hover template (which carries the "trendline" label) is
# checked as well.
for trace in fig.data:
    trace_label = f"{trace.name or ''} {trace.hovertemplate or ''}"
    if trace.mode == "lines" and "trendline" in trace_label:
        trace.line.width = 3  # Thicker line
        trace.line.color = "red"  # Trendline color

# Show the plot
fig.show()




In [32]:
# Yearly totals across all rows. Only the two count columns are summed:
# summing every column would also add up the state names, the saved index
# column and the per-state WBT values, none of which are meaningful totals.
# (plotly.graph_objects is already imported as `go` in the imports cell.)
national_trends = heatwave.groupby("year", as_index=False)[["heatwave_days", "deaths"]].sum()

# Attach the population density columns for each year
national_trends = national_trends.merge(pop_density, on="year", how="left")

# Create figure
fig = go.Figure()

# Heat-Related Deaths (Left Y-Axis)
fig.add_trace(go.Scatter(
    x=national_trends["year"],
    y=national_trends["deaths"],
    mode="lines+markers",
    name="Heat-Related Deaths",
    marker=dict(color="red"),
    yaxis="y1"
))

# Heatwave Days (Left Y-Axis)
fig.add_trace(go.Scatter(
    x=national_trends["year"],
    y=national_trends["heatwave_days"],
    mode="lines+markers",
    name="Heatwave Days",
    marker=dict(color="orange"),
    yaxis="y1"
))

# Population Density (Right Y-Axis)
fig.add_trace(go.Scatter(
    x=national_trends["year"],
    y=national_trends["population_density"],
    mode="lines+markers",
    name="Population Density",
    marker=dict(color="blue"),
    yaxis="y2"
))

# Layout settings. Axis title fonts are set through title.font; the older
# `titlefont` shortcut is rejected by recent plotly releases.
fig.update_layout(
    title="Heatwave Days, Deaths & Population Density Over Time",
    xaxis=dict(title="Year"),

    # Left Y-Axis (Heat-Related Deaths & Heatwave Days)
    yaxis=dict(
        title=dict(text="Deaths & Heatwave Days", font=dict(color="red")),
        tickfont=dict(color="red"),
    ),

    # Right Y-Axis (Population Density)
    yaxis2=dict(
        title=dict(text="Population Density", font=dict(color="blue")),
        tickfont=dict(color="blue"),
        overlaying="y",
        side="right"
    ),

    legend=dict(
        x=1.05,  # Moves the legend to the right
        y=1,
        xanchor="left",
        yanchor="top"
    ),

    #template="plotly_dark"
)

fig.show()


In [34]:
# Largest median_wbt value per (year, state), exposed under the name max_wbt.
max_wbt_per_state = (
    heatwave
    .groupby(["year", "state_ut"], as_index=False)["median_wbt"]
    .max()
    .rename(columns={"median_wbt": "max_wbt"})
)

# Show the first rows as plain text
print(max_wbt_per_state.head())


   year        state_ut    max_wbt
0  2001  Andhra Pradesh  22.679109
1  2001           Assam  19.547613
2  2001           Bihar  23.027893
3  2001      Chandigarh  21.861861
4  2001    Chhattisgarh  22.919030


In [35]:

# Merge max_wbt into the heatwave dataset. Any max_wbt column left over from a
# previous run of this cell is dropped first; otherwise the merge would produce
# max_wbt_x / max_wbt_y and the aggregation below would fail on "max_wbt".
heatwave = heatwave.drop(columns=["max_wbt"], errors="ignore").merge(
    max_wbt_per_state, on=["year", "state_ut"], how="left"
)

# Aggregate per year: total deaths and the highest max_wbt among all rows
national_trends = heatwave.groupby("year", as_index=False).agg({"deaths": "sum", "max_wbt": "max"})

# Create figure with two y-axes
fig = go.Figure()

# Add Deaths (Left Y-axis)
fig.add_trace(go.Scatter(
    x=national_trends["year"],
    y=national_trends["deaths"],
    mode="lines+markers",
    name="Heat-Related Deaths",
    line=dict(color="red", width=2),
    yaxis="y1"
))

# Add Max WBT (Right Y-axis)
fig.add_trace(go.Scatter(
    x=national_trends["year"],
    y=national_trends["max_wbt"],
    mode="lines+markers",
    name="Max WBT (°C)",
    line=dict(color="orange", width=2, dash="dot"),
    yaxis="y2"
))

# Layout settings. Axis title fonts are set through title.font; the older
# `titlefont` shortcut is rejected by recent plotly releases.
fig.update_layout(
    title="Heatwave Magnitude (Max WBT) vs. Heat-Related Deaths Over Time",
    #template="plotly_dark",
    xaxis=dict(title="Year"),
    yaxis=dict(
        title=dict(text="Heat-Related Deaths", font=dict(color="red")),
        tickfont=dict(color="red"),
    ),
    yaxis2=dict(
        title=dict(text="Max Wet Bulb Temperature (°C)", font=dict(color="orange")),
        tickfont=dict(color="orange"),
        overlaying="y",
        side="right",
    ),
    legend=dict(x=1.1, y=1),  # Move legend to the right
)

# Show plot
fig.show()


In [None]:
# Recompute the per-(year, state) maximum of median_wbt, naming the result
# column max_wbt directly through a named aggregation.
wbt_by_year_state = heatwave.groupby(["year", "state_ut"])["median_wbt"]
max_wbt_per_state = wbt_by_year_state.agg(max_wbt="max").reset_index()

# Show the first rows as plain text
print(max_wbt_per_state.head())


   year        state_ut    max_wbt
0  2001  Andhra Pradesh  22.679109
1  2001           Assam  19.547613
2  2001           Bihar  23.027893
3  2001      Chandigarh  21.861861
4  2001    Chhattisgarh  22.919030


In [37]:
# display/HTML are imported from IPython.display; importing them from
# IPython.core.display is a deprecated path.
from IPython.display import HTML, display

# Ensure Data Types
heatwave['year'] = heatwave['year'].astype(int)

# List of Union Territories to exclude
union_territories = [
    "Andaman & Nicobar Islands", "Chandigarh", "Dadra & Nagar Haveli and Daman & Diu",
    "Lakshadweep", "Delhi", "Puducherry", "Jammu & Kashmir", "Ladakh"
]

# Remove Union Territories (later cells keep working on this filtered table)
heatwave = heatwave[~heatwave["state_ut"].isin(union_territories)]

# Filter for selected years
selected_years = [2005, 2015, 2019]
filtered_df = heatwave[heatwave['year'].isin(selected_years)]


def plot_stacked_bar(df, years=None):
    """Show one stacked bar chart per year of heatwave days and deaths by state.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns "year", "state_ut", "heatwave_days" and "deaths".
    years : sequence of int, optional
        Years to plot, in display order. Defaults to the module-level
        ``selected_years`` so existing calls behave as before.

    Each figure is displayed immediately; a horizontal rule is rendered
    between consecutive figures (not after the last one).
    """
    if years is None:
        years = selected_years
    years = list(years)

    for i, year in enumerate(years):
        year_data = df[df['year'] == year]

        # Create figure
        fig = go.Figure()

        # Stacked Bars: Heatwave Days & Deaths
        fig.add_trace(go.Bar(
            x=year_data["state_ut"], y=year_data["heatwave_days"],
            name="Heatwave Days", marker_color="orange"
        ))

        fig.add_trace(go.Bar(
            x=year_data["state_ut"], y=year_data["deaths"],
            name="Heat-Related Deaths", marker_color="red"
        ))

        # Layout adjustments
        fig.update_layout(
            title=f"Heatwave Days & Heat-Related Deaths in {year} (State-wise, Excluding UTs)",
            xaxis=dict(title="State", tickangle=-45),
            yaxis=dict(title="Count"),
            #template="plotly_dark",
            barmode="stack",  # Stacked bars
            legend=dict(x=1.05, y=1, xanchor="left", yanchor="top", bgcolor="rgba(50,50,50,0.6)")
        )

        fig.show()

        # Add a separator line between plots (except after the last plot)
        if i < len(years) - 1:
            display(HTML("<hr style='border: 1px solid white; margin: 20px 0;'>"))


# Generate stacked bar charts for 2005, 2015, and 2019 (excluding Union Territories)
plot_stacked_bar(filtered_df)


In [40]:
# Largest median_wbt per (year, state), exposed under the name max_wbt
max_wbt_per_state = (
    heatwave.groupby(["year", "state_ut"])["median_wbt"].max().reset_index()
    .rename(columns={"median_wbt": "max_wbt"})
)

# Maximum WBT for each year, derived from the per-state table built above so
# this cell does not rely on a max_wbt column having been merged into
# `heatwave` by another cell.
max_wbt_per_year = max_wbt_per_state.groupby("year")["max_wbt"].max().reset_index()

# Attach the yearly maximum to the yearly totals. A max_wbt column left over
# from a previous run is dropped first; otherwise re-running the cell would
# create max_wbt_x / max_wbt_y columns.
global_trend = global_trend.drop(columns=["max_wbt"], errors="ignore").merge(
    max_wbt_per_year, on="year", how="left"
)

# Display the results
print(max_wbt_per_state.head())

   year        state_ut    max_wbt
0  2001  Andhra Pradesh  22.679109
1  2001           Assam  19.547613
2  2001           Bihar  23.027893
3  2001    Chhattisgarh  22.919030
4  2001             Goa  19.366161


In [41]:
# Yearly totals of heatwave days and deaths over the current `heatwave` table
global_trend = heatwave.groupby("year")[["heatwave_days", "deaths"]].sum().reset_index()

# Attach the yearly maximum WBT (expects `max_wbt_per_year` with columns year, max_wbt)
global_trend = global_trend.merge(max_wbt_per_year, on="year", how="left")

# Create a figure
fig = go.Figure()

# Add Heatwave Days (Left Y-axis)
fig.add_trace(go.Scatter(
    x=global_trend["year"],
    y=global_trend["heatwave_days"],
    mode="lines+markers",
    name="Heatwave Days",
    line=dict(color="blue", width=2, dash="dash"),
    yaxis="y1"
))

# Add Heat-Related Deaths (Left Y-axis)
fig.add_trace(go.Scatter(
    x=global_trend["year"],
    y=global_trend["deaths"],
    mode="lines+markers",
    name="Heat-Related Deaths",
    line=dict(color="red", width=2),
    yaxis="y1"
))

# Add Max WBT (Right Y-axis)
fig.add_trace(go.Scatter(
    x=global_trend["year"],
    y=global_trend["max_wbt"],
    mode="lines+markers",
    name="Max WBT (°C)",
    line=dict(color="orange", width=3),
    yaxis="y2"
))

# Dual-axis layout. Axis title fonts are set through title.font; the older
# `titlefont` shortcut is rejected by recent plotly releases.
fig.update_layout(
    title="Global Heatwave Trends: Deaths, Heatwave Days & Max WBT",
    xaxis=dict(title="Year"),
    yaxis=dict(
        title=dict(text="Heat-Related Deaths & Heatwave Days", font=dict(color="black")),
        tickfont=dict(color="black"),
    ),
    yaxis2=dict(
        title=dict(text="Max Wet Bulb Temperature (°C)", font=dict(color="orange")),
        tickfont=dict(color="orange"),
        overlaying="y",   # Allows dual y-axes
        side="right"
    ),
    legend=dict(x=0.8, y=1.1, orientation="h")
)

fig.show()
