In [1]:
# Mount Google Drive at /content/drive; the CSV paths used in later cells live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install pandas numpy plotly




In [3]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import plotly.express as px
from scipy.stats import pearsonr
import glob
import plotly.express as px
import xarray as xr
import geopandas as gpd
import numpy as np

import plotly.graph_objects as go
from scipy import stats


In [4]:
# Raw inputs read from the mounted Drive folder: the heat-stroke deaths table
# and the heatwave summary table.
heat_stroke = pd.read_csv(
    "/content/drive/MyDrive/Impact_Scholars/heat-stroke-deaths.csv"
)
heatwave_summary = pd.read_csv(
    "/content/drive/MyDrive/Impact_Scholars/heatwave_summary.csv"
)

In [5]:
# Reshape the deaths table to long form: every non-`state_ut` column header
# becomes a `year` value (cast to int so it can be used as a join key).
heat_stroke_deaths_long = (
    heat_stroke
    .melt(id_vars=["state_ut"], var_name="year", value_name="deaths")
    .astype({"year": int})
)

# The summary table names its state column `st_nm`; align it with the deaths table.
heatwave_summary.rename(columns={"st_nm": "state_ut"}, inplace=True)

# Inner join: keep only (state_ut, year) pairs present in both tables.
merge_keys = ["state_ut", "year"]
merged_df = heat_stroke_deaths_long.merge(heatwave_summary, how="inner", on=merge_keys)

# Preview the joined table
merged_df.head()

Unnamed: 0,state_ut,year,deaths,heatwave_days,median_wbt
0,Andhra Pradesh,2001,46,20,22.679109
1,Assam,2001,7,7,19.547613
2,Bihar,2001,47,5,23.027893
3,Chhattisgarh,2001,5,22,22.91903
4,Goa,2001,6,3,19.366161


In [6]:

# Re-read the deaths table and load the previously saved deaths/heatwave table.
# NOTE(review): the preview of `heatwave` shows an "Unnamed: 0" column, which
# suggests merged.csv was written together with its index - TODO confirm and
# consider reading it with index_col=0.
heat_stroke = pd.read_csv(
    "/content/drive/MyDrive/Impact_Scholars/heat-stroke-deaths.csv"
)
heatwave = pd.read_csv(
    "/content/drive/MyDrive/Impact_Scholars/merged.csv"
)

In [7]:
# Show the first three rows of the loaded table as a quick sanity check.
heatwave.head(3)

Unnamed: 0.1,Unnamed: 0,state_ut,year,deaths,heatwave_days,median_wbt
0,0,Andhra Pradesh,2001,46,20,22.679109
1,1,Assam,2001,7,7,19.547613
2,2,Bihar,2001,47,5,23.027893


In [9]:
# Wide -> long: each year column becomes its own row, with `year` cast to int.
heat_stroke_long = heat_stroke.melt(
    id_vars=['state_ut'], var_name='year', value_name='heat_deaths'
).astype({'year': int})

# Total heat deaths per year, summed over every row of that year.
heat_deaths_per_year = (
    heat_stroke_long
    .groupby('year', as_index=False)['heat_deaths']
    .sum()
)


In [10]:
# Yearly totals as points plus an OLS regression line fitted by plotly express.
fig = px.scatter(
    heat_deaths_per_year,
    x='year',
    y='heat_deaths',
    trendline="ols",
    title="Heat-Related Deaths in India (2001-2022)",
    labels={'year': 'Year', 'heat_deaths': 'Total Heat Deaths'},
)

# Overlay the observed series as a connected line so year-to-year swings stay visible.
actual_deaths_trace = go.Scatter(
    x=heat_deaths_per_year['year'],
    y=heat_deaths_per_year['heat_deaths'],
    mode='lines+markers',
    name='Actual Deaths Data',
)
fig.add_trace(actual_deaths_trace)

# Horizontal legend centred just above the plot area.
legend_above_plot = dict(orientation="h", yanchor="bottom", y=1.1, xanchor="center", x=0.5)

# Last expression of the cell: update_layout returns the figure, which renders it.
fig.update_layout(
    xaxis=dict(tickangle=-45, showgrid=True),
    yaxis=dict(showgrid=True),
    hovermode="x unified",
    showlegend=True,
    legend=legend_above_plot,
)


In [11]:
# One line per state/UT: deaths by year, with a marker at every data point.
state_line_options = dict(
    x="year",
    y="deaths",
    color="state_ut",
    markers=True,
    title="State-wise Heat-Related Deaths Over Years",
    labels={"year": "Year", "deaths": "Deaths"},
)
fig2 = px.line(heatwave, **state_line_options)
fig2.show()

In [12]:
# Correlation between heatwave days and deaths across all rows of `heatwave`.
heatwave_days_col = heatwave["heatwave_days"]
deaths_col = heatwave["deaths"]
correlation = heatwave_days_col.corr(deaths_col)
print(f"Correlation between Heatwave Days and Heat-Related Deaths: {correlation:.3f}")

# Interactive scatter: one bubble per row, coloured by state and sized by deaths;
# the year is added to the hover box.
fig1 = px.scatter(
    data_frame=heatwave,
    x="heatwave_days",
    y="deaths",
    size="deaths",
    color="state_ut",
    hover_data=["year"],
    labels={"heatwave_days": "Heatwave Days", "deaths": "Heat-Related Deaths"},
    title="Heatwave Days vs. Heat-Related Deaths (State-wise)",
)
fig1.show()




Correlation between Heatwave Days and Heat-Related Deaths: 0.221


In [15]:

# Total heatwave days and total deaths per state over all rows of `heatwave`,
# each sorted from largest to smallest.
state_heatwave_days = heatwave.groupby("state_ut")["heatwave_days"].sum().sort_values(ascending=False)
state_deaths = heatwave.groupby("state_ut")["deaths"].sum().sort_values(ascending=False)

# Figure with two y-axes: heatwave days on the left, deaths on the right.
fig = go.Figure()

# Each bar trace gets its own offsetgroup. Bars attached to different (overlaid)
# y-axes are not placed side by side by barmode="group" alone, so without the
# offsetgroup the second series is drawn on top of the first.
fig.add_trace(go.Bar(
    x=state_heatwave_days.index,
    y=state_heatwave_days.values,
    name="Total Heatwave Days",
    marker_color="orange",
    yaxis="y1",
    offsetgroup="heatwave_days",
))

fig.add_trace(go.Bar(
    x=state_deaths.index,
    y=state_deaths.values,
    name="Total Heat-Related Deaths",
    marker_color="red",
    yaxis="y2",
    offsetgroup="deaths",
))

# Axis titles are given as title=dict(text=..., font=...): the older `titlefont`
# shortcut is no longer accepted by current plotly releases.
fig.update_layout(
    title="Total Heatwave Days & Heat-Related Deaths per State",
    xaxis=dict(title="State"),
    yaxis=dict(
        title=dict(text="Total Heatwave Days", font=dict(color="orange")),
        tickfont=dict(color="orange"),
    ),
    yaxis2=dict(
        title=dict(text="Total Heat-Related Deaths", font=dict(color="red")),
        tickfont=dict(color="red"),
        overlaying="y",
        side="right",
    ),
    # Legend outside the plot area, top right, so it does not cover the right axis.
    legend=dict(x=1.05, y=1, xanchor="left", yanchor="top"),
    barmode="group",
)

fig.show()


In [None]:
# Bivariate Pearson correlation (heatwave days vs. deaths) with its p-value.
# Rows with a missing value in either column are dropped first, so the result
# agrees with pandas' Series.corr, which ignores incomplete pairs.
paired = heatwave[["heatwave_days", "deaths"]].dropna()
correlation, p_value = pearsonr(paired["heatwave_days"], paired["deaths"])
print(f"Bivariate Correlation between Heatwave Days and Heat-Related Deaths: {correlation:.3f}")
# Significant-digit formatting: a fixed five-decimal format prints very small
# p-values as 0.00000, which hides their magnitude.
print(f"P-value: {p_value:.3g}")

# National analysis of heatwave days & deaths

# Yearly totals, summed over all states/UTs present in `heatwave`.
global_trend = heatwave.groupby("year")[["heatwave_days", "deaths"]].sum().reset_index()

# Interactive line plot of both yearly totals on a shared "Count" axis.
fig = px.line(
    global_trend,
    x="year",
    y=["heatwave_days", "deaths"],
    markers=True,
    title="National Heatwave Days & Heat-Related Deaths Over Years",
    labels={"value": "Count", "year": "Year"},
)
fig.show()

Bivariate Correlation between Heatwave Days and Heat-Related Deaths: 0.221
P-value: 0.00000


In [16]:
# State-wise trends: yearly heat-related deaths, one coloured line per state/UT.
fig2 = px.line(
    data_frame=heatwave,
    x="year",
    y="deaths",
    color="state_ut",
    labels={"year": "Year", "deaths": "Heat-Related Deaths"},
    title="State-wise Heat-Related Deaths Over Years",
    markers=True,
)
fig2.show()

In [17]:
# Keep only the rows for 2015.
is_2015 = heatwave["year"].eq(2015)
heatwave_2015 = heatwave.loc[is_2015]

# Heatwave days vs. deaths for that single year: one bubble per state,
# coloured by state and sized by deaths.
fig1 = px.scatter(
    data_frame=heatwave_2015,
    x="heatwave_days",
    y="deaths",
    size="deaths",
    color="state_ut",
    hover_data=["state_ut"],
    labels={"heatwave_days": "Heatwave Days", "deaths": "Heat-Related Deaths"},
    title="Heatwave Days vs. Heat-Related Deaths (2015)",
)
fig1.show()



In [18]:
# Population-density table (one row per year in the preview); show the first three rows.
pop_density = pd.read_csv(
    "/content/drive/MyDrive/Impact_Scholars/population_density_2001_2022.csv"
)
pop_density.head(n=3)

Unnamed: 0,Year,Population Density
0,2001,320.5
1,2002,328.2
2,2003,336.1


In [19]:
# Snake-case the population-density columns so `year` can serve as the join key.
pop_density.rename(
    columns={"Year": "year", "Population Density": "population_density"},
    inplace=True,
)

# Left join on year: every heatwave row is kept and gains that year's density.
merged_df = pd.merge(heatwave, pop_density, how="left", on="year")


In [21]:

# Yearly totals of deaths and heatwave days, summed over all rows of each year.
yearly_totals = merged_df.groupby("year")[["deaths", "heatwave_days"]].sum()

# Attach the population density of each year to the yearly totals.
global_data = yearly_totals.reset_index().merge(pop_density, on="year", how="left")

# Off-diagonal entry of the 2x2 correlation matrix is the density/deaths correlation.
density_death_corr = np.corrcoef(global_data["population_density"], global_data["deaths"])
correlation = density_death_corr[0, 1]
print(f"Correlation between Population Density and Heat-Related Deaths: {correlation:.3f}")


Correlation between Population Density and Heat-Related Deaths: 0.582


In [23]:

# Yearly deaths (left axis) and population density (right axis) on a shared year axis.
fig = go.Figure()

# Heat-related deaths -> left y-axis
fig.add_trace(go.Scatter(
    x=global_data["year"],
    y=global_data["deaths"],
    mode="lines+markers",
    name="Heat-Related Deaths",
    marker=dict(color="red"),
    yaxis="y1"
))

# Population density -> right y-axis
fig.add_trace(go.Scatter(
    x=global_data["year"],
    y=global_data["population_density"],
    mode="lines+markers",
    name="Population Density",
    marker=dict(color="blue"),
    yaxis="y2"
))

# Axis titles are given as title=dict(text=..., font=...): the older `titlefont`
# shortcut is no longer accepted by current plotly releases.
fig.update_layout(
    title="Population Density vs. Heat-Related Deaths Over Time",
    xaxis=dict(title="Year"),

    # Left y-axis, coloured like the deaths trace
    yaxis=dict(
        title=dict(text="Heat-Related Deaths", font=dict(color="red")),
        tickfont=dict(color="red"),
    ),

    # Right y-axis, overlaid on the left one and coloured like the density trace
    yaxis2=dict(
        title=dict(text="Population Density", font=dict(color="blue")),
        tickfont=dict(color="blue"),
        overlaying="y",
        side="right"
    ),

    # Legend outside the plot area so it does not cover the right axis
    legend=dict(
        x=1.05,
        y=1,
        xanchor="left",
        yanchor="top"
    ),
)

fig.show()


In [None]:

# Scatter of yearly deaths against population density with an OLS regression line.
# The trendline colour is set through px itself: px does not put "trendline" in
# the trace name, so looking for that word in `trace.name` never matches.
fig = px.scatter(
    global_data,
    x="population_density",
    y="deaths",
    trendline="ols",  # Ordinary Least Squares (OLS) regression line
    trendline_color_override="red",  # contrasting colour for the regression line
    opacity=0.6,
    title="Correlation Between Population Density & Heat-Related Deaths",
    labels={"population_density": "Population Density (per sq km)", "deaths": "Heat-Related Deaths"}
)

# Thicken the regression line. It is the only trace drawn with mode="lines"
# (the data points use mode="markers"), so the selector targets it alone.
fig.update_traces(line=dict(width=3), selector=dict(mode="lines"))

# Show the interactive plot
fig.show()



In [None]:

# Yearly national totals. Only the two columns that are plotted are summed:
# summing every column would also try to "sum" the state names and any
# leftover index column from the CSV.
national_trends = heatwave.groupby("year", as_index=False)[["deaths", "heatwave_days"]].sum()

# Attach the population density of each year.
national_trends = national_trends.merge(pop_density, on="year", how="left")

fig = go.Figure()

# Heat-related deaths -> left y-axis
fig.add_trace(go.Scatter(
    x=national_trends["year"],
    y=national_trends["deaths"],
    mode="lines+markers",
    name="Heat-Related Deaths",
    marker=dict(color="red"),
    yaxis="y1"
))

# Heatwave days -> left y-axis as well (shares the scale with deaths)
fig.add_trace(go.Scatter(
    x=national_trends["year"],
    y=national_trends["heatwave_days"],
    mode="lines+markers",
    name="Heatwave Days",
    marker=dict(color="orange"),
    yaxis="y1"
))

# Population density -> right y-axis
fig.add_trace(go.Scatter(
    x=national_trends["year"],
    y=national_trends["population_density"],
    mode="lines+markers",
    name="Population Density",
    marker=dict(color="blue"),
    yaxis="y2"
))

# Axis titles are given as title=dict(text=..., font=...): the older `titlefont`
# shortcut is no longer accepted by current plotly releases.
fig.update_layout(
    title="Heatwave Days, Deaths & Population Density Over Time",
    xaxis=dict(title="Year"),

    # Left y-axis (deaths and heatwave days)
    yaxis=dict(
        title=dict(text="Deaths & Heatwave Days", font=dict(color="red")),
        tickfont=dict(color="red"),
    ),

    # Right y-axis (population density), overlaid on the left one
    yaxis2=dict(
        title=dict(text="Population Density", font=dict(color="blue")),
        tickfont=dict(color="blue"),
        overlaying="y",
        side="right"
    ),

    # Legend outside the plot area so it does not cover the right axis
    legend=dict(
        x=1.05,
        y=1,
        xanchor="left",
        yanchor="top"
    ),
)

fig.show()


In [25]:


# Yearly national death totals (sum over all rows of each year).
national_deaths = heatwave.groupby("year")["deaths"].sum().reset_index()

# Attach the population density of each year.
national_data = pd.merge(national_deaths, pop_density, on="year", how="left")

# One bubble per year: density on x, deaths on y, coloured by year and sized by deaths.
axis_labels = {
    "population_density": "Population Density (per sq km)",
    "deaths": "Heat-Related Deaths",
    "year": "Year"
}
fig = px.scatter(
    data_frame=national_data,
    x="population_density",
    y="deaths",
    size="deaths",
    color="year",
    hover_name="year",
    labels=axis_labels,
    title="Heat-Related Deaths vs. Population Density",
)

fig.show()


In [26]:
# Group the heatwave data by year and total the deaths and heatwave days of
# each year at the national level (aggregated across all states or regions).
national_stats = heatwave.groupby("year", as_index=False)[["deaths", "heatwave_days"]].sum()

# Attach the population density of each year.
national_data = pd.merge(national_stats, pop_density, on="year", how="left")

# One bubble per year: density on x, deaths on y, coloured by year,
# with the bubble size encoding that year's heatwave days.
axis_labels = {
    "population_density": "Population Density (per sq km)",
    "deaths": "Heat-Related Deaths",
    "heatwave_days": "Heatwave Days",
    "year": "Year"
}
fig = px.scatter(
    data_frame=national_data,
    x="population_density",
    y="deaths",
    size="heatwave_days",
    color="year",
    hover_name="year",
    labels=axis_labels,
    title="Heat-Related Deaths vs. Population Density (Heatwave Days as Size)",
)

fig.show()


In [28]:
# Names of the three states with the largest total deaths over all years.
deaths_by_state = heatwave.groupby("state_ut")["deaths"].sum()
top_states = list(deaths_by_state.nlargest(3).index)

# Rows of `heatwave` belonging to those states only.
in_top_states = heatwave["state_ut"].isin(top_states)
top_states_df = heatwave.loc[in_top_states]

In [29]:

# Filter each top state's rows once; both trace loops below reuse these frames.
frames_by_state = {
    name: top_states_df[top_states_df["state_ut"] == name] for name in top_states
}

fig = go.Figure()

# Heat-related deaths per state -> left y-axis (solid lines)
for state, state_data in frames_by_state.items():
    fig.add_trace(go.Scatter(
        x=state_data["year"], y=state_data["deaths"],
        mode="lines+markers", name=f"{state} Deaths",
        yaxis="y1"
    ))

# Heatwave days per state -> right y-axis (dotted lines to tell them apart)
for state, state_data in frames_by_state.items():
    fig.add_trace(go.Scatter(
        x=state_data["year"], y=state_data["heatwave_days"],
        mode="lines+markers", name=f"{state} Heatwave Days",
        yaxis="y2", line=dict(dash="dot")
    ))

# Layout for the two overlaid y-axes
fig.update_layout(
    title=dict(
        text="Heat-Related Deaths & Heatwave Days Over Years (Top 3 States)",
        x=0.5,  # Centered title
        y=0.93,  # Lowered to avoid legend overlap
        xanchor="center",
        yanchor="top"
    ),
    xaxis=dict(title="Year"),
    yaxis=dict(title="Heat-Related Deaths", side="left", showgrid=False),
    yaxis2=dict(title="Heatwave Days", side="right", overlaying="y", showgrid=False),

    # Legend outside the plot area, top right
    legend=dict(
        x=1.05,
        y=1.0,
        xanchor="left",
        yanchor="top",
        # Light translucent background: the previous dark grey box was meant for
        # the plotly_dark template and hid the legend text on the default one.
        bgcolor="rgba(255,255,255,0.7)",
        font=dict(size=10),  # More compact font
        itemwidth=30,  # Reduce item width for compactness
        orientation="v"  # Vertical legend
    ),
)

fig.show()


In [30]:
# Largest `median_wbt` value per (year, state_ut), stored under the name `max_wbt`.
# NOTE(review): if `heatwave` holds a single row per (year, state_ut), this
# "max" is simply that row's median value - TODO confirm the intended metric.
max_wbt_per_state = (
    heatwave
    .groupby(["year", "state_ut"], as_index=False)["median_wbt"]
    .max()
    .rename(columns={"median_wbt": "max_wbt"})
)

# Print the first rows as plain text
print(max_wbt_per_state.head())


   year        state_ut    max_wbt
0  2001  Andhra Pradesh  22.679109
1  2001           Assam  19.547613
2  2001           Bihar  23.027893
3  2001      Chandigarh  21.861861
4  2001    Chhattisgarh  22.919030


In [None]:

# Attach max_wbt to the heatwave table. Any max_wbt column left over from an
# earlier run of this cell is dropped first: merging twice would otherwise
# create max_wbt_x / max_wbt_y and break the aggregation below.
heatwave = (
    heatwave
    .drop(columns=["max_wbt"], errors="ignore")
    .merge(max_wbt_per_state, on=["year", "state_ut"], how="left")
)

# Per year: total deaths and the highest max_wbt among all rows of that year.
national_trends = heatwave.groupby("year", as_index=False).agg({"deaths": "sum", "max_wbt": "max"})

# Figure with two y-axes
fig = go.Figure()

# Deaths -> left y-axis
fig.add_trace(go.Scatter(
    x=national_trends["year"],
    y=national_trends["deaths"],
    mode="lines+markers",
    name="Heat-Related Deaths",
    line=dict(color="red", width=2),
    yaxis="y1"
))

# Max WBT -> right y-axis (dotted line)
fig.add_trace(go.Scatter(
    x=national_trends["year"],
    y=national_trends["max_wbt"],
    mode="lines+markers",
    name="Max WBT (°C)",
    line=dict(color="orange", width=2, dash="dot"),
    yaxis="y2"
))

# Axis titles are given as title=dict(text=..., font=...): the older `titlefont`
# shortcut is no longer accepted by current plotly releases.
fig.update_layout(
    title="Heatwave Magnitude (Max WBT) vs. Heat-Related Deaths Over Time",
    xaxis=dict(title="Year"),
    yaxis=dict(
        title=dict(text="Heat-Related Deaths", font=dict(color="red")),
        tickfont=dict(color="red"),
    ),
    yaxis2=dict(
        title=dict(text="Max Wet Bulb Temperature (°C)", font=dict(color="orange")),
        tickfont=dict(color="orange"),
        overlaying="y",
        side="right",
    ),
    legend=dict(x=1.1, y=1),  # Move legend to the right
)

# Show plot
fig.show()


In [None]:
# For every (year, state_ut) pair take the largest `median_wbt` and expose it
# as a regular column named `max_wbt`.
wbt_by_year_state = heatwave.groupby(["year", "state_ut"])["median_wbt"]
max_wbt_per_state = wbt_by_year_state.max().reset_index(name="max_wbt")

# Print the first rows as plain text
print(max_wbt_per_state.head())


   year        state_ut    max_wbt
0  2001  Andhra Pradesh  22.679109
1  2001           Assam  19.547613
2  2001           Bihar  23.027893
3  2001      Chandigarh  21.861861
4  2001    Chhattisgarh  22.919030


In [None]:

# Re-read the saved deaths/heatwave table from Drive and make sure `year` is an
# integer column, as the per-year filtering below compares against it.
heatwave = pd.read_csv(
    "/content/drive/MyDrive/Impact_Scholars/merged.csv"
).astype({'year': int})

def plot_state_year(df):
    """Show one heatwave-days vs. deaths scatter plot per year found in ``df``.

    ``df`` must provide the columns ``year``, ``state_ut``, ``heatwave_days``
    and ``deaths``. Years are visited in their order of first appearance; each
    figure uses only that year's rows, coloured by state and sized by deaths,
    and is displayed immediately. Nothing is returned.
    """
    year_column = df['year']
    axis_labels = {"heatwave_days": "Heatwave Days", "deaths": "Heat-Related Deaths"}

    for year in year_column.unique():
        rows_for_year = df[year_column == year]
        fig = px.scatter(
            rows_for_year,
            x="heatwave_days",
            y="deaths",
            size="deaths",
            color="state_ut",
            hover_data=["state_ut", "year"],
            labels=axis_labels,
            title=f"Heatwave Days vs. Heat-Related Deaths in {year} (State-wise)",
        )
        fig.show()


# Render the state-wise scatter plots: one figure for every year present in `heatwave`.
plot_state_year(heatwave)


In [31]:
# `display` and `HTML` are part of the public IPython.display module; importing
# them from IPython.core.display is a deprecated path.
from IPython.display import display, HTML

In [None]:


# `year` must be an integer column for the year filter below.
heatwave['year'] = heatwave['year'].astype(int)

# Union Territories to leave out of the state-wise comparison.
union_territories = [
    "Andaman & Nicobar Islands",
    "Chandigarh",
    "Dadra & Nagar Haveli and Daman & Diu",
    "Lakshadweep",
    "Delhi",
    "Puducherry",
    "Jammu & Kashmir",
    "Ladakh",
]

# Keep only rows whose state_ut is not in that list
# (names must match the spelling used in the data exactly).
is_union_territory = heatwave["state_ut"].isin(union_territories)
heatwave = heatwave[~is_union_territory]

# Restrict to the three years that are charted.
selected_years = [2005, 2015, 2019]
in_selected_years = heatwave['year'].isin(selected_years)
filtered_df = heatwave[in_selected_years]

# Function to plot stacked bar charts
def plot_stacked_bar(df, years=None):
    """Show one stacked bar chart of heatwave days and deaths per state for each year.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``year``, ``state_ut``, ``heatwave_days`` and
        ``deaths``.
    years : iterable of int, optional
        Years to chart, in display order. When omitted, the notebook-level
        ``selected_years`` list is used, which is what the function always
        iterated over before this parameter existed.

    Each figure is displayed immediately; a horizontal rule is rendered between
    consecutive figures. Nothing is returned.
    """
    if years is None:
        years = selected_years
    years = list(years)

    for i, year in enumerate(years):
        year_data = df[df['year'] == year]

        fig = go.Figure()

        # Two bar traces per state; barmode="stack" below piles deaths on top of days.
        fig.add_trace(go.Bar(
            x=year_data["state_ut"], y=year_data["heatwave_days"],
            name="Heatwave Days", marker_color="orange"
        ))

        fig.add_trace(go.Bar(
            x=year_data["state_ut"], y=year_data["deaths"],
            name="Heat-Related Deaths", marker_color="red"
        ))

        fig.update_layout(
            title=f"Heatwave Days & Heat-Related Deaths in {year} (State-wise, Excluding UTs)",
            xaxis=dict(title="State", tickangle=-45),
            yaxis=dict(title="Count"),
            barmode="stack",
            # Light translucent legend background: the previous dark grey box was
            # meant for the plotly_dark template and hid the text on the default one.
            legend=dict(x=1.05, y=1, xanchor="left", yanchor="top", bgcolor="rgba(255,255,255,0.6)")
        )

        fig.show()

        # Separator between plots (not after the last one). A mid-grey rule stays
        # visible on both light and dark notebook themes; white vanished on light.
        if i < len(years) - 1:
            display(HTML("<hr style='border: 1px solid #888; margin: 20px 0;'>"))

# Render the stacked bar charts from `filtered_df`, which holds only the
# selected years (2005, 2015, 2019) with Union Territories removed.
plot_stacked_bar(filtered_df)
