In [1]:
import os

import polars as pl
import polars.selectors as cs

import altair as alt
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Location of the raw disaster-response CSV.
# Set the DISASTER_DATA_CSV environment variable to point at your own copy of
# the file; when it is unset, the original local path is used so existing
# runs keep working unchanged.
df_path = os.environ.get(
    'DISASTER_DATA_CSV',
    r'/Users/zygimantas/Documents/DataSets/global_disaster_response_2018_2024 (1).csv',
)

In [3]:
# Eagerly read the CSV at `df_path` into a Polars DataFrame.
df = pl.read_csv(source=df_path)

In [4]:
# Preview the freshly loaded frame; at this point every column still has the
# dtype inferred by read_csv (dates are plain strings).
df

date,country,disaster_type,severity_index,casualties,economic_loss_usd,response_time_hours,aid_amount_usd,response_efficiency_score,recovery_days,latitude,longitude
str,str,str,f64,i64,f64,f64,f64,f64,i64,f64,f64
"""2021-01-31""","""Brazil""","""Earthquake""",5.99,111,7.9344e6,15.62,271603.79,83.21,67,-30.613,-122.557
"""2018-12-23""","""Brazil""","""Extreme Heat""",6.53,100,8.3076e6,5.03,265873.81,96.18,55,10.859,-159.194
"""2020-08-10""","""India""","""Hurricane""",1.55,22,765136.99,32.54,49356.49,60.4,22,0.643,-160.978
"""2022-09-15""","""Indonesia""","""Extreme Heat""",4.55,94,1.3083e6,7.83,237512.88,86.41,47,-33.547,30.35
"""2022-09-28""","""United States""","""Wildfire""",3.8,64,2.6559e6,21.9,188910.69,72.81,42,-19.17,-117.137
…,…,…,…,…,…,…,…,…,…,…,…
"""2019-05-14""","""Chile""","""Landslide""",5.5,78,3.7112e6,8.45,305020.35,94.27,55,12.976,-25.68
"""2020-10-30""","""United States""","""Wildfire""",7.76,165,1.2073e7,1.0,363881.25,95.46,76,57.265,-147.346
"""2019-04-27""","""Turkey""","""Flood""",4.9,130,1805859.7,5.14,280665.61,86.67,47,15.217,-27.856
"""2022-10-09""","""Greece""","""Storm Surge""",3.35,82,3.1761e6,19.22,80331.23,84.75,32,-44.002,1.923


In [5]:
# Column names and dtypes as inferred from the CSV.
df.schema

Schema([('date', String),
        ('country', String),
        ('disaster_type', String),
        ('severity_index', Float64),
        ('casualties', Int64),
        ('economic_loss_usd', Float64),
        ('response_time_hours', Float64),
        ('aid_amount_usd', Float64),
        ('response_efficiency_score', Float64),
        ('recovery_days', Int64),
        ('latitude', Float64),
        ('longitude', Float64)])

In [6]:
# Replace the string 'date' column with a real Date column (ISO year-month-day).
# NOTE(review): this rebinds `df`, so re-running the cell on an already-parsed
# frame presumably fails because 'date' is no longer a string — confirm.
df = df.with_columns(pl.col('date').str.strptime(dtype=pl.Date, format='%Y-%m-%d'))

In [7]:
# Estimated in-memory size of the frame in megabytes, taken before the
# integer/categorical downcasts below for comparison.
df.estimated_size(unit='mb')

4.419755935668945

In [8]:
# Downcast every Int64 column (casualties, recovery_days) to UInt16 to save memory.
# NOTE(review): assumes all values are non-negative and fit in 16 bits — confirm
# before reusing this cell on other data.
df = df.cast({cs.by_dtype(pl.Int64): pl.UInt16})

In [9]:
# Check the result of the casts above: 'date' is now a Date and the former
# Int64 columns are u16.
df

date,country,disaster_type,severity_index,casualties,economic_loss_usd,response_time_hours,aid_amount_usd,response_efficiency_score,recovery_days,latitude,longitude
date,str,str,f64,u16,f64,f64,f64,f64,u16,f64,f64
2021-01-31,"""Brazil""","""Earthquake""",5.99,111,7.9344e6,15.62,271603.79,83.21,67,-30.613,-122.557
2018-12-23,"""Brazil""","""Extreme Heat""",6.53,100,8.3076e6,5.03,265873.81,96.18,55,10.859,-159.194
2020-08-10,"""India""","""Hurricane""",1.55,22,765136.99,32.54,49356.49,60.4,22,0.643,-160.978
2022-09-15,"""Indonesia""","""Extreme Heat""",4.55,94,1.3083e6,7.83,237512.88,86.41,47,-33.547,30.35
2022-09-28,"""United States""","""Wildfire""",3.8,64,2.6559e6,21.9,188910.69,72.81,42,-19.17,-117.137
…,…,…,…,…,…,…,…,…,…,…,…
2019-05-14,"""Chile""","""Landslide""",5.5,78,3.7112e6,8.45,305020.35,94.27,55,12.976,-25.68
2020-10-30,"""United States""","""Wildfire""",7.76,165,1.2073e7,1.0,363881.25,95.46,76,57.265,-147.346
2019-04-27,"""Turkey""","""Flood""",4.9,130,1805859.7,5.14,280665.61,86.67,47,15.217,-27.856
2022-10-09,"""Greece""","""Storm Surge""",3.35,82,3.1761e6,19.22,80331.23,84.75,32,-44.002,1.923


In [16]:
# Store the two repeated string columns as Categorical.
df = df.with_columns(
    pl.col('country', 'disaster_type').cast(pl.Categorical)
)

In [23]:
# Rolling mean of severity over the last 3 events of each country.
# The window is row-based (3 consecutive rows after sorting by country and
# date), not a calendar window, so the first two events of each country have
# no value. The result is only displayed; `df` itself is not modified.
(
    df.sort(['country', 'date'])
    .with_columns(
        severity_index_rolling_mean=(
            pl.col('severity_index').rolling_mean(window_size=3).over('country')
        )
    )
    .select(['country', 'date', 'severity_index', 'severity_index_rolling_mean'])
)

country,date,severity_index,severity_index_rolling_mean
cat,date,f64,f64
"""Australia""",2018-01-01,7.54,
"""Australia""",2018-01-01,5.98,
"""Australia""",2018-01-02,5.56,6.36
"""Australia""",2018-01-04,4.84,5.46
"""Australia""",2018-01-04,7.74,6.046667
…,…,…,…
"""United States""",2024-12-30,7.85,6.473333
"""United States""",2024-12-31,10.0,7.626667
"""United States""",2024-12-31,5.22,7.69
"""United States""",2024-12-31,4.49,6.57


In [24]:
# Re-display the base frame: the rolling-mean cell above did not assign its
# result, so `df` still has the original 12 columns (now with cat dtypes).
df

date,country,disaster_type,severity_index,casualties,economic_loss_usd,response_time_hours,aid_amount_usd,response_efficiency_score,recovery_days,latitude,longitude
date,cat,cat,f64,u16,f64,f64,f64,f64,u16,f64,f64
2021-01-31,"""Brazil""","""Earthquake""",5.99,111,7.9344e6,15.62,271603.79,83.21,67,-30.613,-122.557
2018-12-23,"""Brazil""","""Extreme Heat""",6.53,100,8.3076e6,5.03,265873.81,96.18,55,10.859,-159.194
2020-08-10,"""India""","""Hurricane""",1.55,22,765136.99,32.54,49356.49,60.4,22,0.643,-160.978
2022-09-15,"""Indonesia""","""Extreme Heat""",4.55,94,1.3083e6,7.83,237512.88,86.41,47,-33.547,30.35
2022-09-28,"""United States""","""Wildfire""",3.8,64,2.6559e6,21.9,188910.69,72.81,42,-19.17,-117.137
…,…,…,…,…,…,…,…,…,…,…,…
2019-05-14,"""Chile""","""Landslide""",5.5,78,3.7112e6,8.45,305020.35,94.27,55,12.976,-25.68
2020-10-30,"""United States""","""Wildfire""",7.76,165,1.2073e7,1.0,363881.25,95.46,76,57.265,-147.346
2019-04-27,"""Turkey""","""Flood""",4.9,130,1805859.7,5.14,280665.61,86.67,47,15.217,-27.856
2022-10-09,"""Greece""","""Storm Surge""",3.35,82,3.1761e6,19.22,80331.23,84.75,32,-44.002,1.923


In [36]:
# Per-country quarterly summary: total economic loss and the largest single
# casualty count within each calendar quarter.
# The frame is sorted by date first because the dynamic grouping windows run
# over the 'date' index column.
(
    df.sort('date')
    .group_by_dynamic(index_column='date', every='1q', group_by='country')
    .agg(
        pl.col('economic_loss_usd').sum(),
        pl.col('casualties').max(),
    )
)

country,date,economic_loss_usd,casualties
cat,date,f64,u16
"""Brazil""",2018-01-01,4.1725e8,348
"""Brazil""",2018-04-01,4.9420e8,326
"""Brazil""",2018-07-01,5.3464e8,337
"""Brazil""",2018-10-01,4.6900e8,335
"""Brazil""",2019-01-01,3.4825e8,278
…,…,…,…
"""Turkey""",2023-10-01,3.8651e8,277
"""Turkey""",2024-01-01,4.0101e8,292
"""Turkey""",2024-04-01,4.0530e8,286
"""Turkey""",2024-07-01,4.8107e8,313


In [46]:
# Time elapsed between consecutive events of the same country.
# Each country gets one list whose first entry is null (no earlier event).
# Despite the '_days' suffix, the entries are Durations, not integer day counts.
(
    df.sort('date')
    .group_by('country')
    .agg(
        recovery_gap_days=pl.col('date') - pl.col('date').shift()
    )
)

country,recovery_gap_days
cat,list[duration[μs]]
"""Chile""","[null, 0µs, … 1d]"
"""Greece""","[null, 2d, … 0µs]"
"""Brazil""","[null, 0µs, … 3d]"
"""United States""","[null, 1d, … 0µs]"
"""Japan""","[null, 1d, … 0µs]"
…,…
"""Indonesia""","[null, 1d, … 0µs]"
"""Mexico""","[null, 0µs, … 1d]"
"""Bangladesh""","[null, 0µs, … 0µs]"
"""Italy""","[null, 2d, … 4d]"


In [47]:
# Year-over-year percentage change of total economic loss per country.
#
# Steps: derive the calendar year, sum the losses per (country, year), order
# the rows, look up the previous year's total within each country, and express
# the difference as a percentage of that previous total.
(
    df.with_columns(pl.col('date').dt.year().alias('year'))
    .group_by(['country', 'year'])
    .agg(total_economic_loss_usd=pl.col('economic_loss_usd').sum())
    .sort(['country', 'year'])
    .with_columns(
        # Accept the preceding row as "previous year" only when it really is
        # year - 1. If a country has no events in some year, a plain shift
        # would silently compare against an older year; here the value stays
        # null instead (as it does for each country's first year).
        pl.when(pl.col('year').shift(1).over('country') == pl.col('year') - 1)
        .then(pl.col('total_economic_loss_usd').shift(1).over('country'))
        .alias('prev_year_total_economic_loss_usd')
    )
    .with_columns(
        (
            (pl.col('total_economic_loss_usd') - pl.col('prev_year_total_economic_loss_usd'))
            / pl.col('prev_year_total_economic_loss_usd') * 100
        ).alias('yoy_economic_loss_pct')
    )
)


country,year,total_economic_loss_usd,prev_year_total_economic_loss_usd,yoy_economic_loss_pct
cat,i32,f64,f64,f64
"""Australia""",2018,1.8713e9,,
"""Australia""",2019,1.7511e9,1.8713e9,-6.425394
"""Australia""",2020,1.6904e9,1.7511e9,-3.463816
"""Australia""",2021,1.9524e9,1.6904e9,15.496797
"""Australia""",2022,1.7973e9,1.9524e9,-7.939847
…,…,…,…,…
"""United States""",2020,1.7747e9,1.6963e9,4.617282
"""United States""",2021,2.0292e9,1.7747e9,14.344489
"""United States""",2022,1.6772e9,2.0292e9,-17.346337
"""United States""",2023,1.7415e9,1.6772e9,3.83156


Goal: calculate the year-over-year percentage growth of `economic_loss_usd` for each country.
The approach is to:
1.  Extract the year from the `date` column.
2.  Group the data by `country` and `year`.
3.  Sum the `economic_loss_usd` for each group.
4.  Sort the result by `country` and `year`.
5.  Use the `shift` function windowed over `country` to retrieve the previous year's economic loss.
6.  Calculate the percentage change.



<llm-snippet-file>kaggle/global_disaster_response_2018_2024.ipynb</llm-snippet-file>


In [48]:
# Year-over-year percentage growth of total economic loss per country
# (same computation as the previous YoY cell, with shorter column names).
(
    df.with_columns(pl.col("date").dt.year().alias("year"))
    .group_by("country", "year")
    .agg(pl.col("economic_loss_usd").sum().alias("total_economic_loss"))
    .sort("country", "year")
    .with_columns(
        # Use the preceding row as the previous year only when it is exactly
        # year - 1. A country with a missing year would otherwise be compared
        # against an older year without any warning; the value is left null
        # in that case, just like for the first year of each country.
        pl.when(pl.col("year").shift(1).over("country") == pl.col("year") - 1)
        .then(pl.col("total_economic_loss").shift(1).over("country"))
        .alias("prev_year_loss")
    )
    .with_columns(
        (
            (pl.col("total_economic_loss") - pl.col("prev_year_loss"))
            / pl.col("prev_year_loss")
            * 100
        ).alias("yoy_growth_pct")
    )
)

country,year,total_economic_loss,prev_year_loss,yoy_growth_pct
cat,i32,f64,f64,f64
"""Australia""",2018,1.8713e9,,
"""Australia""",2019,1.7511e9,1.8713e9,-6.425394
"""Australia""",2020,1.6904e9,1.7511e9,-3.463816
"""Australia""",2021,1.9524e9,1.6904e9,15.496797
"""Australia""",2022,1.7973e9,1.9524e9,-7.939847
…,…,…,…,…
"""United States""",2020,1.7747e9,1.6963e9,4.617282
"""United States""",2021,2.0292e9,1.7747e9,14.344489
"""United States""",2022,1.6772e9,2.0292e9,-17.346337
"""United States""",2023,1.7415e9,1.6772e9,3.83156


In [49]:
# Display the base frame again. None of the visible analysis cells above
# assign their result, so `df` should be unchanged since the Categorical cast.
# NOTE(review): execution counts are non-sequential, so cells may have been
# removed — confirm with Restart & Run All.
df

date,country,disaster_type,severity_index,casualties,economic_loss_usd,response_time_hours,aid_amount_usd,response_efficiency_score,recovery_days,latitude,longitude
date,cat,cat,f64,u16,f64,f64,f64,f64,u16,f64,f64
2021-01-31,"""Brazil""","""Earthquake""",5.99,111,7.9344e6,15.62,271603.79,83.21,67,-30.613,-122.557
2018-12-23,"""Brazil""","""Extreme Heat""",6.53,100,8.3076e6,5.03,265873.81,96.18,55,10.859,-159.194
2020-08-10,"""India""","""Hurricane""",1.55,22,765136.99,32.54,49356.49,60.4,22,0.643,-160.978
2022-09-15,"""Indonesia""","""Extreme Heat""",4.55,94,1.3083e6,7.83,237512.88,86.41,47,-33.547,30.35
2022-09-28,"""United States""","""Wildfire""",3.8,64,2.6559e6,21.9,188910.69,72.81,42,-19.17,-117.137
…,…,…,…,…,…,…,…,…,…,…,…
2019-05-14,"""Chile""","""Landslide""",5.5,78,3.7112e6,8.45,305020.35,94.27,55,12.976,-25.68
2020-10-30,"""United States""","""Wildfire""",7.76,165,1.2073e7,1.0,363881.25,95.46,76,57.265,-147.346
2019-04-27,"""Turkey""","""Flood""",4.9,130,1805859.7,5.14,280665.61,86.67,47,15.217,-27.856
2022-10-09,"""Greece""","""Storm Surge""",3.35,82,3.1761e6,19.22,80331.23,84.75,32,-44.002,1.923
