In [1]:
import os
import polars as pl

# Import Dataset

In [2]:
# Folder (relative) and file names of the input datasets.
local_path = 'dataset/'
file_country_codes = 'country_codes.csv'
file_gdp_data = 'gdp_data.csv'

# Name of the folder that receives generated files.
auto = 'auto'

In [3]:
# Resolve every location from the parent of the current working directory.
path = os.getcwd()
parent = os.path.dirname(path)
dataset_path_country_code = os.path.join(parent, local_path, file_country_codes)
dataset_path_gdp_data = os.path.join(parent, local_path, file_gdp_data)
path_auto = os.path.join(parent, auto)

# Report the resolved locations; each entry is followed by a blank line,
# except the final "Dump path" line.
resolved_locations = (
    ("Current Directory", path),
    ("Parent directory", parent),
    ("Dataset Country Code path", dataset_path_country_code),
    ("Dataset GDP path", dataset_path_gdp_data),
)
for label, location in resolved_locations:
    print(label, location)
    print()
print("Dump path", path_auto)

Current Directory /Users/baptistebeaurain/Google Drive/learning/Python/polars_explo/project/gdp/notebook

Parent directory /Users/baptistebeaurain/Google Drive/learning/Python/polars_explo/project/gdp

Dataset Country Code path /Users/baptistebeaurain/Google Drive/learning/Python/polars_explo/project/gdp/dataset/country_codes.csv

Dataset GDP path /Users/baptistebeaurain/Google Drive/learning/Python/polars_explo/project/gdp/dataset/gdp_data.csv

Dump path /Users/baptistebeaurain/Google Drive/learning/Python/polars_explo/project/gdp/auto


In [4]:
# Load the country-code table from its CSV file.
df_country_code = pl.read_csv(dataset_path_country_code)
print("shape:", df_country_code.shape)

# Preview the first five rows.
df_country_code.head(5)

shape: (250, 2)


Code,Name
str,str
"""TWN""","""Taiwan"""
"""AFG""","""Afghanistan"""
"""ALB""","""Albania"""
"""DZA""","""Algeria"""
"""ASM""","""American Samoa…"


In [5]:
# Mapping from the CSV headers to the short column names used in the
# rest of the notebook.
col_rename = {
    'Country Name': 'cty_name',
    'Country Code': 'cty_code',
    'Year': 'year',
    'Value': 'gdp',
}

# Read the GDP file, order it by country then year, and apply the short names.
df_gdp_data = (
    pl.read_csv(dataset_path_gdp_data)
    .sort(['Country Code', 'Year'])
    .rename(col_rename)
)
print("shape:", df_gdp_data.shape)

# Preview the first five rows.
df_gdp_data.head(5)

shape: (11507, 4)


cty_name,cty_code,year,gdp
str,str,i64,f64
"""Aruba""","""ABW""",1994,1330200000.0
"""Aruba""","""ABW""",1995,1320700000.0
"""Aruba""","""ABW""",1996,1379900000.0
"""Aruba""","""ABW""",1997,1531800000.0
"""Aruba""","""ABW""",1998,1665400000.0


# Challenges

## Calculate GDP evolution per year

In [6]:
# Pair every observation with the previous one recorded for the same country.
# Sorting by country then year first ensures shift(1) within each country
# picks the chronologically preceding row.
gdp_df = df_gdp_data.sort(['cty_code', 'year']).with_columns(
    pl.col('gdp').shift(1).over('cty_code').alias('prev_gdp'),
    pl.col('year').shift(1).over('cty_code').alias('prev_year'),
)

# gdp_evol: ratio of the current GDP to the previous recorded GDP.
# nb_year: number of years between the two observations.
gdp_evol_df = gdp_df.with_columns(
    (pl.col('gdp') / pl.col('prev_gdp')).alias('gdp_evol'),
    (pl.col('year') - pl.col('prev_year')).alias('nb_year'),
)

# Annualised growth in percent: (ratio ** (1 / nb_year) - 1) * 100.
# Taking the nb_year-th root spreads the change evenly over the gap.
gdp_evol_df = gdp_evol_df.with_columns(
    ((pl.col('gdp_evol').pow(1 / pl.col('nb_year')) - 1) * 100).alias('gdp_evol_percent'),
)

# The first observation of each country has no predecessor, so drop it.
gdp_evol_df = gdp_evol_df.filter(pl.col('prev_year').is_not_null())

# Display the result...
print(gdp_evol_df)

# ...and save it. Create the output folder first so the export does not fail
# when the folder does not exist yet (e.g. on a fresh checkout).
os.makedirs(path_auto, exist_ok=True)
gdp_evol_df.write_csv(os.path.join(path_auto, 'gdp_evol_df.csv'))


shape: (11_251, 9)
┌──────────┬──────────┬──────┬───────────┬───┬───────────┬──────────┬─────────┬──────────────────┐
│ cty_name ┆ cty_code ┆ year ┆ gdp       ┆ … ┆ prev_year ┆ gdp_evol ┆ nb_year ┆ gdp_evol_percent │
│ ---      ┆ ---      ┆ ---  ┆ ---       ┆   ┆ ---       ┆ ---      ┆ ---     ┆ ---              │
│ str      ┆ str      ┆ i64  ┆ f64       ┆   ┆ i64       ┆ f64      ┆ i64     ┆ f64              │
╞══════════╪══════════╪══════╪═══════════╪═══╪═══════════╪══════════╪═════════╪══════════════════╡
│ Aruba    ┆ ABW      ┆ 1995 ┆ 1.3207e9  ┆ … ┆ 1994      ┆ 0.99286  ┆ 1       ┆ -0.713986        │
│ Aruba    ┆ ABW      ┆ 1996 ┆ 1.3799e9  ┆ … ┆ 1995      ┆ 1.044839 ┆ 1       ┆ 4.483926         │
│ Aruba    ┆ ABW      ┆ 1997 ┆ 1.5318e9  ┆ … ┆ 1996      ┆ 1.110121 ┆ 1       ┆ 11.012146        │
│ Aruba    ┆ ABW      ┆ 1998 ┆ 1.6654e9  ┆ … ┆ 1997      ┆ 1.087163 ┆ 1       ┆ 8.716265         │
│ …        ┆ …        ┆ …    ┆ …         ┆ … ┆ …         ┆ …        ┆ …       ┆ …         

In [7]:
# Summary statistics for every column of gdp_evol_df. String columns are
# included in the table as well; their mean is reported as null.
stats = gdp_evol_df.describe()
print(stats)

shape: (9, 10)
┌────────────┬────────────┬──────────┬───────────┬───┬───────────┬──────────┬──────────┬───────────┐
│ describe   ┆ cty_name   ┆ cty_code ┆ year      ┆ … ┆ prev_year ┆ gdp_evol ┆ nb_year  ┆ gdp_evol_ │
│ ---        ┆ ---        ┆ ---      ┆ ---       ┆   ┆ ---       ┆ ---      ┆ ---      ┆ percent   │
│ str        ┆ str        ┆ str      ┆ f64       ┆   ┆ f64       ┆ f64      ┆ f64      ┆ ---       │
│            ┆            ┆          ┆           ┆   ┆           ┆          ┆          ┆ f64       │
╞════════════╪════════════╪══════════╪═══════════╪═══╪═══════════╪══════════╪══════════╪═══════════╡
│ count      ┆ 11251      ┆ 11251    ┆ 11251.0   ┆ … ┆ 11251.0   ┆ 11251.0  ┆ 11251.0  ┆ 11251.0   │
│ null_count ┆ 0          ┆ 0        ┆ 0.0       ┆ … ┆ 0.0       ┆ 0.0      ┆ 0.0      ┆ 0.0       │
│ mean       ┆ null       ┆ null     ┆ 1991.7189 ┆ … ┆ 1990.7109 ┆ 1.085501 ┆ 1.007999 ┆ 8.462492  │
│            ┆            ┆          ┆ 58        ┆   ┆ 59        ┆          

# Descriptive Stats

## Top 10 GDP Increase

In [8]:
# Order the whole table from the largest to the smallest annual growth.
# NOTE: despite its name, `top_5_values` holds the full sorted frame;
# the ten largest increases are displayed below.
top_5_values = gdp_evol_df.sort(by='gdp_evol_percent', descending=True)

top_5_values.head(n=10)

cty_name,cty_code,year,gdp,prev_gdp,prev_year,gdp_evol,nb_year,gdp_evol_percent
str,str,i64,f64,f64,i64,f64,i64,f64
"""Equatorial Gui…","""GNQ""",1965,64748000.0,12712000.0,1964,5.093292,1,409.329235
"""Congo, Dem. Re…","""COD""",2000,19088000000.0,4711300000.0,1999,4.05158,1,305.158039
"""Oman""","""OMN""",1974,1645900000.0,483030000.0,1973,3.407458,1,240.745788
"""Saudi Arabia""","""SAU""",1974,45413000000.0,14947000000.0,1973,3.038186,1,203.818622
"""Qatar""","""QAT""",1974,2401400000.0,793880000.0,1973,3.024878,1,202.487783
"""Iraq""","""IRQ""",1990,179890000000.0,65641000000.0,1989,2.740434,1,174.043385
"""Kiribati""","""KIR""",1974,85637000.0,31711000.0,1973,2.70058,1,170.05802
"""Brunei Darussa…","""BRN""",1974,1073600000.0,433090000.0,1973,2.478866,1,147.88661
"""Kuwait""","""KWT""",1974,13005000000.0,5408300000.0,1973,2.404598,1,140.45983
"""Iraq""","""IRQ""",1974,11517000000.0,5134400000.0,1973,2.243073,1,124.307317


## Top 10 GDP Decrease

In [9]:
# Order the whole table from the smallest to the largest annual growth
# (ascending is the default sort order).
# NOTE: despite its name, `bottom_5_values` holds the full sorted frame;
# the ten largest decreases are displayed below.
bottom_5_values = gdp_evol_df.sort(by='gdp_evol_percent')

bottom_5_values.head(n=10)

cty_name,cty_code,year,gdp,prev_gdp,prev_year,gdp_evol,nb_year,gdp_evol_percent
str,str,i64,f64,f64,i64,f64,i64,f64
"""Vietnam""","""VNM""",1989,6293300000.0,25424000000.0,1988,0.247536,1,-75.246415
"""Serbia""","""SRB""",2000,6540200000.0,18409000000.0,1999,0.355267,1,-64.473259
"""Argentina""","""ARG""",2002,97724000000.0,268700000000.0,2001,0.363696,1,-63.630374
"""Rwanda""","""RWA""",1994,753640000.0,1971500000.0,1993,0.38226,1,-61.773957
"""Congo, Dem. Re…","""COD""",2001,7438200000.0,19088000000.0,2000,0.389678,1,-61.032214
"""Indonesia""","""IDN""",1998,101620000000.0,229710000000.0,1997,0.442392,1,-55.760839
"""Mozambique""","""MOZ""",1987,2354100000.0,5247200000.0,1986,0.448643,1,-55.135684
"""Congo, Dem. Re…","""COD""",1964,2881500000.0,6213200000.0,1963,0.463779,1,-53.622097
"""Libya""","""LBY""",2011,34699000000.0,74773000000.0,2010,0.46406,1,-53.593959
"""Suriname""","""SUR""",1989,542520000.0,1160900000.0,1988,0.467327,1,-53.267293


In [17]:
# Keep the rows where GDP shrank (negative annual growth) in 2013.
gdp_shrank = pl.col('gdp_evol_percent') < 0
in_2013 = pl.col('year') == 2013
recession_df = gdp_evol_df.filter(gdp_shrank & in_2013)

print(recession_df.shape)

recession_df

(32, 9)


cty_name,cty_code,year,gdp,prev_gdp,prev_year,gdp_evol,nb_year,gdp_evol_percent
str,str,i64,f64,f64,i64,f64,i64,f64
"""Afghanistan""","""AFG""",2013,2.0046e10,2.0537e10,2012,0.97613,1,-2.387006
"""American Samoa…","""ASM""",2013,6.41e8,6.44e8,2012,0.995342,1,-0.465839
"""Antigua and Ba…","""ATG""",2013,1.1929e9,1.2114e9,2012,0.98474,1,-1.526013
"""Bahamas, The""","""BHS""",2013,1.0677e10,1.0720e10,2012,0.995961,1,-0.403899
"""Barbados""","""BRB""",2013,4.6125e9,4.6564e9,2012,0.990583,1,-0.941725
"""Brunei Darussa…","""BRN""",2013,1.8094e10,1.9048e10,2012,0.949882,1,-5.011764
"""Bhutan""","""BTN""",2013,1.7983e9,1.8237e9,2012,0.986095,1,-1.390497
"""Central Africa…","""CAF""",2013,1.5186e9,2.1842e9,2012,0.695255,1,-30.474475
"""Cyprus""","""CYP""",2013,2.4085e10,2.5041e10,2012,0.961791,1,-3.820876
"""Micronesia, Fe…","""FSM""",2013,3.160406e8,3.261287e8,2012,0.969067,1,-3.093288
