In [None]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
import pycountry_convert as pc
import PIL
import io
import kaleido

In [None]:
# Read the three input tables (one CSV per indicator) in a fixed order and
# bind them to their notebook-level names.
gdp_pcap, life_expectancy, population = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/gdp_pcap.csv',
        '../data/life_expectancy.csv',
        '../data/population.csv',
    )
)

In [None]:
# Preview the first rows of the raw GDP table to see how it is laid out.
gdp_pcap.head()

In [None]:
# Structural overview of the GDP table: row count, dtypes and memory usage.
# Per-column non-null counts are printed too, unless the frame is wide enough
# for pandas to fall back to its summarised output.
gdp_pcap.info()

In [None]:
# Per-column summary statistics for the raw GDP table.
gdp_pcap.describe()

In [None]:
# Count missing values per column. The raw boolean mask has one cell per
# table entry and cannot be read at a glance.
gdp_pcap.isnull().sum()

In [None]:
# Number of rows that exactly repeat an earlier row (0 means no duplicates).
# Summing the flags answers the question directly instead of listing one
# boolean per row.
gdp_pcap.duplicated().sum()

In [None]:
# Structural overview of the life-expectancy table (row count, dtypes, memory).
life_expectancy.info()

In [None]:
# Per-column summary statistics for the raw life-expectancy table.
life_expectancy.describe()

In [None]:
# Count missing values per column. The raw boolean mask has one cell per
# table entry and cannot be read at a glance.
life_expectancy.isnull().sum()

In [None]:
# Number of rows that exactly repeat an earlier row (0 means no duplicates).
life_expectancy.duplicated().sum()

In [None]:
# Structural overview of the population table (row count, dtypes, memory).
population.info()

In [None]:
# Per-column summary statistics for the raw population table.
population.describe()

In [None]:
# Count missing values per column. The raw boolean mask has one cell per
# table entry and cannot be read at a glance.
population.isnull().sum()


In [None]:
# Number of rows that exactly repeat an earlier row (0 means no duplicates).
population.duplicated().sum()

In [None]:
# Reshape the wide GDP table into long form: every non-'country' column name
# becomes a value in 'year', and its cell goes into 'gdp'.
gdp_new = gdp_pcap.melt(
    id_vars='country',
    var_name='year',
    value_name='gdp',
    ignore_index=True,
)
gdp_new.head()

In [None]:
# Summary statistics of the long-format GDP table before the values are parsed.
gdp_new.describe()

In [None]:
# Show the long-format GDP table through the notebook's rich display
# (truncated automatically) instead of a plain-text print.
gdp_new

In [None]:
def _suffixed_to_number(cell):
    """Convert one raw cell ('1.5k', '2M', '3B', a plain number or NaN) to a number."""
    if pd.isna(cell):
        # Same fallback the original replace({np.nan: '0'}) applied.
        return 0.0
    if isinstance(cell, str):
        text = cell.strip()
        factor = {'k': 1e3, 'M': 1e6, 'B': 1e9}.get(text[-1:])
        if factor is not None:
            return float(text[:-1]) * factor
        return float(text)
    return cell


# Parse the magnitude suffixes explicitly instead of rewriting each cell into
# an arithmetic string and running pd.eval on it: pd.eval evaluates whatever
# expression the file happens to contain and is slow when called once per cell.
# NOTE(review): missing GDP values become 0 here rather than staying NaN, so the
# later dropna() cannot remove them - confirm that this is intended.
gdp_new['gdp'] = gdp_new['gdp'].map(_suffixed_to_number).astype(int)
gdp_new['year'] = gdp_new['year'].astype(int)
# Rich display (auto-truncated) instead of a plain-text print of the frame.
gdp_new


In [None]:
def cleaner(df, value_name, value_name_dtype):
    """Reshape a wide per-year table into long form and parse its values.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a 'country' column; every other column name must be
        convertible to an integer year.
    value_name : str
        Name of the value column in the result.
    value_name_dtype : str or type
        Dtype the parsed values are cast to (e.g. 'int' or 'float').

    Returns
    -------
    pandas.DataFrame
        Columns 'country', 'year' (int) and `value_name`.

    Raises
    ------
    ValueError
        If a cell is a string that is not a number with an optional
        k / M / B suffix.

    Notes
    -----
    Cells may be plain numbers or strings with a magnitude suffix
    ('1.5k', '2M', '3B'). They are parsed explicitly rather than being
    rewritten into arithmetic strings and passed to ``pd.eval``, which would
    evaluate whatever expression the file contains and is slow per cell.

    NOTE(review): missing values are converted to 0 (unchanged from the
    previous implementation), so a later ``dropna()`` will not remove them.
    """
    suffix_factor = {'k': 1e3, 'M': 1e6, 'B': 1e9}

    def _to_number(cell):
        # Missing -> 0, suffixed string -> scaled float, anything else as-is.
        if pd.isna(cell):
            return 0.0
        if isinstance(cell, str):
            text = cell.strip()
            factor = suffix_factor.get(text[-1:])
            if factor is not None:
                return float(text[:-1]) * factor
            return float(text)
        return cell

    long_df = pd.melt(df, id_vars='country', var_name='year',
                      value_name=value_name, ignore_index=True)
    long_df[value_name] = long_df[value_name].map(_to_number).astype(value_name_dtype)
    long_df['year'] = long_df['year'].astype(int)
    return long_df
# Long-format population (cast to int) and life-expectancy (cast to float) tables.
pop_new = cleaner(df=population, value_name='pop', value_name_dtype='int')
lex_new = cleaner(df=life_expectancy, value_name='lex', value_name_dtype='float')



In [None]:
# Spot-check the first rows of the cleaned population table.
pop_new.head()

In [None]:
# Attach population to every GDP row. The left join keeps each (country, year)
# pair from gdp_new even when pop_new has no matching row.
merge_gdp_pop = pd.merge(gdp_new, pop_new, how='left', on=['country', 'year'])


In [None]:
# Add life expectancy with the same left join on (country, year), giving one
# combined table that carries the gdp, pop and lex columns.
gapminder_df = merge_gdp_pop.merge(lex_new, how='left', on=['country', 'year'])

# Rich display (auto-truncated) instead of a plain-text print of the frame.
gapminder_df

In [None]:
# Discard rows with any missing value, renumber the index from 0, and cast
# gdp and pop to integers.
gapminder_df = (
    gapminder_df
    .dropna()
    .reset_index(drop=True)
    .astype({'gdp': int, 'pop': int})
)


In [None]:
# Show the cleaned, merged table through the notebook's rich display
# (truncated automatically) instead of a plain-text print.
gapminder_df

In [None]:
# Look up the two-letter (alpha-2) code for each country name via pycountry_convert.
gapminder_df['country_code'] = gapminder_df['country'].map(
    lambda name: pc.country_name_to_country_alpha2(name, cn_name_format="default")
)
gapminder_df.head()

In [None]:
# Recode the 'TL' rows to 'TP' by writing only the country_code cells, instead
# of rebuilding and reassigning every column of the matching rows.
# NOTE(review): presumably needed because the continent lookup that follows
# does not recognise 'TL' - confirm against pycountry_convert.
gapminder_df.loc[gapminder_df['country_code'] == 'TL', 'country_code'] = 'TP'
# Rich display of the affected rows to verify the recode.
gapminder_df.query("country=='Timor-Leste'")

In [None]:
# Translate each alpha-2 country code into its continent code.
gapminder_df['continent_code'] = gapminder_df['country_code'].map(
    pc.country_alpha2_to_continent_code
)
gapminder_df.head()

In [None]:
# Expand each continent code into the continent's name.
gapminder_df['continent'] = gapminder_df["continent_code"].map(
    pc.convert_continent_code_to_continent_name
)
gapminder_df.head()

In [None]:
# The intermediate lookup codes are no longer needed once 'continent' exists.
gapminder_df = gapminder_df.drop(columns=['country_code', 'continent_code'])


In [None]:
# Confirm the final column set after the helper code columns were dropped.
gapminder_df.head()

In [None]:
# Keep the years 1900-2023 inclusive, the period named in the chart title.
df = gapminder_df.query("year>1899 and year <2024")
# Rich display (auto-truncated) instead of a plain-text print of the frame.
df

In [None]:
# Animated bubble chart: one frame per year, one bubble per country, with
# GDP on a log x axis, life expectancy on y, bubble size from population and
# colour from continent.
fig = px.scatter(
    df,
    x='gdp',
    y='lex',
    size='pop',
    color='continent',
    hover_name='country',
    animation_frame='year',
    animation_group='country',
    log_x=True,
    size_max=100,
    range_x=[500, 200000],
    range_y=[0, 90],
    labels={
        'gdp': "GDP Per Capita ($)",
        'lex': "Life Expectancy (years)",
        'continent': "Continents",
    },
    title='GDP Per Capita, Life Expectancy & Population (1900-2023)',
)

# Centre the title, hide both grids and pin the x ticks to fixed values with
# short labels.
fig.update_layout(
    title_x=0.5,
    xaxis={
        'showgrid': False,
        'tickmode': 'array',
        'tickvals': [500, 1000, 10000, 100000, 200000],
        'ticktext': [500, 1000, '10K', '100K', '200K'],
    },
    yaxis={'showgrid': False},
)

fig.show()

In [None]:
# 'import PIL' alone does not guarantee that the Image submodule is loaded.
import PIL.Image

# Render every animation frame to a PNG and stitch the images into a GIF.
frames = []
for step_index, frame in enumerate(fig.frames):
    # Swap this frame's traces into the figure and mark the matching slider
    # step as active before rendering.
    fig.update(data=frame.data)
    fig.layout.sliders[0].update(active=step_index)
    # The capture must happen inside the loop: it previously ran once after
    # the loop, so the GIF contained only the final frame.
    frames.append(PIL.Image.open(io.BytesIO(fig.to_image(format="png"))))

if not frames:
    raise ValueError("fig has no animation frames to export")

# Write next to the input CSVs (the '../data' directory the notebook already
# reads from) instead of a user-specific absolute path.
gif_path = os.path.join('..', 'data', 'animated_chart.gif')
frames[0].save(gif_path, save_all=True, append_images=frames[1:], optimize=True, duration=100, loop=0)