In [16]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import requests

In [3]:
# Download the Stata dataset from GitHub, keep a local copy, and load it.
url = 'https://github.com/gdo-cambodia/cambodia202-1990-2022-GDPpc/raw/refs/heads/main/cambodia202-1990-2022-GDPpc.dta'
# The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=60)
# Fail immediately on an HTTP error (404, 500, ...). Without this check an
# error page would be written to disk as if it were a .dta file and the
# problem would only show up later as an obscure parse failure in read_stata.
response.raise_for_status()

# Save the content to a local file
with open('cambodia_gdp.dta', 'wb') as f:
    f.write(response.content)

# Read the .dta file into a pandas DataFrame
df = pd.read_stata('cambodia_gdp.dta')

In [4]:
# Preview the first five rows to check that the dataset loaded as expected.
df.head(n=5)

Unnamed: 0,GID_2,id,year,province,district,y,ln_y,trend6_ln_y,trend100_ln_y,trend400_ln_y
0,KHM.1.1_2,1.0,1990,BântéayMéanchey,Malai,676.0,6.516193,6.545359,6.504154,6.428658
1,KHM.1.1_2,1.0,1991,BântéayMéanchey,Malai,711.0,6.566672,6.565005,6.517225,6.462579
2,KHM.1.1_2,1.0,1992,BântéayMéanchey,Malai,651.0,6.478509,6.579986,6.530417,6.49672
3,KHM.1.1_2,1.0,1993,BântéayMéanchey,Malai,1026.0,6.933423,6.5859,6.544344,6.531558
4,KHM.1.1_2,1.0,1994,BântéayMéanchey,Malai,625.0,6.437752,6.562111,6.559102,6.567528


## GDP per capita

In [30]:
# Summary statistics of 'y' for each year, rounded to two decimals.
y_grouped_by_year = df.groupby('year')['y']
df_mean_y = y_grouped_by_year.describe().round(2)

In [37]:
# Draw the yearly mean, minimum and maximum from the summary table as
# three lines, with the table's index (the year) on the x-axis.
summary_columns = ['mean', 'min', 'max']
fig = px.line(
    df_mean_y,
    x=df_mean_y.index,
    y=summary_columns,
)
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='GDP per capita',
)
fig.show()

In [45]:
# Quantile levels of 'y' to compute within each year.
quantile_levels = [0.1, 0.25, 0.5, 0.75, 0.9]

# Wide table: one row per year, one column per quantile level.
yearly_quantiles = (
    df.groupby('year')['y']
    .quantile(quantile_levels)
    .unstack()
)

# Long table: one row per (year, quantile) pair, so the quantile level can
# be mapped to line colour.
yearly_quantiles_long = yearly_quantiles.reset_index().melt(
    id_vars='year',
    var_name='quantile',
    value_name='y_value',
)

fig = px.line(
    yearly_quantiles_long,
    x='year',
    y='y_value',
    color='quantile',
    labels={'quantile': 'Percentile', 'y_value': 'y'},
    title='Time Series of Quantiles of y by Year',
)
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='y',
)
fig.show()

In [39]:
# One box per year showing the spread of 'y'; hovering over a point shows
# its district (as the hover title) and province.
fig = px.box(
    df,
    x='year',
    y='y',
    hover_name='district',
    hover_data=['province'],
)
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='GDP per capita',
)
fig.show()

In [33]:
# Animated strip plot of 'y' by province, one frame per year.
# The x-axis range is fixed, so it is the same in every frame.
# NOTE(review): the bounds are hard-coded; presumably they were chosen to
# span the observed values of 'y' — confirm against the data.
y_axis_limits = [400, 12700]
px.strip(
    df,
    x='y',
    y='province',
    range_x=y_axis_limits,
    color='province',
    hover_name='district',
    hover_data=['province'],
    animation_frame='year',
)

## Trend of GDP per capita

In [34]:
# Summary statistics of 'trend400_ln_y' for each year, rounded to two decimals.
trend_grouped_by_year = df.groupby('year')['trend400_ln_y']
df_mean_trend400_ln_y = trend_grouped_by_year.describe().round(2)

In [38]:
# Draw the yearly mean, minimum and maximum from the trend summary table as
# three lines, with the table's index (the year) on the x-axis.
trend_summary_columns = ['mean', 'min', 'max']
fig = px.line(
    df_mean_trend400_ln_y,
    x=df_mean_trend400_ln_y.index,
    y=trend_summary_columns,
)
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Trend of log GDP per capita',
)
fig.show()

In [47]:

# Quantile levels of 'trend400_ln_y' to compute within each year.
trend_quantile_levels = [0.1, 0.25, 0.5, 0.75, 0.9]

# Wide table: one row per year, one column per quantile level.
yearly_quantiles = (
    df.groupby('year')['trend400_ln_y']
    .quantile(trend_quantile_levels)
    .unstack()
)

# Long table: one row per (year, quantile) pair, so the quantile level can
# be mapped to line colour.
yearly_quantiles_long = yearly_quantiles.reset_index().melt(
    id_vars='year',
    var_name='quantile',
    value_name='trend400_ln_y_value',
)

fig = px.line(
    yearly_quantiles_long,
    x='year',
    y='trend400_ln_y_value',
    color='quantile',
    labels={'quantile': 'Percentile', 'trend400_ln_y_value': 'trend400_ln_y'},
    title='Time Series of Quantiles of trend400_ln_y by Year',
)
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='trend400_ln_y',
)
fig.show()

In [40]:
# One box per year showing the spread of 'trend400_ln_y'; hovering over a
# point shows its district (as the hover title) and province.
fig = px.box(
    df,
    x='year',
    y='trend400_ln_y',
    hover_name='district',
    hover_data=['province'],
)
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Trend log GDP per capita',
)
fig.show()

In [42]:
# Animated strip plot of 'trend400_ln_y' by province, one frame per year.
# The x-axis range is fixed, so it is the same in every frame.
# NOTE(review): the bounds are hard-coded; presumably they were chosen to
# span the observed values of 'trend400_ln_y' — confirm against the data.
trend_axis_limits = [6.10, 9.5]
px.strip(
    df,
    x='trend400_ln_y',
    y='province',
    range_x=trend_axis_limits,
    color='province',
    hover_name='district',
    hover_data=['province'],
    animation_frame='year',
)