# Economic Analysis of Countries
## This notebook analyzes economic indicators for 49 countries using Wikipedia and World Bank data, including clustering, time-series analysis, and interactive visualizations.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
import plotly.express as px
import os

# Make sure the figure output folder exists; an existing folder is left as is.
os.makedirs('plots', exist_ok=True)

# Read the two processed CSV inputs, in order: the per-country table first,
# then the World Bank time-series table.
df_static, df_time_series = (
    pd.read_csv(csv_path)
    for csv_path in ('data/processed_economic_data.csv',
                     'data/processed_worldbank_data.csv')
)

# Report (rows, columns) of each frame as a quick sanity check on the load.
print(f'Static data shape: {df_static.shape}',
      f'Time-series data shape: {df_time_series.shape}',
      sep='\n')

# Pairwise correlation between the numeric indicators, drawn as an annotated
# heatmap and written to plots/correlation_heatmap.png.
numeric_cols = [
    'GDP_Current_USD',
    'GDP_Per_Capita_Calc',
    'GDP_Growth',
    'Inflation_Rate',
    'Unemployment_Rate',
    'Gini_Coefficient',
    'Population_WB',
]
corr = df_static.loc[:, numeric_cols].corr()

plt.figure(figsize=(10, 8))
# seaborn draws into the current axes (the figure created just above) and
# returns them, so the title can be set on the returned axes directly.
heatmap_ax = sns.heatmap(data=corr, annot=True, fmt='.2f', cmap='coolwarm')
heatmap_ax.set_title('Correlation of Economic Indicators')
# Save before show(): showing first can leave an empty figure to save.
plt.savefig('plots/correlation_heatmap.png')
plt.show()

# Clustering analysis
# Group countries into three clusters using GDP per capita and the Gini
# coefficient. Rows missing either feature are excluded from the fit and end
# up with NaN in the 'Cluster' column.
X = df_static[['GDP_Per_Capita_Calc', 'Gini_Coefficient']].dropna()

# K-means uses Euclidean distance, so a feature with a much larger numeric
# range would dominate the other. Z-score both columns so each contributes
# comparably. A zero standard deviation (constant column) is replaced by 1 to
# avoid dividing by zero.
feature_std = X.std(ddof=0).replace(0, 1)
X_scaled = (X - X.mean()) / feature_std

# n_init is pinned explicitly: the library default changed across scikit-learn
# releases, and a fixed value keeps results reproducible with random_state.
# NOTE: kmeans.cluster_centers_ are expressed in standardized units.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)

# Re-attach labels by index so they line up with the original rows.
df_static['Cluster'] = pd.Series(kmeans.fit_predict(X_scaled), index=X.index)

# Interactive scatter plot
# Plot GDP per capita against the Gini coefficient, coloured by cluster and
# sized by population, and save it as a standalone HTML file.
#
# Work on a copy restricted to rows that have every plotted value: marker
# sizes must be real numbers, and rows without a cluster label carry no
# grouping information to display.
scatter_cols = ['GDP_Per_Capita_Calc', 'Gini_Coefficient', 'Cluster', 'Population_WB']
df_scatter = df_static.dropna(subset=scatter_cols).copy()

# Cluster ids are category labels, not quantities. Casting them to strings
# makes Plotly use discrete colours instead of a continuous colour bar.
# df_static itself is left unchanged.
df_scatter['Cluster'] = df_scatter['Cluster'].astype(int).astype(str)

fig = px.scatter(df_scatter, x='GDP_Per_Capita_Calc', y='Gini_Coefficient', color='Cluster',
                 size='Population_WB', hover_data=['Country', 'Unemployment_Rate'],
                 category_orders={'Cluster': sorted(df_scatter['Cluster'].unique())},
                 title='Economic Clustering of Countries')
fig.write_html('plots/economic_clustering.html')
fig.show()

# GDP per capita growth over time, one line per country, saved as
# plots/gdp_per_capita_growth.html.
fig = px.line(
    df_time_series,
    x='Year',
    y='GDP_Per_Capita_Growth',
    color='Country',
    title='GDP Per Capita Growth (2015-2023)',
).update_layout(showlegend=False)  # legend hidden; update_layout returns the same figure
fig.write_html('plots/gdp_per_capita_growth.html')
fig.show()

# World map shaded by Gini coefficient, saved as plots/gini_choropleth.html.
# Countries are matched to map regions by the text in the 'Country' column.
choropleth_options = dict(
    locations='Country',
    locationmode='country names',
    color='Gini_Coefficient',
    hover_data=['Country', 'GDP_Per_Capita_Calc'],
    title='Gini Coefficient by Country (2023)',
    color_continuous_scale='Viridis',
)
fig = px.choropleth(df_static, **choropleth_options)
fig.write_html('plots/gini_choropleth.html')
fig.show()

# Unemployment rate per country as a bar chart, saved as
# plots/unemployment_bar.png.
plt.figure(figsize=(12, 6))
# seaborn draws into the current axes (the figure created above) and returns them.
bar_ax = sns.barplot(data=df_static, x='Country', y='Unemployment_Rate')
bar_ax.set_title('Unemployment Rate by Country (2023)')
# Slant the country names and right-align them so each label ends under its bar.
plt.xticks(rotation=45, ha='right')
# Re-fit the layout after rotating so the long labels are not clipped on save.
plt.tight_layout()
plt.savefig('plots/unemployment_bar.png')
plt.show()


## Insights
- Countries with high GDP per capita often have lower Gini coefficients, indicating less income inequality.
- Clustering identifies three groups: high-income/low-inequality, emerging economies, and high-inequality/low-income countries.
- Time-series data shows varied GDP per capita growth trends, with some countries recovering faster post-2020.
- Unemployment rates vary significantly across countries and may be correlated with each country's level of economic development.
- Further analysis could explore causal relationships or forecasting using time-series models.