In [2]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

In [3]:
# Resolve the directory this notebook/script lives in. `__file__` is not
# defined inside a Jupyter kernel, so fall back to the working directory there.
try:
    base_path = Path(__file__).parent
except NameError:
    base_path = Path.cwd()  # fallback for notebooks

data_path = base_path / "brics_cleaned.csv"

# brics_cleaned.csv is written by the final cell of this notebook, so it does
# not exist on a first run or a clean checkout. Reading it unconditionally made
# "Restart & Run All" stop here with FileNotFoundError, even though `df` is
# re-read from the raw CSV a couple of cells further down.
if data_path.is_file():
    df = pd.read_csv(data_path)
else:
    print(f"{data_path.name} not found yet - it is written by the final cell of this notebook.")

In [4]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [5]:
# Load the raw economic-indicator dataset; this replaces any `df` bound earlier.
raw_csv = "economic_analysis_data.csv"
df = pd.read_csv(raw_csv)

In [6]:
# Size of the raw frame as (number of rows, number of columns).
df.shape

In [7]:
# Preview the first five rows to see the column layout and value formats.
df.head()

In [8]:
# Frequency of each distinct value in 'Exports'; inspected here before the
# column is dropped in a later cell.
df['Exports'].value_counts()

In [9]:
# Column names, non-null counts and dtypes of the frame.
df.info()

In [10]:
# Remove the 'Exports' column from the working frame.
# errors='ignore' keeps this cell idempotent: it reassigns `df`, so running it
# a second time (column already gone) used to fail with KeyError.
df = df.drop(columns=['Exports'], errors='ignore')

In [11]:
# Number of missing values in every column.
null_counts = df.isna().sum()
print(null_counts)

In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

In [13]:
# Distinct countries present in the dataset, in order of first appearance.
countries = df['Country'].unique()
print(countries)

In [14]:
# Distinct values of the raw 'date' column (shows the time span covered).
date_values = df['date'].unique()
print(date_values)

In [15]:
import seaborn as sns

In [16]:
# Visual missing-data check: one heatmap cell per DataFrame cell, coloured by
# whether the value is null. The colour bar is hidden because it adds nothing
# to a True/False mask.
missing_mask = df.isna()
sns.heatmap(missing_mask, cbar=False)
plt.show()

So there are no NULL values in the dataset.

In [17]:
# Derive the calendar year of each observation from the 'date' column.
parsed_dates = pd.to_datetime(df['date'])
df['Year'] = parsed_dates.dt.year

In [18]:
# Spot-check the first few extracted years.
df['Year'].head()

In [19]:
# Number of rows per year, to see how observations are spread over time.
df['Year'].value_counts()

In [20]:
indicators = ['GDP', 'Unemployment Rate', 'Inflation Rate', 'Current Account Balance']

# Per-country summary statistics, printed as one table per indicator.
for col in indicators:
    country_stats = df.groupby('Country')[col].describe()
    print(f'Descriptive stats for {col}:')
    print(country_stats)
    print('\n')

In [21]:
# One box plot per indicator, comparing its distribution across countries.
# Each plot gets its own 8x4 inch figure and is shown before the next is drawn.
for col in indicators:
    box_fig, ax = plt.subplots(figsize=(8, 4))
    sns.boxplot(x='Country', y=col, data=df, ax=ax)
    ax.set_title(f'{col} Distribution by Country')
    plt.show()

In [22]:
import plotly.express as px

# Interactive GDP time series: one line (with point markers) per country.
fig = px.line(
    data_frame=df,
    x='date',
    y='GDP',
    color='Country',
    markers=True,
    title='GDP Over Time by Country',
)
fig.show()

In [23]:
# Same per-country line chart as for GDP, repeated for the other three indicators.
other_indicators = ('Unemployment Rate', 'Inflation Rate', 'Current Account Balance')
for col in other_indicators:
    chart_title = f'{col} Over Time by Country'
    fig = px.line(
        data_frame=df,
        x='date',
        y=col,
        color='Country',
        markers=True,
        title=chart_title,
    )
    fig.show()

In [24]:
# Check how 'date' is currently stored, before it is converted with
# pd.to_datetime in the next cell.
df['date'].dtype

In [25]:
# Multi-year reporting windows. pd.cut uses right-closed intervals by default,
# so (1999, 2005] holds the years 2000-2005, (2005, 2010] holds 2006-2010, etc.
bins = [1999, 2005, 2010, 2015, 2020, 2025]
labels = ['2000-2005', '2006-2010', '2011-2015', '2016-2020', '2021-2023']

# Parse 'date' into real timestamps (unparseable entries become NaT), then
# refresh 'Year' from the parsed values and assign each row to its window.
df['date'] = pd.to_datetime(df['date'], errors='coerce')
df['Year'] = df['date'].dt.year
df['Period'] = pd.cut(df['Year'], bins=bins, labels=labels)

# 'Period' is categorical, so state `observed` explicitly. Leaving it out emits
# a FutureWarning on pandas >= 2.1, and once the default changes to True the
# Country/Period combinations without data would silently disappear.
# observed=False keeps the output this cell has always produced: every
# combination is listed, with NaN means where there are no rows.
period_summary = (
    df.groupby(['Country', 'Period'], observed=False)[
        ['GDP', 'Unemployment Rate', 'Inflation Rate', 'Current Account Balance']
    ]
    .mean()
    .reset_index()
)
print(period_summary)

In [26]:
# Pairwise correlations between the four indicators, pooled over all countries.
indicator_cols = ['GDP', 'Unemployment Rate', 'Inflation Rate', 'Current Account Balance']
corr = df[indicator_cols].corr()

# Annotated heatmap of the matrix, values shown with two decimals.
corr_fig, ax = plt.subplots(figsize=(7, 5))
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title('Correlation Heatmap of Economic Indicators')
plt.show()

In [27]:
# Repeat the correlation heatmap separately for each country, using only that
# country's rows, so country-specific relationships are not masked by pooling.
metric_cols = ['GDP', 'Unemployment Rate', 'Inflation Rate', 'Current Account Balance']
for country in df['Country'].unique():
    is_country = df['Country'] == country
    corr = df.loc[is_country, metric_cols].corr()
    heat_fig, ax = plt.subplots(figsize=(7, 5))
    sns.heatmap(corr, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
    ax.set_title(f'Correlation Heatmap: {country}')
    plt.show()

In [28]:
import plotly.express as px

# Scatter of GDP against unemployment rate, one colour per country.
fig = px.scatter(
    data_frame=df,
    x='GDP',
    y='Unemployment Rate',
    color='Country',
    title='GDP vs. Unemployment Rate',
)
fig.show()

In [29]:
# Scatter of unemployment rate against inflation rate, one colour per country.
scatter_options = {
    'x': 'Unemployment Rate',
    'y': 'Inflation Rate',
    'color': 'Country',
    'title': 'Unemployment Rate vs. Inflation Rate',
}
fig = px.scatter(df, **scatter_options)
fig.show()

In [30]:
indicators = ['GDP', 'Unemployment Rate', 'Inflation Rate', 'Current Account Balance']

# pct_change compares each row with the previous row of the same country, so
# the rows must be in chronological order within every country first.
df = df.sort_values(['Country', 'date'])

# Year-on-Year changes in percent, stored in one '<indicator>_YoY_Change'
# column per indicator (assumes one row per country and year - TODO confirm).
#
# fill_method=None compares against the real previous observation only. The
# implicit default forward-filled gaps, which reports a fabricated 0 % change
# after a missing value, and it is deprecated since pandas 2.1. When the data
# has no missing values the result is identical.
#
# NOTE: a percentage change is hard to interpret for a series that can be
# negative or close to zero (e.g. Current Account Balance): a deficit widening
# from -1 to -15 is reported as +1400 %. Read those columns with care.
for col in indicators:
    df[f'{col}_YoY_Change'] = df.groupby('Country')[col].pct_change(fill_method=None) * 100

In [31]:
# Line chart of the GDP_YoY_Change column over time, one line per country.
fig = px.line(
    data_frame=df,
    x='date',
    y='GDP_YoY_Change',
    color='Country',
    title='Year-on-Year GDP Change (%) by Country',
)
fig.show()

In [32]:
print("Latest values per country:")
# After sorting by date, the last row of each country group is its most recent
# observation.
latest_cols = ['Country', 'date', 'GDP', 'Unemployment Rate', 'Inflation Rate', 'Current Account Balance']
latest_rows = df.sort_values('date').groupby('Country').tail(1)
print(latest_rows[latest_cols])

In [33]:
print("Correlation matrix for all countries combined:")
# Text version of the pooled indicator correlation matrix.
corr_cols = ['GDP', 'Unemployment Rate', 'Inflation Rate', 'Current Account Balance']
pooled_corr = df[corr_cols].corr()
print(pooled_corr)

In [34]:
# For every indicator, report the country/date with the largest rise and the
# largest fall in its year-on-year change column.
for col in ['GDP', 'Unemployment Rate', 'Inflation Rate', 'Current Account Balance']:
    change_col = f'{col}_YoY_Change'
    max_row = df.loc[df[change_col].idxmax()]
    min_row = df.loc[df[change_col].idxmin()]
    print(f"Largest positive YoY change in {col}: {max_row['Country']} {max_row['date']} {max_row[change_col]:.2f}%")
    print(f"Largest negative YoY change in {col}: {min_row['Country']} {min_row['date']} {min_row[change_col]:.2f}%")
    print('-' * 60)

In [35]:
# Print the five latest observations (by date) of the raw indicators for each country.
recent_cols = ['date', 'GDP', 'Unemployment Rate', 'Inflation Rate', 'Current Account Balance']
for country in df['Country'].unique():
    print(f"\nMost recent 5 years for {country}:")
    country_history = df[df['Country'] == country].sort_values('date')
    print(country_history.tail(5)[recent_cols])

**Insights**

- **Latest data (2023):**
  - **Brazil:** GDP USD 2.17T, Unemployment 7.95%, Inflation 213.9, CA Balance -USD 30.8B
  - **India:** GDP USD 3.55T, Unemployment 4.17%, Inflation 216.9, CA Balance -USD 30.2B
  - **South Africa:** GDP USD 377.8B, Unemployment 27.99%, Inflation 194.8, CA Balance -USD 6.1B

- **Correlations:**
  - Higher GDP tends to mean lower unemployment.
  - GDP is positively linked to inflation, and negatively to current account balance.

- **Year-on-year highlights:**
  - Brazil saw the largest GDP rise (+33% in 2005) and fall (-27% in 2015).
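  - India had the largest negative swing in current account balance (-1418% in 2005). Note that percentage changes are unreliable for a balance that is negative or close to zero: the magnitude is inflated by the small base, and the sign is inverted when the previous value is negative.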
  - Unemployment in Brazil dropped sharply in 2022.

- **Insights:**
  - Brazil and India show GDP growth but face inflation and current account deficits.
  - South Africa struggles with high unemployment.

- **Recommendations:**  
  - Brazil and India: Focus on stabilizing inflation and deficits.
  - South Africa: Address persistent unemployment.

In [36]:
# Save the processed DataFrame (derived columns included, index omitted).
# It is written to `data_path` - the base_path / "brics_cleaned.csv" location
# that the loading cell at the top reads from - instead of a bare relative
# filename, so the file cannot land in a different directory than the one it
# is later read from.
df.to_csv(data_path, index=False)