In [2]:
import pandas as pd
import numpy as np

df_exports = pd.read_csv('../data/raw/raw_exports.csv', skiprows=4)

print("--- SHAPE ---")
print(df_exports.shape)
print("\n--- INFO ---")
print(df_exports.info())
print("\n--- MISSING VALUES PER COLUMN ---")
print(df_exports.isnull().sum())

--- SHAPE ---
(266, 70)

--- INFO ---
<class 'pandas.DataFrame'>
RangeIndex: 266 entries, 0 to 265
Data columns (total 70 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Country Name    266 non-null    str    
 1   Country Code    266 non-null    str    
 2   Indicator Name  266 non-null    str    
 3   Indicator Code  266 non-null    str    
 4   1960            72 non-null     float64
 5   1961            75 non-null     float64
 6   1962            76 non-null     float64
 7   1963            77 non-null     float64
 8   1964            78 non-null     float64
 9   1965            88 non-null     float64
 10  1966            90 non-null     float64
 11  1967            95 non-null     float64
 12  1968            96 non-null     float64
 13  1969            97 non-null     float64
 14  1970            127 non-null    float64
 15  1971            127 non-null    float64
 16  1972            128 non-null    float64
 17  1973    

In [3]:
year_columns = [str(year) for year in range(2015, 2025)]
df_recent = df_exports[['Country Name', 'Country Code'] + year_columns].copy()

df_recent['missing_count'] = df_recent[year_columns].isnull().sum(axis=1)

df_complete = df_recent[df_recent['missing_count'] < 3]
print(f"Countries with mostly complete data: {len(df_complete)}")

focus_countries = ['United States', 'China', 'Germany', 'Mexico', 'India', 'Vietnam']
df_focus = df_recent[df_recent['Country Name'].isin(focus_countries)]

print("\n--- FOCUS COUNTRIES QUALITY CHECK ---")
print(df_focus[['Country Name', 'missing_count']])

Countries with mostly complete data: 223

--- FOCUS COUNTRIES QUALITY CHECK ---
      Country Name  missing_count
40           China              0
55         Germany              0
109          India              0
154         Mexico              0
251  United States              0


In [4]:
import plotly.express as px

df_plot = df_focus.melt(
    id_vars=['Country Name', 'Country Code'], 
    value_vars=year_columns,
    var_name='Year', 
    value_name='Exports'
)

df_plot['Year'] = df_plot['Year'].astype(int)

df_plot = df_plot.dropna(subset=['Exports'])

fig = px.line(df_plot, 
              x='Year', 
              y='Exports', 
              color='Country Name',
              title='Exports of Goods & Services (2015-2024)',
              labels={'Exports': 'Export Value (USD)'},
              markers=True)
fig.show()