In [1]:
#
#IMPORTS
#
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import imageio as io
import os
import math

In [2]:
#
#READ FILES
#

#LOCATIONS OF THE THREE INPUT CSV FILES (RELATIVE TO THE NOTEBOOK'S WORKING DIRECTORY)
gdp_csv_path = "./API_NY.GDP.MKTP.CD_DS2_en_csv_v2_10576830/API_NY.GDP.MKTP.CD_DS2_en_csv_v2_10576830.csv"
country_csv_path = "./API_NY.GDP.MKTP.CD_DS2_en_csv_v2_10576830/Metadata_Country_API_NY.GDP.MKTP.CD_DS2_en_csv_v2_10576830.csv"
pop_csv_path = "./API_SP.POP.TOTL_DS2_en_csv_v2_10576638/API_SP.POP.TOTL_DS2_en_csv_v2_10576638.csv"

#NUMBER OF LEADING ROWS TO SKIP IN THE TWO DATA FILES
#(PRESUMABLY A NON-TABULAR PREAMBLE ABOVE THE HEADER ROW - CONFIRM AGAINST THE FILES)
data_file_skiprows = 4

#MAIN GDP DATA FILE
gdp_data = pd.read_csv(gdp_csv_path, skiprows=data_file_skiprows)

#GDP METADATA FILE WITH COUNTRY INFO (READ FROM THE FIRST ROW, NOTHING SKIPPED)
country_data = pd.read_csv(country_csv_path)

#POPULATION DATA FILE
pop_data = pd.read_csv(pop_csv_path, skiprows=data_file_skiprows)




In [3]:
#
#CREATE DATAFRAME OF REGIONS (MATCH EVERY COUNTRY IN GDP FILE TO ITS REGION IN THE METADATA FILE)
#
#METADATA ROWS WHOSE COUNTRY CODE ALSO APPEARS IN THE GDP FILE
appears_in_gdp_file = country_data['Country Code'].isin(gdp_data['Country Code'])
#METADATA ROWS THAT HAVE A REGION FILLED IN
region_is_known = country_data['Region'].notnull()
#KEEP ONLY THE ROWS THAT SATISFY BOTH CONDITIONS
country_regions = country_data.loc[appears_in_gdp_file & region_is_known]

In [4]:
#
#ITERATE THROUGH COUNTRIES AND MATCH REGION TO COLOR
#
#REGION NAME -> TEXT COLOR USED FOR THAT REGION'S COUNTRIES IN THE PLOTS
region_colors = {
    'Europe & Central Asia': 'Red',
    'Sub-Saharan Africa': 'Blue',
    'Latin America & Caribbean': 'Green',
    'East Asia & Pacific': 'purple',
    'Middle East & North Africa': 'Black',
    'South Asia': 'Orange',
    'North America': 'Grey',
}
#COLOR FOR A REGION THAT IS NOT LISTED ABOVE. WITHOUT A FALLBACK SUCH A COUNTRY WOULD
#GET NO ENTRY AT ALL AND EVERY LATER COLOR WOULD SHIFT ONE POSITION
unknown_region_color = 'Brown'

#COUNTRY CODE -> REGION, TAKEN FROM THE FILTERED METADATA
region_by_code = dict(zip(country_regions['Country Code'], country_regions['Region']))

#ONE COLOR PER REGION-BEARING ROW OF THE GDP FILE, IN GDP-FILE ROW ORDER.
#THE PLOTTING LOOP INDEXES country_colors BY ROW POSITION IN THE FILTERED GDP DATA,
#SO THE LIST MUST FOLLOW THE GDP FILE'S ORDER RATHER THAN THE METADATA FILE'S ORDER.
#ROWS WITHOUT A REGION (AGGREGATES) ARE SKIPPED, MATCHING HOW THE GDP DATA IS FILTERED
country_colors = [
    region_colors.get(region_by_code[code], unknown_region_color)
    for code in gdp_data['Country Code']
    if code in region_by_code
]

In [5]:
#
#REMOVE AGGREGATE RECORDS FROM DATA (AGGREGATE RECORDS HAVE NULL REGIONS IN THE COUNTRY METADATA)
#
#COUNTRY CODES WHOSE METADATA ROW HAS A REGION (I.E. NOT AN AGGREGATE RECORD)
has_region = country_data['Region'].notnull()
codes_with_region = country_data.loc[has_region, 'Country Code'].tolist()

#KEEP ONLY THOSE COUNTRIES IN THE GDP AND POPULATION DATA
keep_gdp_row = gdp_data['Country Code'].isin(codes_with_region)
keep_pop_row = pop_data['Country Code'].isin(codes_with_region)
gdp = gdp_data.loc[keep_gdp_row]
pop = pop_data.loc[keep_pop_row]

In [6]:
#
#CREATE PLOTS
#

#TICK LABEL FORMATTER: WHOLE NUMBERS WITH THOUSANDS SEPARATORS
#(ADAPTED FROM 
#https://stackoverflow.com/questions/25973581/how-do-i-format-axis-number-format-to-thousands-with-a-comma-in-matplotlib)
def format_whole_number(value, position):
    """Return a tick value as an integer string with comma separators (e.g. 1,000,000).

    `position` is the tick index that matplotlib passes to FuncFormatter callbacks;
    it is not needed here.
    """
    return format(int(value), ',')

#COUNTRY CODES DO NOT CHANGE FROM YEAR TO YEAR, SO READ THEM ONCE
country_codes = gdp['Country Code'].to_numpy()

#ITERATE OVER YEARS IN DATA
for years in range(1960,2019):
    #CONVERT YEARS TO STRING SO WE CAN USE IT AS A DATAFRAME COLUMN LABEL
    year = str(years)
    #CREATE THE PLOT
    fig, ax = plt.subplots(figsize=(12,12))

    #ALTERNATE PLOT USING .SCATTER()
    #plt.scatter(pop[year],gdp[year]/pop[year],c=country_colors)
    #(SIMPLER, BUT HARDER FOR THE VIEWER TO SEE WHAT IS GOING ON)

    #THIS YEAR'S VALUES AS PLAIN ARRAYS. ROWS ARE MATCHED BY POSITION: ROW i OF gdp,
    #ROW i OF pop AND country_colors[i] ARE ALL TREATED AS THE SAME COUNTRY
    gdp_values = gdp[year].to_numpy()
    pop_values = pop[year].to_numpy()

    #ITERATE OVER COUNTRIES IN DATA
    for i in range(len(gdp_values)):
        gdp_value = gdp_values[i]
        pop_value = pop_values[i]
        #SKIP COUNTRY IF GDP OR POPULATION IS NULL
        if np.isnan(gdp_value) or np.isnan(pop_value):
            continue
        #SKIP ZERO POPULATION: GDP PER CAPITA WOULD NOT BE A FINITE NUMBER
        if pop_value == 0:
            continue
        ax.text(x=pop_value, #POPULATION
                y=gdp_value/pop_value, #GDP PER CAPITA
                s=country_codes[i], #TEXT MARKER IS COUNTRY CODE
                color=country_colors[i], #REGION COLOR
                #TEXT IS NOT CLIPPED TO THE AXES BY DEFAULT, SO WITHOUT THIS, LABELS JUST
                #BEYOND THE AXIS LIMITS WOULD BE DRAWN IN THE FIGURE MARGINS
                clip_on=True)
    #AXIS LABELS
    ax.set_ylabel('GDP per Capita')
    ax.set_xlabel('Population')
    #PLOT TITLE
    ax.set_title(year, fontsize=16)
    #SET AXIS SCALE (WE ARE USING THIS AS A CHEAT TO REMOVE OUTLIERS)
    ax.set_xlim(0,100_000_000)
    ax.set_ylim(0,150_000)
    #FORMAT AXIS TICKS TO WHOLE NUMBERS (ONE FORMATTER OBJECT PER AXIS)
    ax.xaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(format_whole_number))
    ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(format_whole_number))
    # SAVE FILE WITH YEAR INDEX (NO EXTENSION GIVEN, SO SAVEFIG ADDS ITS DEFAULT FORMAT'S EXTENSION)
    filename = f'gdppop{year}'
    fig.savefig(filename,dpi=150)
    #CLOSE THIS FIGURE SO FIGURES DO NOT ACCUMULATE IN MEMORY ACROSS YEARS
    plt.close(fig)

In [7]:
#
#MAKE GIF (ADAPTED FROM https://stackoverflow.com/questions/41228209/making-gif-from-images-using-imageio-in-python)
#

#CREATE SORTED LIST OF FRAME FILENAMES
#ONLY THE .png FRAMES ARE TAKEN: THE OUTPUT 'gdppop.gif' ALSO STARTS WITH 'gdppop', SO ON A
#RE-RUN IT WOULD OTHERWISE BE PICKED UP AS A FRAME AND READ WHILE IT IS BEING REWRITTEN
#(ASSUMES THE FRAMES WERE SAVED WITH MATPLOTLIB'S DEFAULT png FORMAT)
file_names = sorted(fn for fn in os.listdir('.')
                    if fn.startswith('gdppop') and fn.endswith('.png'))
#USE IMAGE IO TO CREATE A GIF
with io.get_writer('gdppop.gif', mode='I', duration=0.1) as writer:
    #ITERATE OVER FILENAMES
    for filename in file_names:
        #READ IN FILE
        image = io.imread(filename)
        #APPEND FILE TO GIF
        writer.append_data(image)
#NO EXPLICIT writer.close() NEEDED: LEAVING THE with BLOCK CLOSES THE WRITER