# Animated Scatterplot

#### Initial setup

In [1]:
import os

import imageio
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Apply matplotlib's "ggplot" style sheet to every figure drawn below.
plt.style.use("ggplot")

#### Import data:

In [2]:
# Load the four source tables. The three indicator tables use their first
# column as the index; the continent lookup is semicolon-separated and uses
# its second column as the index.
life = pd.read_excel(
    '../data/gapminder_lifeexpectancy.xlsx',
    index_col=0,
)
fert = pd.read_csv(
    '../data/gapminder_total_fertility.csv',
    index_col=0,
)
popu = pd.read_excel(
    "../data/gapminder_population.xlsx",
    index_col=0,
)
cont = pd.read_csv(
    "../data/continents.csv",
    sep=";",
    index_col=1,
)

#### Check shape:

In [3]:
# Print the (rows, columns) shape of each loaded table, one per line,
# in the order life, fert, popu, cont.
for table in (life, fert, popu, cont):
    print(table.shape)

(260, 217)
(260, 216)
(275, 81)
(194, 1)


#### Check column dtype:

In [5]:
# Cast fert's column labels to int so they match the integer column labels
# that life and popu report below.
fert.columns = fert.columns.astype(int)

# Print the dtype of the column labels of each indicator table.
# cont doesn't have a "year" feature -> no check needed
for table in (life, fert, popu):
    print(table.columns.dtype)

int64
int64
int64


#### Correct index names:

In [29]:
# Give the index of every indicator table the same name so that, once the
# index is reset, each table has an identically named "country" column.
for table in (life, fert, popu):
    table.index.name = "country"

#### Reset index for melting:

In [30]:
# Move each table's index into a regular column so it can serve as an
# id/key column in the melt and merge steps.
# NOTE: this cell is not idempotent -- running it twice adds an extra
# "index" column to every table.
fert, life, popu, cont = [
    table.reset_index() for table in (fert, life, popu, cont)
]

#### Like ice in the sunshine (melt the tables from wide to long format):

In [31]:
# Reshape each indicator table from wide (one column per year) to long
# (one row per country/year pair). cont is already in the right format for
# merging, so it is left untouched.
fert = pd.melt(
    fert,
    id_vars='country',
    var_name='year',
    value_name='fertility_rate',
)
life = pd.melt(
    life,
    id_vars='country',
    var_name='year',
    value_name='life_expectancy',
)
popu = pd.melt(
    popu,
    id_vars='country',
    var_name='year',
    value_name='population',
)

#### Merge all DataFrames and drop rows containing NaN values:

In [32]:
# Combine the three long-format indicator tables on (country, year), attach
# the continent of each country, and drop every row with a missing value.
# The join keys are spelled out explicitly: relying on "all shared column
# names" would silently change the join if a table ever picked up an extra
# common column (e.g. after re-running the reset_index cell).
merged_df = (
    fert
    .merge(popu, on=["country", "year"], how="inner")
    .merge(life, on=["country", "year"], how="inner")
    .merge(cont, on="country", how="inner")
    .dropna()
)

# Show a random sample of rows as a quick sanity check of the result.
merged_df.sample(10)

Unnamed: 0,country,year,fertility_rate,population,life_expectancy,continent
133,Albania,1987,3.13,3121336.0,73.14,Europe
386,Angola,1997,6.95,13801868.0,51.7,Africa
4168,Estonia,1972,2.13,1386099.0,70.48,Europe
13725,Vanuatu,1971,6.2,88023.0,52.21,Australia and Oceania
9278,Nicaragua,1979,6.23,3157356.0,65.28,North America
3691,Dominican Republic,1981,4.3,5943591.0,66.6,North America
3257,Cyprus,1952,3.73,506627.0,66.58,Europe
12037,Swaziland,1984,6.46,680253.0,58.0,Africa
4402,Finland,1963,2.66,4522727.0,69.19,Europe
9817,Panama,1951,5.7,881346.0,56.42,North America


#### Determine min/max values for the plot axes and the marker-size normalization:

In [33]:
# Global extremes over all years, so every frame of the animation shares the
# same axis limits and the same population-to-marker-size scale.
fertility_col = merged_df["fertility_rate"]
life_exp_col = merged_df["life_expectancy"]
population_col = merged_df["population"]

# y axis: fertility rate
plot_ymin, plot_ymax = fertility_col.min(), fertility_col.max()
# x axis: life expectancy
plot_xmin, plot_xmax = life_exp_col.min(), life_exp_col.max()
# marker size normalization: population
minsize, maxsize = population_col.min(), population_col.max()

In [34]:
# Echo the x limits, y limits and population range on a single line.
axis_and_size_limits = (
    plot_xmin, plot_xmax,
    plot_ymin, plot_ymax,
    minsize, maxsize,
)
print(*axis_and_size_limits)

4.0 83.3 1.13 9.22 2128.0 1376048943.0


#### Create the plots:

In [35]:
# Render one scatter plot per year (1960-2015) and save each as a PNG frame
# in "plots/". All frames share the same axis limits, marker-size scale,
# continent colours and legend position so the resulting animation is stable.
opac = 0.6

# savefig does not create missing directories, so make sure the target exists.
os.makedirs("plots", exist_ok=True)

# Fix the continent -> colour assignment once for all frames; otherwise the
# hue order is derived from each yearly subset and colours could change
# between frames if the order of appearance differs.
continent_order = list(merged_df["continent"].unique())

# The first legend entries are the "continent" subtitle followed by one
# entry per continent; everything after that belongs to the size legend.
n_hue_entries = 1 + len(continent_order)

for year in range(1960, 2016):

    merged_subdf = merged_df.loc[merged_df["year"] == year]
    fig, ax = plt.subplots(figsize=(16, 8))

    sns.scatterplot(
        data=merged_subdf,
        x="life_expectancy",
        y="fertility_rate",
        size="population",
        sizes=(50, 1000),
        size_norm=(minsize, maxsize),
        hue="continent",
        hue_order=continent_order,
        palette="bright",
        alpha=opac,
        ax=ax,
    )

    ax.set_xlabel("Life Expectancy (years)")
    ax.set_ylabel("Fertility Rate")
    ax.set_title(f"Gapminder Data: {year}")
    ax.set_xlim(plot_xmin, plot_xmax)
    ax.set_ylim(plot_ymin, plot_ymax)

    # The automatic legend also lists the marker sizes, so rebuild it from
    # the colour entries only and pin it to the lower-left corner so it
    # stays in the same spot in every frame.
    handles, labels = ax.get_legend_handles_labels()
    leg = ax.legend(
        handles[:n_hue_entries],
        labels[:n_hue_entries],
        loc="lower left",
        bbox_to_anchor=(0, 0),
    )

    # Matplotlib 3.7 renamed Legend.legendHandles to Legend.legend_handles
    # and later removed the old name; support both.
    legend_handles = getattr(leg, "legend_handles", None)
    if legend_handles is None:
        legend_handles = leg.legendHandles

    # Match the legend markers' transparency to the plotted markers.
    for legend_handle in legend_handles:
        legend_handle.set_alpha(opac)

    fig.savefig(f"plots/gapplot_{year}.png", format="png")
    # Close explicitly so figures do not accumulate in memory over the loop.
    plt.close(fig)

#### Create the gif:

In [36]:
# Read the per-year PNG frames back in chronological order and write them
# out as a single animated GIF.
# The original file name expression ended in `.format(i-1960)`, which was a
# no-op because the string contains no placeholders; it has been removed.
images = [
    imageio.imread(f"plots/gapplot_{year}.png")
    for year in range(1960, 2016)
]

# NOTE(review): newer imageio releases favour `duration` over `fps` for GIF
# output -- confirm against the installed imageio version.
imageio.mimsave('gapm_anim_1960_2015.gif', images, fps=25)