In [1]:
import pandas as pd
import numpy as np
import ipywidgets
import matplotlib.pyplot as plt
import plotly.express as px
from IPython.display import display

In [2]:
# Load the two source tables used by the charts below:
#   df_le_gdp - read from life_expectancy_and_gdp.csv
#   df_le_che - read from life_expectancy_and_health_expenditure.csv
# Both paths are relative to the notebook's working directory.
df_le_gdp, df_le_che = (
    pd.read_csv(csv_path)
    for csv_path in (
        'full-data/life_expectancy_and_gdp.csv',
        'full-data/life_expectancy_and_health_expenditure.csv',
    )
)

In [3]:
def normalize(numbers, min_val, max_val, range):
    """Linearly rescale ``numbers`` from ``[min_val, max_val]`` onto ``range``.

    Parameters
    ----------
    numbers : iterable of numbers
        Values to rescale.
    min_val, max_val : number
        Source interval; ``min_val`` maps to ``range[0]`` and ``max_val`` maps
        to ``range[1]``. Values outside the interval are extrapolated, not
        clipped.
    range : sequence of two numbers
        Target interval as ``[low, high]``. The name shadows the ``range``
        builtin inside this function; it is kept so existing positional and
        keyword callers continue to work.

    Returns
    -------
    list
        One rescaled value per input, in input order. When
        ``max_val == min_val`` there is no spread to scale, so every value is
        mapped to ``range[0]`` instead of dividing by zero.
    """
    low, high = range[0], range[1]
    span = max_val - min_val
    if span == 0:
        # Degenerate source interval (e.g. a single data point): avoid the
        # division by zero and pin everything to the lower target bound.
        return [low for _ in numbers]
    width = high - low
    # Same operation order as before: scale to [0, 1] first, then to target.
    return [(num - min_val) / span * width + low for num in numbers]

In [4]:
# Option lists for the GDP scatter-plot controls.
# Years are offered as strings, in ascending order.
all_year_5 = [str(year_value) for year_value in sorted(df_le_gdp.Year.unique())]
# 'All' colours every continent; any other entry highlights just that one.
all_text_5 = ['All', 'Africa', 'Americas', 'Asia', 'Europe', 'Oceania']
# Every distinct entity name except the 'World' aggregate, as strings.
all_countries_5 = [
    str(entity_name)
    for entity_name in df_le_gdp.loc[df_le_gdp['Entity'] != 'World', 'Entity'].unique()
]

def show_scatter_plot_gdp(year, text):
    """Draw a bubble chart of life expectancy against GDP per capita for one year.

    Every entity in the module-level ``df_le_gdp`` that has a row for ``year``
    becomes one dot: x is ``GDP_per_capita`` (log scale), y is ``Life_exp`` and
    the dot area is the entity's ``Population`` rescaled to 100-2000 with
    ``normalize``. Entities whose ``Continent`` is not one of the five listed
    continents are not drawn, but their population still takes part in the
    size normalisation (as before).

    Parameters
    ----------
    year : str or int
        Year to plot; converted with ``int``.
    text : str
        ``'All'`` to colour every continent, or a single continent name to
        colour only that continent and draw the others in light grey.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``. If no entity has a usable
        population for ``year`` a short message is printed instead.
    """
    print('Each dot is a country. And the size of the dot refer to the population in a country')
    colors = {'Africa':'#D1AFE4', 'Americas':'#2587F0', 'Asia':'#21DA61', 'Europe':'#E9C213', 'Oceania':'#F791C1'}
    groups = ['Africa', 'Americas', 'Asia', 'Europe', 'Oceania']
    translate = {'World': 'Thế giới', 'Oceania': 'Châu Úc', 'Europe': 'Châu Âu', 'Americas': 'Châu Mỹ', 'Asia': 'Châu Á', 'Africa': 'Châu Phi'}
    y_labels = [30, 40, 50, 60, 70, 80, 90]
    year = int(year)
    valid_con = groups if text == 'All' else [text]

    # One row per entity for the requested year: the first matching row in
    # file order, with entities sorted by name. This reproduces what iterating
    # groupby('Entity') and taking values[0] did, without a Python-level loop
    # over every entity on each widget change.
    df_year = (
        df_le_gdp.loc[df_le_gdp['Year'] == year,
                      ['Life_exp', 'GDP_per_capita', 'Population', 'Entity', 'Continent']]
        .dropna(subset=['Entity'])
        .sort_values('Entity', kind='mergesort')  # mergesort is stable
        .drop_duplicates(subset='Entity', keep='first')
    )

    # min()/max() over a list containing NaN give order-dependent results, and
    # an empty list raises ValueError, so derive the bounds from known values.
    populations = df_year['Population'].tolist()
    known_populations = [p for p in populations if not pd.isna(p)]
    if not known_populations:
        print('No population data available for ' + str(year))
        return

    # normalize size
    norm_size = normalize(populations, min(known_populations), max(known_populations), [100, 2000])
    df_year = df_year.assign(marker_size=norm_size)

    # set up scatter plot: one collection per continent
    _, ax1 = plt.subplots(figsize=(10, 6))
    for g in groups:
        subset = df_year.loc[df_year['Continent'] == g]
        tmp_x = subset['GDP_per_capita'].tolist()
        tmp_y = subset['Life_exp'].tolist()
        tmp_s = subset['marker_size'].tolist()
        if g in valid_con:
            ax1.scatter(x=tmp_x, y=tmp_y, s=tmp_s, c=colors[g], alpha=0.7, edgecolors='none', label=translate[g])
        else:
            # Unselected continents stay visible as unlabeled grey context
            # drawn underneath the highlighted one.
            ax1.scatter(x=tmp_x, y=tmp_y, s=tmp_s, c='#EEEEEE', alpha=0.7, edgecolors='none', zorder=-1)

    ax1.set_title("Tuổi thọ dự kiến và thu nhập bình quân đầu người (GDP) trong năm " + str(year))
    lgnd = ax1.legend(loc="lower right", numpoints=1, title='Châu lục', fontsize=10)
    ax1.set_yticks(y_labels)

    # Give every legend marker the same fixed size instead of the bubble size.
    # Legend.legendHandles was renamed to legend_handles in matplotlib 3.7 and
    # removed in 3.9, so prefer the new name and fall back for older releases.
    handles = getattr(lgnd, 'legend_handles', None)
    if handles is None:
        handles = lgnd.legendHandles
    for handle in handles:
        handle.set_sizes([90])

    ax1.set_xscale("log")
    ax1.set_xlabel("Thu nhập bình quân đầu người GDP (đv: Đô la Mỹ)")
    ax1.set_ylabel("Tuổi thọ dự kiến (đv: năm)")
    plt.show()

In [5]:
# Wire show_scatter_plot_gdp to two combobox controls: the year to plot and
# the continent filter ('All' or a single continent).
animation7 = ipywidgets.interact(
    show_scatter_plot_gdp,
    year=ipywidgets.Combobox(
        value='2018',
        options=all_year_5,
        description='Year:',
        ensure_option=True,
        disabled=False,
    ),
    text=ipywidgets.Combobox(
        value='All',
        options=all_text_5,
        description='Mode:',
        ensure_option=True,
        disabled=False,
    ),
)
# interact() has already rendered the controls and handed back the callback
# itself, so this call only echoes the function's repr below the widget.
display(animation7)

interactive(children=(Combobox(value='2018', description='Year:', ensure_option=True, options=('1950', '1951',…

<function __main__.show_scatter_plot_gdp(year, text)>

In [6]:
# Build a per-entity lookup from the health-expenditure table.
# d[entity] holds:
#   'y'   - list of years (one per row of that entity)
#   'c'   - current health expenditure per capita for each of those rows
#   'l'   - life expectancy for each of those rows
#   'con' - continent taken from the entity's first row
#   'min_year_data' / 'max_year_data' - first and last year that actually has
#       an expenditure value; 10000 / -1 are sentinels meaning "no data".
grps = df_le_che.groupby('Entity')
d = {}
for key, df_local in grps:
    years = df_local['Year'].tolist()
    che = df_local['CHE_per_capita(in US$)'].tolist()
    life = df_local['Life_exp'].tolist()
    continent = df_local['Continent'].tolist()[0]
    # Pair each year with the value on the same row rather than computing a
    # list position as (year - 2000): that arithmetic is only valid when an
    # entity's rows start at 2000 and have no gaps, and otherwise reads the
    # wrong row or runs off the end of the list.
    years_with_che = [yr for yr, value in zip(years, che) if pd.notna(value)]
    a = min(years_with_che, default=10000)
    b = max(years_with_che, default=-1)
    d[key] = {
        'y': years,
        'c': che,
        'l': life,
        'con': continent,
        'min_year_data': a,
        'max_year_data': b
    }
# Choices for the chart's selector: 'All', each continent, then every entity.
all_text_4 = ['All', 'Africa', 'Americas', 'Asia', 'Europe', 'Oceania'] + list(df_le_che.Entity.unique())

In [7]:
def _che_endpoints(entry, year_range):
    """Return ``(x_start, y_start, x_end, y_end)`` for one ``d`` entry, or None.

    x is health expenditure per capita and y is life expectancy. The start is
    taken at the later of the entity's first data year and ``year_range[0]``,
    the end at the earlier of its last data year and ``year_range[1]``.
    None is returned when the entity has no data inside the range or when any
    of the four values is missing.
    """
    first = max(entry['min_year_data'], year_range[0])
    last = min(entry['max_year_data'], year_range[1])
    if first > last:
        # No data inside the selected range. This also covers entities that
        # have no expenditure data at all (sentinels 10000 / -1).
        return None
    # Look rows up by their actual year instead of assuming row 0 is 2000.
    position = {yr: pos for pos, yr in enumerate(entry['y'])}
    if first not in position or last not in position:
        return None
    start, end = position[first], position[last]
    values = (entry['c'][start], entry['l'][start], entry['c'][end], entry['l'][end])
    if any(pd.isna(value) for value in values):
        return None
    return values


def show_arrow_chart_che(year_range, text):
    """Plot how countries moved in (health expenditure, life expectancy) space.

    Each country gets a dot at its position in the first year of
    ``year_range`` for which it has data and an arrow to its position in the
    last such year. x is health expenditure per capita (log scale), y is life
    expectancy. Reads the module-level ``df_le_che`` and the lookup ``d``.

    Parameters
    ----------
    year_range : sequence of two ints
        ``(first_year, last_year)``, both inclusive.
    text : str
        ``'All'`` colours every continent; a continent name colours that
        continent and greys out the rest; any other value is treated as an
        entity name and only that entity is drawn.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    translate = {'World': 'Thế giới', 'Oceania': 'Châu Úc', 'Europe': 'Châu Âu', 'Americas': 'Châu Mỹ', 'Asia': 'Châu Á', 'Africa': 'Châu Phi'}
    colors = {'Africa':'#D1AFE4', 'Americas':'#2587F0', 'Asia':'#21DA61', 'Europe':'#E9C213', 'Oceania':'#F791C1'}
    continent_names = ['Africa', 'Americas', 'Asia', 'Europe', 'Oceania']
    muted = '#EEEEEE'

    # Decide what is highlighted. valid_country is None in the continent modes.
    if text == 'All':
        valid_continents, valid_country = continent_names, None
    elif text in continent_names:
        valid_continents, valid_country = [text], None
    else:
        if text not in d:
            print('Unknown selection: ' + str(text))
            return
        valid_continents, valid_country = [d[text]['con']], text

    _, ax1 = plt.subplots(figsize=(12, 6))
    for key, df_local in df_le_che.groupby('Continent'):
        highlighted = key in valid_continents
        group_color = colors.get(key, muted) if highlighted else muted

        # The table has one row per entity and year, so visit each entity
        # once; iterating the raw column would draw every dot and arrow once
        # per year-row on top of itself.
        x_start, y_start, x_end, y_end = [], [], [], []
        for k in df_local['Entity'].dropna().unique():
            if valid_country is not None and k != valid_country:
                continue
            endpoints = _che_endpoints(d[k], year_range)
            if endpoints is None:
                continue
            x_start.append(endpoints[0])
            y_start.append(endpoints[1])
            x_end.append(endpoints[2])
            y_end.append(endpoints[3])

        if valid_country is not None:
            # Only the group that actually contains the country contributes a
            # legend entry; labelling the empty groups too would repeat the
            # country name once per continent.
            if x_start:
                ax1.scatter(x_start, y_start, s=10, c=colors.get(key, muted), alpha=0.7, edgecolors='none', label=valid_country)
        elif highlighted:
            ax1.scatter(x_start, y_start, s=10, c=group_color, alpha=0.7, edgecolors='none', label=translate[key])
        else:
            ax1.scatter(x_start, y_start, s=10, c=muted, alpha=0.7, edgecolors='none', zorder=-1)

        # Practically invisible markers at the arrow tips. Presumably these
        # exist so that axis autoscaling takes the end points into account.
        ax1.scatter(x_end, y_end, s=0.0001, c='white', alpha=0.01, zorder=-1)

        arrowprops = dict(color=group_color, arrowstyle='->', linewidth=0.5)
        # Greyed-out arrows are pushed below the highlighted artists.
        layer = {} if highlighted else {'zorder': -1}
        for x0, y0, x1, y1 in zip(x_start, y_start, x_end, y_end):
            # Skip only zero-length arrows; a move along a single axis is
            # still a real change and must be drawn.
            if (x0, y0) != (x1, y1):
                ax1.annotate('', xytext=(x0, y0), xy=(x1, y1), arrowprops=arrowprops, **layer)

    ax1.set_title("Tuổi thọ dự kiến và chi phí chăm sóc sức khỏe, " +  str(year_range[0]) + ' vs ' + str(year_range[1]))
    legend_title = 'Châu lục' if valid_country is None else 'Country'
    lgnd = ax1.legend(loc="lower right", numpoints=1, title=legend_title, fontsize=10)

    # Enlarge every legend marker (the plotted dots are only s=10).
    # Legend.legendHandles was renamed to legend_handles in matplotlib 3.7 and
    # removed in 3.9, so prefer the new name and fall back for older releases.
    handles = getattr(lgnd, 'legend_handles', None)
    if handles is None:
        handles = lgnd.legendHandles
    for handle in handles:
        handle.set_sizes([90])

    print('Log scale at x-axis')
    ax1.set_xscale("log")
    ax1.set_xlabel("Chi phí chăm sóc sức khỏe trên đầu người (đv: Đô la Mỹ)")
    ax1.set_ylabel("Tuổi thọ dự kiến(đv: năm)")
    plt.show()

In [8]:
# Wire show_arrow_chart_che to a year-range slider (2000-2019, redrawn while
# dragging) and a combobox that selects 'All', a continent or one entity.
animation6 = ipywidgets.interact(
    show_arrow_chart_che,
    year_range=ipywidgets.IntRangeSlider(
        value=[2000, 2019],
        min=2000,
        max=2019,
        step=1,
        description='Year:',
        disabled=False,
        orientation='horizontal',
        continuous_update=True,
    ),
    text=ipywidgets.Combobox(
        value='All',
        options=all_text_4,
        description='Choose:',
        ensure_option=True,
        disabled=False,
    ),
)
# interact() has already rendered the controls and handed back the callback
# itself, so this call only echoes the function's repr below the widget.
display(animation6)

interactive(children=(IntRangeSlider(value=(2000, 2019), description='Year:', max=2019, min=2000), Combobox(va…

<function __main__.show_arrow_chart_che(year_range, text)>