In [1]:
import ipywidgets as widgets
from ipywidgets import Layout
from ipywidgets import AppLayout, Button, GridspecLayout
from ipywidgets import interact, interact_manual
import nbimporter
from Project.Utils.visualize import  search
import warnings
warnings.filterwarnings("ignore")
from scipy import stats
import plotly.express as px
import os
from math import nan
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display_html
from Project.Utils.visualize import  searchTimeSeries, normalize_by_country
import Notebook_Time_Series as Nbook_Times


In [2]:
# Names of the index levels used throughout the notebook.
col_country = 'Country'
col_year = 'Year'
col_region = 'Region'

# Input and output folders, resolved against the current working directory.
data_path = f"{os.getcwd()}/Databases/"
output_path = f"{os.getcwd()}/Output/"
country_path = f"{output_path}/Country/"
region_path = f"{output_path}/Region/"
cluster_path = f"{output_path}/Cluster/"

# File names, relative to the folders above.
file_regions = '/AuxiliarData/world-regions-mod.csv'
file_gold = 'GoldDataframe.csv'
file_corr_pearson = 'Corr_DF_Pearson.csv'
file_corr_spearman = 'Corr_DF_Spearman.csv'
file_shifted_corr_country = 'Shifted_Corr_Country.csv'
file_shifted_corr_region = 'Shifted_Corr_Region.csv'

# Region lookup and the main ("gold") table, both indexed on load.
df_regions = pd.read_csv(
    data_path + file_regions,
    index_col=[col_region, col_country],
)
df_gold = pd.read_csv(
    output_path + file_gold,
    index_col=[col_country, col_year, col_region],
)

# Correlation tables.
# NOTE(review): only the Spearman table is indexed by country on load; the Pearson
# table keeps the default index — confirm whether that difference is intended.
df_corr_pearson = pd.read_csv(output_path + file_corr_pearson)
df_corr_spearman = pd.read_csv(output_path + file_corr_spearman, index_col=col_country)
df_shifted_corr_country = pd.read_csv(output_path + file_shifted_corr_country)
df_shifted_corr_region = pd.read_csv(output_path + file_shifted_corr_region)

# P-value level used by the tables below.
PVALUE_VAR = 0.05

# Sorted selector contents taken from the gold table's index levels.
country_list = list(np.sort(df_gold.index.get_level_values('Country').unique()))
region_list = list(np.sort(df_gold.index.get_level_values('Region').unique()))

# Every column of the gold table except GDP itself.
indicators = list(df_gold.columns)
indicators.remove('GDP')

# First and last year present in the gold table.
_gold_years = df_gold.index.get_level_values('Year').unique()
min_year = min(_gold_years)
max_year = max(_gold_years)

In [3]:
# Multi-select list of the columns available in the Spearman table;
# the first column starts selected.
_indicator_options = df_corr_spearman.columns.tolist()

indicator = widgets.SelectMultiple(
    description='Indicator',
    options=_indicator_options,
    value=[_indicator_options[0]],
    disabled=False,
    layout=Layout(width='50%', height='80px'),
)



def globalGrapgh(indicator):
    """Draw a world map of one indicator's values and list its extreme countries.

    Parameters
    ----------
    indicator : sequence of str
        Current selection of the ``SelectMultiple`` widget. Only the first
        selected entry is used; it must be a column of ``df_corr_spearman``.

    Returns
    -------
    None
        The function only produces output: a choropleth coloured by the
        selected column of ``df_corr_spearman``, followed by two tables shown
        side by side with at most ``N`` rows each: the largest strictly
        positive values ('Direct correlation') and the smallest strictly
        negative values ('Inverse correlation').
    """
    # The selection can be cleared in the widget; indexing an empty selection
    # would raise IndexError, so report it instead.
    if not indicator:
        print("Please, select an indicator.")
        return

    ind = indicator[0]
    N = 10  # maximum number of countries listed in each table

    fig = px.choropleth(df_corr_spearman, locations = df_corr_spearman.index, locationmode='country names',
                        color = ind, projection="natural earth",
                        color_continuous_scale='RdBu',
                        width = 700, height=500)

    # Single-column view of the selected indicator, ranked in both directions.
    ranking = df_corr_spearman[[ind]]
    pos_corr = ranking.sort_values(by = ind, axis = 0, ascending = False).head(n = N)
    neg_corr = ranking.sort_values(by = ind, axis = 0, ascending = True).head(n = N)

    # Keep only values with the sign each table is about.
    pos_corr = pos_corr.loc[pos_corr[ind] > 0]
    neg_corr = neg_corr.loc[neg_corr[ind] < 0]

    fig.update(layout_coloraxis_showscale=True)
    fig.show()

    # Inline display style lets the two tables sit on the same row.
    pos_styler = pos_corr.style.set_table_attributes("style='display:inline'").set_caption('Direct correlation')
    neg_styler = neg_corr.style.set_table_attributes("style='display:inline'").set_caption('Inverse correlation')

    space = "\xa0" * 10
    display_html(pos_styler._repr_html_() + space  + neg_styler._repr_html_(), raw=True)

# Bind the indicator selector to the map callback; the interactive widget
# returned here is the cell's displayed output.
widgets.interactive(globalGrapgh, indicator = indicator)

interactive(children=(SelectMultiple(description='Indicator', index=(0,), layout=Layout(height='80px', width='…

In [4]:
def tableOut(threshold, country):
    """Show the rows returned by ``search`` for one country as a styled table.

    Parameters
    ----------
    threshold : float
        Forwarded to ``search`` as its first argument.
    country : str
        Country name, forwarded to ``search`` together with ``col_country``.

    Returns
    -------
    None
        The styled table is displayed. When ``search`` returns an empty
        frame, a message is printed instead.
    """
    found = search(threshold, col_country, country)
    if found.empty:
        print("No indicators have been found.")
        return None

    corr_cols = ['GDP Pearson Corr', 'GDP Spearman Corr']
    pval_cols = ['P-value Pearson', 'P-value Spearman']

    # Per-column lower bounds; every highlight range ends at 1.5.
    pval_floor = pd.Series(PVALUE_VAR, index=pval_cols)
    corr_floor_all = pd.Series(-1, index=corr_cols)
    corr_floor_pos = pd.Series(0, index=corr_cols)

    styled = found.style
    # P-values at or above PVALUE_VAR are flagged in red.
    styled = styled.highlight_between(left=pval_floor, right=1.5, axis=1,
                                      props='color:white; background-color:red;')
    # Correlation cells get a base colour, then a lighter one from 0 upwards.
    styled = styled.highlight_between(left=corr_floor_all, right=1.5, axis=1,
                                      props='color:white; background-color:#929bfc;')
    styled = styled.highlight_between(left=corr_floor_pos, right=1.5, axis=1,
                                      props='color:white; background-color:#b3b9ff;')
    styled = styled.format('{:,.4f}', subset=corr_cols)
    styled = styled.format('{:,.12f}', subset=pval_cols)

    display(styled)



# Country dropdown plus a threshold slider (0 to 1 in steps of 0.05) driving tableOut.
# NOTE(review): the name `g` is reused by the interact callbacks of later cells.
@interact(
    country = country_list,
    threshold = (0, 1, 0.05))
def g(country = 'Afghanistan', threshold = 0.7):
    """Interact callback: forward the selected country and threshold to ``tableOut``."""
    return tableOut(threshold ,country)

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Albania', 'Algeria', 'Angola', …

In [5]:
# Median of every indicator column per region, for each correlation method.
# The Pearson medians must come from the Pearson table (they were previously
# computed from the Spearman table, so both displayed columns were identical).
# The Pearson table is loaded without an index, so it is indexed by country here.
# NOTE(review): assumes Corr_DF_Pearson.csv has a 'Country' column like the
# Spearman file — confirm against the file that produces it.
_pearson_by_country = (
    df_corr_pearson.set_index(col_country)
    if col_country in df_corr_pearson.columns
    else df_corr_pearson
)
median_corr_df_pearson_region = pd.merge(_pearson_by_country, df_regions, how = 'inner', left_index = True, right_index = True).groupby(level = col_region).median()
median_corr_df_spearman_region = pd.merge(df_corr_spearman, df_regions, how = 'inner', left_index = True, right_index = True).groupby(level = col_region).median()

def tableRegMed(region, threshold):
    """Show a region's median correlations that reach ``threshold`` in both columns.

    Parameters
    ----------
    region : str
        Row label looked up in ``median_corr_df_pearson_region`` and
        ``median_corr_df_spearman_region``.
    threshold : float
        Minimum absolute value required in both correlation columns.

    Returns
    -------
    None
        The styled table is displayed, ordered by the descending sum of the
        two columns. A message is printed instead when no row qualifies.
    """
    pearson_col = 'GDP Pearson Corr'
    spearman_col = 'GDP Spearman Corr'

    medians = pd.concat(
        [
            median_corr_df_pearson_region.loc[region].rename(pearson_col),
            median_corr_df_spearman_region.loc[region].rename(spearman_col),
        ],
        axis=1,
    )

    strong_enough = (medians[pearson_col].abs() >= threshold) & (medians[spearman_col].abs() >= threshold)
    medians = medians.loc[strong_enough]

    if medians.empty:
        print("No indicators have been found.")
        return None

    # The sort key ignores the column it receives and ranks rows by their total.
    row_totals = medians.sum(axis=1)
    medians = medians.sort_values(by=medians.columns[0], ascending=False, key=lambda _column: row_totals)

    both_cols = [pearson_col, spearman_col]
    floor_all = pd.Series(-1, index=both_cols)
    floor_pos = pd.Series(0, index=both_cols)

    styled = medians.style
    styled = styled.highlight_between(left=floor_all, right=1.5, axis=1,
                                      props='color:white; background-color:#929bfc;')
    styled = styled.highlight_between(left=floor_pos, right=1.5, axis=1,
                                      props='color:white; background-color:#b3b9ff;')
    styled = styled.format('{:,.4f}', subset=both_cols)

    display(styled)



# Region dropdown plus a threshold slider (0 to 1 in steps of 0.05) driving tableRegMed.
@interact(
    region = region_list,
    threshold = (0, 1, 0.05))
def g(region = region_list[0], threshold = 0.7):
    """Interact callback: forward the selected region and threshold to ``tableRegMed``."""
    return tableRegMed(region, threshold)


interactive(children=(Dropdown(description='region', options=('East Asia and Pacific', 'Europe and Central Asi…

In [6]:
# World-wide median of every indicator column, for each correlation method,
# restricted to the countries present in the region lookup (inner join).
# The Pearson medians must come from the Pearson table (they were previously
# computed from the Spearman table, so both displayed columns were identical).
# The Pearson table is loaded without an index, so it is indexed by country here.
# NOTE(review): assumes Corr_DF_Pearson.csv has a 'Country' column like the
# Spearman file — confirm against the file that produces it.
_pearson_by_country = (
    df_corr_pearson.set_index(col_country)
    if col_country in df_corr_pearson.columns
    else df_corr_pearson
)
median_corr_df_pearson = pd.merge(_pearson_by_country, df_regions, how = 'inner', left_index = True, right_index = True).median().rename('GDP Pearson Corr')
median_corr_df_spearman = pd.merge(df_corr_spearman, df_regions, how = 'inner', left_index = True, right_index = True).median().rename('GDP Spearman Corr')

def tableWorldMed(threshold):
    """Show the world-wide median correlations that reach ``threshold`` in both columns.

    Parameters
    ----------
    threshold : float
        Minimum absolute value required in both correlation columns.

    Returns
    -------
    None
        The styled table is displayed, ordered by the descending sum of the
        two columns. A message is printed instead when no row qualifies.
    """
    pearson_col = 'GDP Pearson Corr'
    spearman_col = 'GDP Spearman Corr'

    medians = pd.concat([median_corr_df_pearson, median_corr_df_spearman], axis=1)

    strong_enough = (medians[pearson_col].abs() >= threshold) & (medians[spearman_col].abs() >= threshold)
    medians = medians.loc[strong_enough]

    if medians.empty:
        print("No indicators have been found.")
        return None

    # The sort key ignores the column it receives and ranks rows by their total.
    row_totals = medians.sum(axis=1)
    medians = medians.sort_values(by=medians.columns[0], ascending=False, key=lambda _column: row_totals)

    both_cols = [pearson_col, spearman_col]
    floor_all = pd.Series(-1, index=both_cols)
    floor_pos = pd.Series(0, index=both_cols)

    styled = medians.style
    styled = styled.highlight_between(left=floor_all, right=1.5, axis=1,
                                      props='color:white; background-color:#929bfc;')
    styled = styled.highlight_between(left=floor_pos, right=1.5, axis=1,
                                      props='color:white; background-color:#b3b9ff;')
    styled = styled.format('{:,.4f}', subset=both_cols)

    display(styled)



# Threshold slider (0 to 1 in steps of 0.05) driving tableWorldMed.
@interact(
    threshold = (0, 1, 0.05))
def g(threshold = 0.7):
    """Interact callback: forward the selected threshold to ``tableWorldMed``."""
    return tableWorldMed(threshold)

interactive(children=(FloatSlider(value=0.7, description='threshold', max=1.0, step=0.05), Output()), _dom_cla…

In [7]:
class WidgetTimeWindowCountryStatus:
    """Mutable state kept between successive calls of the time-window callback."""

    def __init__(self):
        # Aggregation level the zone dropdown is currently populated for.
        self.filter_by = "Country"
        # Last zone that was loaded and its rows; nothing is loaded at start.
        self.zone = self.data = None


# Single shared instance read and updated by time_window_zone.
status_widget = WidgetTimeWindowCountryStatus()

In [4]:
# Precompute the per-region data once so the widget callbacks below can reuse it
# instead of reloading it on every interaction.
# Nbook_Times.load_by_region appears to normalize each region and report how long
# it took (see the recorded output) — TODO confirm against Notebook_Time_Series.
countries_by_region = {}
for r in region_list:
    countries_by_region[r] = Nbook_Times.load_by_region(r, df_gold)

[5.45 seconds] Normalized Region East Asia and Pacific
[17.08 seconds] Normalized Region Europe and Central Asia
[6.15 seconds] Normalized Region Latin America and Caribbean
[3.76 seconds] Normalized Region Middle East and North Africa
[0.52 seconds] Normalized Region North America
[2.08 seconds] Normalized Region South Asia
[10.08 seconds] Normalized Region Sub-Saharan Africa


In [9]:
from tkinter import E

def time_window_zone(by, zone, threshold, years):
    """Show the time-window table and the global table for a zone, side by side.

    Parameters
    ----------
    by : str
        'Country' or 'Region'; decides how ``zone`` is interpreted.
    zone : str
        Country name or region name currently selected in ``zone_drop``.
    threshold : float
        Forwarded to ``searchTimeSeries``.
    years : sequence of two ints
        Lower and upper year of the selected window.

    Returns
    -------
    None
        Output is displayed as HTML. The call returns early, without output,
        right after repopulating ``zone_drop`` when ``by`` has just changed.
    """
    # --- Update widget status -------------------------------------------------
    if by == 'Country':
        if status_widget.filter_by != "Country":
            # The dropdown still lists regions: swap its options and stop here.
            # Presumably the option change re-triggers this callback with a
            # valid zone — confirm against ipywidgets behaviour.
            status_widget.filter_by = "Country"
            zone_drop.options = country_list
            return
        if status_widget.zone != zone:
            # Load the rows of the newly selected country.
            status_widget.data = df_gold.loc[df_gold.index.get_level_values('Country') == zone]
            status_widget.zone = zone

    elif by == 'Region':
        if status_widget.filter_by != "Region":
            status_widget.filter_by = "Region"
            zone_drop.options = region_list
            return
        if status_widget.zone != zone:
            # Reuse the data precomputed for the newly selected region.
            status_widget.data = countries_by_region[zone]
            status_widget.zone = zone

    # --- Validate before doing any work ---------------------------------------
    if years[0] > years[1]:
        print("Please, select a valid range of years.")
        return

    # --- Compute --------------------------------------------------------------
    df_zone = status_widget.data

    # Same query twice; the boolean flag is True for the selected window and
    # False for the global table (exact semantics live in searchTimeSeries).
    df_time = searchTimeSeries(threshold, years[0], years[1], True, df_zone)
    df_global = searchTimeSeries(threshold, years[0], years[1], False, df_zone)

    # --- Display --------------------------------------------------------------
    def _as_html(table, caption):
        # Best effort: any failure while styling is shown as a placeholder text
        # instead of breaking the widget.
        try:
            return Nbook_Times.styler_method(table, caption, PVALUE_VAR)._repr_html_()
        except Exception:
            return 'No indicators available for the selected parameters'

    space = "\xa0" * 10
    html_time = _as_html(df_time, str(years[0]) + '-' + str(years[1]))
    # NOTE(review): the caption of the global table is hard-coded to '2000-2020';
    # confirm it matches the year span of the data.
    html_global = _as_html(df_global, '2000-2020')

    display_html(html_time + space + html_global, raw=True)
    
# Aggregation level selector.
by_drop = widgets.Dropdown(description='By: ', options=['Country', 'Region'])

# Year window; spans the whole range of years present in the gold table
# (min_year / max_year hold those bounds).
intslider = widgets.IntRangeSlider(
    value=[min_year, max_year],
    min=min_year,
    max=max_year,
    step=1,
    description='Years: ',
)

# Zone selector; starts with the country list and is repopulated by the callback.
zone_drop = widgets.Dropdown(description='Zone: ', options=country_list, value='Afghanistan')

# Threshold forwarded to the time-series search.
floatslider = widgets.FloatSlider(
    value=0.7,
    min=0,
    max=1.0,
    step=0.05,
    description='Threshold:',
)

widgets.interact(time_window_zone, by=by_drop, zone=zone_drop, threshold=floatslider, years=intslider)

interactive(children=(Dropdown(description='By: ', options=('Country', 'Region'), value='Country'), Dropdown(d…

<function __main__.time_window_zone(by, zone, threshold, years)>

In [10]:
def table_high_country(by, zone):
    """Find, per indicator, the year window with the strongest Spearman correlation.

    Every year window produced by ``Nbook_Times.generate_years_combinations``
    is evaluated with ``searchTimeSeries``. For each indicator, the highest
    positive and the lowest negative 'GDP Spearman Corr' whose
    'P-value Spearman' is below ``PVALUE_VAR`` are kept together with the
    window in which they occur, and the resulting table is displayed.

    Parameters
    ----------
    by : str
        'Country' or 'Region'; decides how ``zone`` is interpreted.
    zone : str
        Country name or region name currently selected in ``zone_drop_high``.

    Returns
    -------
    None
        The call returns early, without output, right after repopulating
        ``zone_drop_high`` when ``by`` has just changed.

    Raises
    ------
    ValueError
        If ``by`` is neither 'Country' nor 'Region'.
    """
    if by == 'Country':
        # Dropdown still lists regions: swap its options and wait for the next call.
        if list(zone_drop_high.options) != country_list:
            zone_drop_high.options = country_list
            return
        # Rows of the selected country.
        df_zone = df_gold.loc[df_gold.index.get_level_values('Country') == zone]
        min_diff = 5

    elif by == 'Region':
        if list(zone_drop_high.options) != region_list:
            zone_drop_high.options = region_list
            return
        # Data precomputed for the selected region.
        df_zone = countries_by_region[zone]
        min_diff = 2

    else:
        raise ValueError("by must be 'Country' or 'Region', got " + repr(by))

    df_highest = Nbook_Times.init_highest_table(indicators)

    # Lightweight progress indicator rewritten in place with a carriage return.
    progress_step = 0
    computing_text = "Loading "
    print(computing_text, end="\r")

    # For every combination of years...
    for years in Nbook_Times.generate_years_combinations(min_diff, min_year, max_year):

        progress_step = (progress_step + 1) % 50
        print(computing_text + "." * progress_step, end="\r")

        df_aux = searchTimeSeries(0, years[0], years[1], True, df_zone)
        year_label = str(years[0]) + '-' + str(years[1])

        # Only the indicators that have a result for this window.
        for name in set(indicators) & set(df_aux.index):
            in_highest = df_highest.index.get_level_values(0) == name
            in_aux = df_aux.index.get_level_values(0) == name

            # .iloc[0] takes the first matching row explicitly instead of
            # relying on positional Series[0] lookups on a labelled index.
            best_pos = df_highest.loc[in_highest, "Highest positive Spearman corr"].iloc[0]
            best_neg = df_highest.loc[in_highest, "Highest negative Spearman corr"].iloc[0]
            corr = df_aux.loc[in_aux, "GDP Spearman Corr"].iloc[0]
            p_value = df_aux.loc[in_aux, "P-value Spearman"].iloc[0]

            # NaN never compares equal to anything, so `!= nan` cannot detect it.
            if pd.notna(corr) and p_value < PVALUE_VAR:
                if corr > 0 and best_pos < corr:
                    df_highest.at[name, "Year range"] = year_label
                    df_highest.at[name, "Highest positive Spearman corr"] = corr
                elif corr < 0 and best_neg > corr:
                    # NOTE(review): this key ends with a space; presumably it names a
                    # second year-range column created by init_highest_table for the
                    # negative side — confirm.
                    df_highest.at[name, "Year range "] = year_label
                    df_highest.at[name, "Highest negative Spearman corr"] = corr

    # Zeros become missing, rows that are entirely missing are dropped, and the
    # remaining gaps are shown as "-".
    df_highest = df_highest.replace(0, nan).dropna(axis=0, how='all').fillna("-")

    # Wipe the progress line.
    print("                                                                                    ", end="\r")

    display(df_highest)

# Aggregation level selector for the highest-correlation table.
by_drop_high = widgets.Dropdown(description='By: ', options=['Country', 'Region'])

# Zone selector; starts with the country list and is repopulated by the callback.
zone_drop_high = widgets.Dropdown(description='Zone:', options=country_list, value='Afghanistan')

widgets.interact(table_high_country, by=by_drop_high, zone=zone_drop_high)


interactive(children=(Dropdown(description='By: ', options=('Country', 'Region'), value='Country'), Dropdown(d…

<function __main__.table_high_country(by, zone)>

In [5]:

def plot_year_range(by, zone, indicator, years):
    """Plot GDP against one indicator over a year window for a country or region.

    The Spearman correlation between the indicator and GDP over the selected
    rows is printed. The data is then passed through ``normalize_by_country``,
    reduced to the yearly median and drawn as two lines.

    Parameters
    ----------
    by : str
        'Country' or 'Region'; decides how ``zone`` is interpreted and which
        index level is filtered.
    zone : str
        Country name or region name currently selected in ``zone_drop_plot``.
    indicator : str
        Column plotted next to 'GDP'.
    years : sequence of two ints
        Inclusive lower and upper year of the window.

    Returns
    -------
    None
        The call returns early, without output, right after repopulating
        ``zone_drop_plot`` when ``by`` has just changed.

    Raises
    ------
    ValueError
        If ``by`` is neither 'Country' nor 'Region'.
    """
    if by == 'Country':
        # Dropdown still lists regions: swap its options and wait for the next call.
        if list(zone_drop_plot.options) != country_list:
            zone_drop_plot.options = country_list
            return
        df_zone = df_gold.loc[df_gold.index.get_level_values('Country') == zone]

    elif by == 'Region':
        if list(zone_drop_plot.options) != region_list:
            zone_drop_plot.options = region_list
            return
        df_zone = countries_by_region[zone]

    else:
        raise ValueError("by must be 'Country' or 'Region', got " + repr(by))

    # Keep only the rows inside the selected window (both ends included).
    row_years = df_zone.index.get_level_values("Year")
    df_zone = df_zone.loc[(row_years >= years[0]) & (row_years <= years[1])]

    # The correlation is computed before normalize_by_country is applied.
    spear = stats.spearmanr(df_zone[indicator], df_zone['GDP'])
    df_zone = normalize_by_country(df_zone)

    # Filter on the level named by the `by` argument (not on the widget's global
    # state); for a region this collapses its countries into one median per year.
    df_zone = df_zone.loc[df_zone.index.get_level_values(by) == zone, ['GDP', indicator]].groupby(level = 'Year').median()

    print(spear)
    plt.figure(figsize=(6,6))
    plt.plot(df_zone.index.get_level_values("Year"), df_zone["GDP"], color="red", label = 'GDP')
    plt.plot(df_zone.index.get_level_values("Year"), df_zone[indicator], color="green", label = indicator)
    plt.xlabel("Year")
    plt.ylabel("Normalized value")
    plt.title(f"{zone}: GDP vs {indicator}")
    plt.legend(loc="lower right")
    # Render inside the interact output area on every call.
    plt.show()
    
# Aggregation level selector for the plot.
by_drop_plot = widgets.Dropdown(
    options= ['Country', 'Region'],
    description='By: ',
)

# Zone selector; it lists countries or regions depending on `by`, so it is
# labelled 'Zone' like the other cells rather than 'Country'.
zone_drop_plot = widgets.Dropdown(
    options= country_list,
    value='Afghanistan',
    description='Zone: ',
)

# Indicator plotted against GDP (label fixed: it used to end in a double colon).
indicator_drop_plot = widgets.Dropdown(
    options= sorted(indicators),
    value='AgriShareGDP',
    description='Indicator: ',
)

# Year window; spans the whole range of years present in the gold table.
intslider_plot = widgets.IntRangeSlider(
    value=[min_year, max_year],
    min= min_year,
    max= max_year,
    step=1,
    description='Years:',
)

# TODO By Region: state that the Y axis is qualitative (values are normalized to
# show the trend against GDP, not real magnitudes).

widgets.interact(plot_year_range, by = by_drop_plot,  zone = zone_drop_plot, indicator = indicator_drop_plot, years = intslider_plot)

interactive(children=(Dropdown(description='By: ', options=('Country', 'Region'), value='Country'), Dropdown(d…

<function __main__.plot_year_range(by, zone, indicator, years)>

In [5]:

# NOTE(review): this cell redefines the four plot widgets with the same
# configuration as the previous cell and then binds them to the callback built by
# Nbook_Times.plot_widget. Its recorded output is a ValueError ("The truth value of
# a DataFrame is ambiguous"); the cause is not visible in this notebook — check
# plot_widget in Notebook_Time_Series before relying on this cell.

# Aggregation level selector for the plot.
by_drop_plot = widgets.Dropdown(
    options= ['Country', 'Region'],
    description='By: ',
)

# Zone selector, initially populated with the country list.
zone_drop_plot = widgets.Dropdown(
    options= country_list,
    value='Afghanistan',
    description='Country: ',
)

# Indicator plotted against GDP.
indicator_drop_plot = widgets.Dropdown(
    options= sorted(indicators),
    value='AgriShareGDP',
    description='Indicator: :',
)

# Year window; spans the whole range of years present in the gold table.
intslider_plot = widgets.IntRangeSlider(
    value=[min_year, max_year],
    min= min_year,
    max= max_year,
    step=1,
    description='Years:',
)

# TODO By Region: state that the Y axis is qualitative (values are normalized to
# show the trend against GDP, not real magnitudes).

widgets.interact(Nbook_Times.plot_widget(df_gold, countries_by_region), by = by_drop_plot,  zone = zone_drop_plot, indicator = indicator_drop_plot, years = intslider_plot)

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().