# Visualize Notebook
In this notebook the Gold Dataframe will be read and extract information of it. The objective is to see the correlations between the variables and the GDP and also what countries have the highest correlation value.


## Imports
Start importing all the libraries and also the methods of pvalue and search indicators that will be used later in the notebook.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from scipy.stats import shapiro
import os
import statistics
import seaborn as sns
from scipy.stats import norm

import ipywidgets as widgets
from ipywidgets import Layout
from ipywidgets import interact, interact_manual
from Project.Utils.visualize import pvalue, search_indicators, search


## Correlation dataframe.
This dataframe is the main piece of the notebook. Consists in generating for every country the correlation matrix for it and saving only the correlation value of the different variables with the GDP. Later on is concatenated and generates the following result:

In [2]:
#One dataframe per country

df= pd.read_csv (os.getcwd()+'/Output/GoldDataframe.csv')
corr_df = pd.DataFrame()
corr_df.index.names = ['Country']
aux_df = pd.DataFrame()

#List all the countries, none repeated
countries = set(df['Country'].to_list())

country_dict = {}
corr_dict = {}

for country in countries:

    #Get the DataFrame for a given country
    country_df = df.loc[df['Country'] == country]

    #Correlation matrix for that country
    country_corr_df = country_df.corr()

    #Trim it into a single row
    country_corr_df = country_corr_df.rename(columns = {'GDP': country}).drop(index = ['Year', 'GDP'])

    #Add the row to a new DataFrame with the correlations for each country
    corr_df = pd.concat([corr_df, country_corr_df[country]], axis = 1)

#Transpose the resulting DataFrame to have the desired format and show it
corr_df = corr_df.transpose()
corr_df


Unnamed: 0,AgriShareGDP,CreditToAgriFishForest,EmploymentRural,TotalAgri,% Soldiers,Birth Rate,Death Rate,Homicides,Life Expectancy,Maternal Death Risk,...,% Population Growth,% Rural Population,Civil Liberties,Freedom of Expression,% Healthcare Investment,% Employment Industry,% Education Expenditure,% Men Employment,% Women Employment,Population
Switzerland,-0.926746,0.899981,,0.938234,-0.314830,0.271648,-0.875918,-0.917309,0.966205,-0.900268,...,0.576820,-0.933934,-0.138955,,0.947935,-0.856593,-0.049844,-0.920018,0.808883,0.936869
Iceland,-0.931491,,,0.841270,-0.412141,-0.709204,0.224062,-0.058655,0.748194,-0.823615,...,0.677257,-0.782214,-0.161636,-0.475646,0.018617,-0.673486,0.392615,0.212204,0.345669,0.767517
Mali,0.816864,-0.746082,,0.979469,-0.320843,-0.954118,-0.987298,,0.987882,-0.962094,...,-0.255112,-0.988937,-0.770610,-0.734670,-0.387323,-0.445123,0.204162,,,0.984256
Latvia,-0.770846,,-0.574529,0.948486,-0.144538,0.698248,0.528614,-0.879670,0.777383,-0.350612,...,0.197481,-0.317310,0.521168,0.613403,0.457060,-0.470058,-0.365042,0.407546,0.874027,-0.863641
Senegal,-0.064157,-0.749358,,0.833206,-0.751124,-0.828994,-0.978344,,0.976512,-0.926409,...,0.912167,-0.960345,0.377412,0.737131,-0.946089,0.928383,0.716600,,,0.950633
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Belarus,-0.740688,0.179766,,0.969914,-0.240457,0.714361,-0.572392,-0.898360,0.758016,-0.919550,...,0.753786,-0.809223,0.372825,0.087215,-0.393318,-0.801186,,,,-0.932844
Uganda,-0.552499,0.457382,,,-0.918504,-0.904790,-0.946195,,0.951210,-0.944073,...,0.653146,-0.935675,-0.710019,-0.768839,-0.787329,0.113859,,,,0.922094
Croatia,-0.713595,,0.369270,0.938671,-0.823153,0.258234,0.529278,-0.645630,0.732922,-0.460155,...,-0.027478,-0.651140,0.025537,-0.347366,0.494168,-0.311657,,,,-0.548240
Lithuania,-0.835335,,-0.788530,0.912241,0.706347,0.574701,0.867250,-0.837935,0.746119,-0.812293,...,0.071064,-0.633390,0.197869,0.077926,0.350933,-0.447208,-0.940626,0.331796,0.637510,-0.922340


In [3]:
import ipywidgets as widgets
import plotly.express as px


indicator = widgets.SelectMultiple(
    options = corr_df.columns.tolist(),
    value = ['AgriShareGDP'],
    description='Indicator',
    disabled=False,
    layout = Layout(width='50%', height='80px')
)


def globalGrapgh(indicator):
    fig = px.choropleth(corr_df, locations = corr_df.index, locationmode='country names', 
                        color= indicator[0],projection="natural earth",
                        color_continuous_scale='Reds',
                    width=700, height=500)
    fig.update(layout_coloraxis_showscale=True)
    fig.show()
    
    
widgets.interactive(globalGrapgh, indicator = indicator) 

interactive(children=(SelectMultiple(description='Indicator', index=(0,), layout=Layout(height='80px', width='…

In [13]:
def tableOut(Threshold, Country):

    df = search(Threshold, Country)
    if df.empty:
        return print("No indicators have been found.")

    left = pd.Series([0.05, 0.05], index=['P-value Pearson', 'P-value Spearman'])
    left2 = pd.Series([-1, -1], index=['GDP Pearson Corr', 'GDP Spearman Corr'])
    left3 = pd.Series([0, 0], index=['GDP Pearson Corr', 'GDP Spearman Corr'])
    df =df.style.highlight_between(left=left, right=1.5, axis=1, props='color:white; background-color:red;')\
        .highlight_between(left=left2, right=1.5, axis=1, props='color:white; background-color:#929bfc;')\
        .highlight_between(left=left3, right=1.5, axis=1, props='color:white; background-color:#b3b9ff;')\
        .format('{:,.4f}', subset = ['GDP Pearson Corr', 'GDP Spearman Corr'])\
        .format('{:,.12f}', subset = ['P-value Pearson', 'P-value Spearman']) 
    
    display(df)

    

@interact(
    Country = sorted(corr_df.index.tolist()),
    Threshold = (0, 1, 0.05))
def g(Country = 'Afghanistan', Threshold = 0.7):
    return tableOut(Threshold,Country)

    
        

interactive(children=(Dropdown(description='Country', options=('Afghanistan', 'Albania', 'Algeria', 'Andorra',…