In [None]:
# Install xlrd into the running environment.
# NOTE(review): presumably needed by pd.read_excel for the .xls workbooks loaded below - confirm.
!pip install xlrd

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

In [None]:
# Load the two Excel workbooks used throughout the notebook:
#   data - the happiness report table (one row per country and year)
#   regs - the lookup table holding a 'Region' for each 'Country name'
data = pd.read_excel(io='/kaggle/input/worldhappinessreport/WHR.xls')
regs = pd.read_excel(io='/kaggle/input/worldhappinessreport-region/REG.xls')

In [None]:
# Preview the first rows of the raw report to see which columns are available.
data.head()

In [None]:
def find_region(country, regions=None):
    """Return the region recorded for ``country`` in the lookup table.

    Parameters
    ----------
    country : str
        Value matched exactly against the ``'Country name'`` column.
    regions : pandas.DataFrame, optional
        Lookup table with ``'Country name'`` and ``'Region'`` columns.
        Defaults to the notebook-level ``regs`` frame.

    Returns
    -------
    The ``'Region'`` value of the last matching row, or the string
    ``'None'`` when no row matches.
    """
    lookup = regs if regions is None else regions
    # A single boolean-mask pass; no per-call list of every country name
    # and no second scan of the frame to fetch the match.
    matches = lookup.loc[lookup['Country name'] == country, 'Region']
    if matches.empty:
        return 'None'
    return matches.iloc[-1]

# Build the 2018 snapshot: keep that year's rows and drop the columns that
# hold no data at all for the year.
recents = data[data.Year == 2018].dropna(axis=1, how="all")
# Fill the remaining gaps with each column's median. numeric_only=True is
# required because the frame also carries text columns such as 'Country name';
# pandas >= 2.0 raises a TypeError when asked for their median.
recents = recents.fillna(recents.median(numeric_only=True))
# Tag every row with its region; find_region returns 'None' when the country
# is missing from the lookup table.
recents['Region'] = recents['Country name'].apply(find_region)

In [None]:
# Preview the 2018 snapshot, including the newly added 'Region' column.
recents.head()

In [None]:
# Distribution of 'Life Ladder' per year, one box per year.
data.set_index('Year')[['Life Ladder']].boxplot(by='Year', grid=False)
# Blank out the super-title that the grouped boxplot adds on its own,
# then label the axes explicitly.
plt.suptitle("")
plt.title('Life Ladder')
plt.xlabel('Year')

In [None]:
# Line chart of how many non-null 'Life Ladder' entries exist for each year.
data['Life Ladder'].groupby(data['Year']).count().plot()
plt.title('Countries per Year')
plt.xlabel('Year')
plt.ylabel('Countries')

In [None]:
def create_scatter(col, nc, nr, index):
    """Scatter a random 30% sample of ``data``: ``col`` on x, 'Life Ladder' on y.

    The panel is placed with ``plt.subplot(nc, nr, index)``. Matplotlib reads
    those first two arguments as (rows, columns), so despite the names ``nc``
    acts as the row count and ``nr`` as the column count.

    The panel title is the column name cut to 20 characters. The sample is
    unseeded, so the plotted points differ between runs.
    """
    panel = plt.subplot(nc, nr, index)
    sample = data.sample(frac=0.3)
    panel.scatter(sample[col], sample['Life Ladder'])
    panel.set_title(str(col)[:20])

# One scatter panel per indicator on a single 16x16-inch figure.
# The (4, 3) grid arguments give 12 slots; the 11 indicators fill slots 1-11.
plt.figure(figsize=(16, 16))
for i, key in enumerate(
    [
        'Log GDP per capita',
        'Social support',
        'Healthy life expectancy at birth',
        'Freedom to make life choices',
        'Generosity',
        'Perceptions of corruption',
        'Positive affect',
        'Negative affect',
        'Confidence in national government',
        'Democratic Quality',
        'Delivery Quality',
    ],
    start=1,  # subplot positions are 1-based
):
    create_scatter(key, 4, 3, i)
plt.show()
    

In [None]:
def calculate_correlation(col, frame=None):
    """Print and return the Pearson correlation between ``col`` and 'Life Ladder'.

    Parameters
    ----------
    col : str
        Name of the column to correlate with ``'Life Ladder'``.
    frame : pandas.DataFrame, optional
        Table containing both columns. Defaults to the notebook-level
        ``data`` frame.

    Returns
    -------
    float
        The correlation coefficient (also printed), computed over the rows
        where both values are present.
    """
    source = data if frame is None else frame
    # Series.corr ignores rows where either value is missing. np.corrcoef on
    # the raw columns returns nan as soon as one NaN is present, which made
    # the result meaningless for any column with gaps.
    r = source[col].corr(source['Life Ladder'])
    print(col+" Life Ladder Correlation: ", r)
    return r

# Report the correlation with 'Life Ladder' for every indicator column.
for key in (
    'Log GDP per capita',
    'Social support',
    'Healthy life expectancy at birth',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
    'Positive affect',
    'Negative affect',
    'Confidence in national government',
    'Democratic Quality',
    'Delivery Quality',
):
    calculate_correlation(key)

In [None]:
# Bar chart: number of non-null 'Country name' entries per region in the
# 2018 snapshot. The grouped count is computed once and reused for both the
# bar positions and the bar heights.
plt.figure(figsize=(8, 8))
region_counts = recents.groupby('Region')['Country name'].count()
plt.bar(x=region_counts.index,
        height=region_counts,
        width=0.5,
        bottom=None,
        align='center')
# Region names are long; rotate them so they do not overlap.
plt.xticks(rotation='vertical')
plt.show()

In [None]:
# Life Ladder distribution per year on an 8x8-inch canvas.
# boxplot(by=...) opens its own figure when no `ax` is passed, so the size is
# given through `figsize`; a separate plt.figure() call beforehand would only
# leave an empty figure in the cell output.
data[['Year', 'Life Ladder']].set_index('Year').boxplot(by='Year',
                                                        grid=False,
                                                        figsize=(8, 8))
# Blank out the super-title that the grouped boxplot adds on its own.
plt.suptitle("")
plt.title('Life Ladder')
plt.xlabel('Year')
plt.show()

In [None]:
# Number of non-null 'Country name' entries recorded for each year.
print(data.groupby('Year')['Country name'].count())

In [None]:
# Restrict the data to the countries that have a row for 2005, then redraw
# the per-year Life Ladder boxplot for that fixed group of countries.
t = data.loc[data['Year'] == 2005].copy()
countries = t['Country name'].tolist()
filtered = data.loc[data['Country name'].isin(countries)]

filtered.set_index('Year')[['Life Ladder']].boxplot(by='Year',
                                                    grid=False,
                                                    figsize=(8, 8))
# Blank out the super-title that the grouped boxplot adds on its own.
plt.suptitle("")
plt.title('Life Ladder - Same Countries')
plt.xlabel('Year')

In [None]:
# TSNE is imported from its public location; the private `t_sne` submodule
# used previously is not available in current scikit-learn releases.
from sklearn.manifold import TSNE

# Indicator columns fed to the embedding.
feature_columns = ['Log GDP per capita',
                   'Social support', 'Healthy life expectancy at birth',
                   'Freedom to make life choices', 'Generosity',
                   'Perceptions of corruption', 'Positive affect', 'Negative affect',
                   'Confidence in national government', 'Democratic Quality',
                   'Delivery Quality']

# Fixed seed so the embedding (and therefore the figure) is reproducible.
t = TSNE(random_state=0)
# t-SNE cannot handle missing values: fill gaps with the column median.
# numeric_only=True skips text columns such as 'Country name', whose median
# raises a TypeError on pandas >= 2.0.
data = data.fillna(data.median(numeric_only=True))
transformed = t.fit_transform(data[feature_columns].values)

# Region name -> integer colour code. 'None' is the marker find_region
# returns for countries that are missing from the lookup table.
regions = {name: code for code, name in enumerate(regs.Region.unique())}
regions['None'] = len(regions)
# `data` has no 'Region' column (only the 2018 snapshot does), so the region
# of every row is looked up here from its country name.
region_codes = data['Country name'].apply(find_region).map(regions)

# Two side-by-side panels; drawing both scatters on the same axes would hide
# the first colouring underneath the second.
fig, (ax_ladder, ax_region) = plt.subplots(1, 2, figsize=(16, 8))
ax_ladder.scatter(transformed[:, 0], transformed[:, 1], c=data['Life Ladder'].values)
ax_ladder.set_title('t-SNE embedding coloured by Life Ladder')
ax_region.scatter(transformed[:, 0], transformed[:, 1], c=region_codes.values)
ax_region.set_title('t-SNE embedding coloured by Region')
plt.show()