In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings

warnings.filterwarnings("ignore")

from jupyterthemes import jtplot

# Apply the "monokai" jupyterthemes plotting style: notebook-sized context,
# axis ticks shown, background grid hidden.
jtplot.style(
    theme="monokai",
    context="notebook",
    ticks=True,
    grid=False,
)

# Read the happiness report from the working directory and preview the
# first five rows.
df = pd.read_csv("happiness-report.csv")
df.head(n=5)

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [16]:
# Keep only the rows whose country is Finland or Denmark and print them
# as plain text.
selected_countries = df.loc[
    df["Country or region"].isin(["Finland", "Denmark"])
]
print(selected_countries)

   Overall rank Country or region  Score  GDP per capita  Social support  \
0             1           Finland  7.769           1.340           1.587   
1             2           Denmark  7.600           1.383           1.573   

   Healthy life expectancy  Freedom to make life choices  Generosity  \
0                    0.986                         0.596       0.153   
1                    0.996                         0.592       0.252   

   Perceptions of corruption  
0                      0.393  
1                      0.410  


In [17]:
# Check the number of non-null values (and the dtype) of every column.
# `df` is the frame loaded from happiness-report.csv in the first cell; it is
# the only DataFrame this notebook defines.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 9 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Overall rank                  156 non-null    int64  
 1   Country or region             156 non-null    object 
 2   Score                         156 non-null    float64
 3   GDP per capita                156 non-null    float64
 4   Social support                156 non-null    float64
 5   Healthy life expectancy       156 non-null    float64
 6   Freedom to make life choices  156 non-null    float64
 7   Generosity                    156 non-null    float64
 8   Perceptions of corruption     156 non-null    float64
dtypes: float64(7), int64(1), object(1)
memory usage: 11.1+ KB


In [18]:
# Obtain the statistical summary (count, mean, std, min, quartiles, max) of
# the numeric columns. `df` is the frame loaded from happiness-report.csv in
# the first cell; it is the only DataFrame this notebook defines.
df.describe()

Unnamed: 0,Overall rank,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
count,156.0,156.0,156.0,156.0,156.0,156.0,156.0,156.0
mean,78.5,5.407096,0.905147,1.208814,0.725244,0.392571,0.184846,0.110603
std,45.177428,1.11312,0.398389,0.299191,0.242124,0.143289,0.095254,0.094538
min,1.0,2.853,0.0,0.0,0.0,0.0,0.0,0.0
25%,39.75,4.5445,0.60275,1.05575,0.54775,0.308,0.10875,0.047
50%,78.5,5.3795,0.96,1.2715,0.789,0.417,0.1775,0.0855
75%,117.25,6.1845,1.2325,1.4525,0.88175,0.50725,0.24825,0.14125
max,156.0,7.769,1.684,1.624,1.141,0.631,0.566,0.453


In [19]:
# Count fully duplicated rows in the dataframe. `df` is the frame loaded from
# happiness-report.csv in the first cell; it is the only DataFrame this
# notebook defines.
df.duplicated().sum()  # since there are no duplicates, no further action is required

np.int64(0)

In [20]:
# Look up the row label of the highest Score, then pull that whole row out
# as a Series and print it.
max_happiness_country = df.loc[df["Score"].idxmax(), :]
print(max_happiness_country)

Overall rank                          1
Country or region               Finland
Score                             7.769
GDP per capita                     1.34
Social support                    1.587
Healthy life expectancy           0.986
Freedom to make life choices      0.596
Generosity                        0.153
Perceptions of corruption         0.393
Name: 0, dtype: object


In [21]:
# Preview the last five rows of the frame.
df.tail(n=5)

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
151,152,Rwanda,3.334,0.359,0.711,0.614,0.555,0.217,0.411
152,153,Tanzania,3.231,0.476,0.885,0.499,0.417,0.276,0.147
153,154,Afghanistan,3.203,0.35,0.517,0.361,0.0,0.158,0.025
154,155,Central African Republic,3.083,0.026,0.0,0.105,0.225,0.235,0.035
155,156,South Sudan,2.853,0.306,0.575,0.295,0.01,0.202,0.091


In [2]:
# Select the row(s) whose country is exactly "Canada" and print them as
# plain text.
can = df.loc[df["Country or region"].eq("Canada")]
print(can)

   Overall rank Country or region  Score  GDP per capita  Social support  \
8             9            Canada  7.278           1.365           1.505   

   Healthy life expectancy  Freedom to make life choices  Generosity  \
8                    1.039                         0.584       0.285   

   Perceptions of corruption  
8                      0.308  


In [4]:
# Count missing values per column.
df.isna().sum()

Overall rank                    0
Country or region               0
Score                           0
GDP per capita                  0
Social support                  0
Healthy life expectancy         0
Freedom to make life choices    0
Generosity                      0
Perceptions of corruption       0
dtype: int64

In [6]:
# Show the row(s) whose Score is 7.769. The comparison uses np.isclose rather
# than `==` so the lookup does not depend on the literal and the parsed CSV
# value having bit-identical floating-point representations.
df[np.isclose(df["Score"], 7.769)]

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393


In [10]:
# Row with the highest happiness Score, returned as a Series keyed by column
# name and printed as plain text.
max_happiness_country = df.loc[
    df["Score"].idxmax()
]
print(max_happiness_country)

Overall rank                          1
Country or region               Finland
Score                             7.769
GDP per capita                     1.34
Social support                    1.587
Healthy life expectancy           0.986
Freedom to make life choices      0.596
Generosity                        0.153
Perceptions of corruption         0.393
Name: 0, dtype: object


In [11]:
# Row with the largest "Perceptions of corruption" value, extracted as a
# Series and printed as plain text.
max_corruption = df.loc[
    df["Perceptions of corruption"].idxmax()
]
print(max_corruption)

Overall rank                           34
Country or region               Singapore
Score                               6.262
GDP per capita                      1.572
Social support                      1.463
Healthy life expectancy             1.141
Freedom to make life choices        0.556
Generosity                          0.271
Perceptions of corruption           0.453
Name: 33, dtype: object


np.float64(0.453)

In [14]:
# Show every row whose Score equals the column maximum (ties included).
df.loc[df["Score"].eq(df["Score"].max())]

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393


In [18]:
# Pairwise scatter plots of the happiness score and its six contributing
# factors, rendered as one interactive plotly figure.
fig = px.scatter_matrix(
    df.loc[
        :,
        [
            "Score",
            "GDP per capita",
            "Social support",
            "Healthy life expectancy",
            "Freedom to make life choices",
            "Generosity",
            "Perceptions of corruption",
        ],
    ],
    width=1500,
    height=1500,
    title="Scatter Matrix of Happiness Report",
)
fig.show()