In [1]:
import pandas as pd

# Vertical Concatenation

## Download Data

Create the local directory *data* and download the dataset files (2015.csv, 2016.csv, 2017.csv) published at the link https://www.kaggle.com/unsdsn/world-happiness to the local data directory.

## Processing

In [2]:
# load data
whr_2015 = pd.read_csv('data/2015.csv', sep=',')
whr_2016 = pd.read_csv('data/2016.csv', sep=',')
whr_2017 = pd.read_csv('data/2017.csv', sep=',')

# set the year variable to distinguish rows in the combined data frame later
whr_2015['year'] = 2015
whr_2016['year'] = 2016
whr_2016['year'] = 2017

print(whr_2015.shape)
print(whr_2016.shape)
print(whr_2017.shape)

(158, 13)
(157, 14)
(155, 12)


In [3]:
whr = pd.concat([whr_2015, whr_2016], axis=0, sort=False)

print(whr.shape)

(315, 15)


In [4]:
whr_2015.head(2)

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual,year
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015
1,Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201,2015


In [5]:
whr_2016.head(2)

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual,year
0,Denmark,Western Europe,1,7.526,7.46,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939,2017
1,Switzerland,Western Europe,2,7.509,7.428,7.59,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463,2017


In [6]:
whr.head(2)

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual,year,Lower Confidence Interval,Upper Confidence Interval
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015,,
1,Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201,2015,,


# Horizontal Concatenation

## Download Data

Create the local directory *data* and download the dataset file (countries of the world.csv) published at the link https://www.kaggle.com/fernandol/countries-of-the-world to the local data directory.

## Processing

In [7]:
whr_2017 = pd.read_csv('data/2017.csv', sep=',')
countries = pd.read_csv('data/countries of the world.csv', sep=',')

# removing spaces around the country names so as to match them with the names in whr_2017 later
countries['Country'] = countries['Country'].str.strip()

In [8]:
# setting the index which will be used to combine two data frame 'horizontally'
whr_2017 = whr_2017.set_index('Country')
countries = countries.set_index('Country')

In [9]:
print(whr_2017.shape)
print(countries.shape)

(155, 11)
(227, 19)


In [10]:
whr_2017_ext = pd.concat([whr_2017, countries], axis=1, sort=False, ignore_index=False, join='inner')

print(whr_2017_ext.shape)

(140, 30)


In [11]:
whr_2017_ext.head(10)

Unnamed: 0_level_0,Happiness.Rank,Happiness.Score,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,...,Phones (per 1000),Arable (%),Crops (%),Other (%),Climate,Birthrate,Deathrate,Agriculture,Industry,Service
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Norway,1,7.537,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,...,4617,287,0,9713,3.0,1146,94,21,415,564
Denmark,2,7.522,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.35528,0.40077,...,6146,5402,19,4579,3.0,1113,1036,18,246,735
Iceland,3,7.504,7.62203,7.38597,1.480633,1.610574,0.833552,0.627163,0.47554,0.153527,...,6477,7,0,9993,3.0,1364,672,86,15,765
Switzerland,4,7.494,7.561772,7.426227,1.56498,1.516912,0.858131,0.620071,0.290549,0.367007,...,6809,1042,61,8897,3.0,971,849,15,34,645
Finland,5,7.469,7.527542,7.410458,1.443572,1.540247,0.809158,0.617951,0.245483,0.382612,...,4053,719,3,9278,3.0,1045,986,28,295,676
Netherlands,6,7.377,7.427426,7.326574,1.503945,1.428939,0.810696,0.585384,0.47049,0.282662,...,4608,2671,97,7232,3.0,109,868,21,244,736
Canada,7,7.316,7.384403,7.247597,1.479204,1.481349,0.834558,0.611101,0.43554,0.287372,...,5522,496,2,9502,,1078,78,22,294,684
New Zealand,8,7.314,7.37951,7.24849,1.405706,1.548195,0.81676,0.614062,0.500005,0.382817,...,4417,56,699,8741,3.0,1376,753,43,273,684
Sweden,9,7.284,7.344095,7.223905,1.494387,1.478162,0.830875,0.612924,0.385399,0.384399,...,7150,654,1,9345,3.0,1027,1031,11,282,707
Australia,10,7.284,7.356651,7.211349,1.484415,1.510042,0.843887,0.601607,0.477699,0.301184,...,5655,655,4,9341,1.0,1214,751,38,262,7
