   # Data Aggregation and Combining with Pandas

In [1]:
import pandas as pd

In [2]:
happiness_2015 = pd.read_csv("World_Happiness_2015.csv")
happiness_2016 = pd.read_csv("World_Happiness_2016.csv")

In [3]:
happiness_2015

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.43630,2.70201
2,Denmark,Western Europe,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,Norway,Western Europe,4,7.522,0.03880,1.45900,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,Canada,North America,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
...,...,...,...,...,...,...,...,...,...,...,...,...
153,Rwanda,Sub-Saharan Africa,154,3.465,0.03464,0.22208,0.77370,0.42864,0.59201,0.55191,0.22628,0.67042
154,Benin,Sub-Saharan Africa,155,3.340,0.03656,0.28665,0.35386,0.31910,0.48450,0.08010,0.18260,1.63328
155,Syria,Middle East and Northern Africa,156,3.006,0.05015,0.66320,0.47489,0.72193,0.15684,0.18906,0.47179,0.32858
156,Burundi,Sub-Saharan Africa,157,2.905,0.08658,0.01530,0.41587,0.22396,0.11850,0.10062,0.19727,1.83302


In [4]:
happiness_2015.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 158 entries, 0 to 157
Data columns (total 12 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Country                        158 non-null    object 
 1   Region                         158 non-null    object 
 2   Happiness Rank                 158 non-null    int64  
 3   Happiness Score                158 non-null    float64
 4   Standard Error                 158 non-null    float64
 5   Economy (GDP per Capita)       158 non-null    float64
 6   Family                         158 non-null    float64
 7   Health (Life Expectancy)       158 non-null    float64
 8   Freedom                        158 non-null    float64
 9   Trust (Government Corruption)  158 non-null    float64
 10  Generosity                     158 non-null    float64
 11  Dystopia Residual              158 non-null    float64
dtypes: float64(9), int64(1), object(2)
memory usage: 1

In [5]:
happiness_2015["Region"].unique()

array(['Western Europe', 'North America', 'Australia and New Zealand',
       'Middle East and Northern Africa', 'Latin America and Caribbean',
       'Southeastern Asia', 'Central and Eastern Europe', 'Eastern Asia',
       'Sub-Saharan Africa', 'Southern Asia'], dtype=object)

In [6]:
happiness_2015["Region"].value_counts()

Sub-Saharan Africa                 40
Central and Eastern Europe         29
Latin America and Caribbean        22
Western Europe                     21
Middle East and Northern Africa    20
Southeastern Asia                   9
Southern Asia                       7
Eastern Asia                        6
North America                       2
Australia and New Zealand           2
Name: Region, dtype: int64

In [7]:
happiness_2015.loc[happiness_2015["Region"] == "Southern Asia","Happiness Score"].mean()

4.580857142857143

In [8]:
region = happiness_2015["Region"].unique()

region

array(['Western Europe', 'North America', 'Australia and New Zealand',
       'Middle East and Northern Africa', 'Latin America and Caribbean',
       'Southeastern Asia', 'Central and Eastern Europe', 'Eastern Asia',
       'Sub-Saharan Africa', 'Southern Asia'], dtype=object)

In [9]:
region_mean = {}

for i in region:
    ser = happiness_2015.loc[happiness_2015["Region"] == i,"Happiness Score"]
    region_mean[i] = ser.mean()

In [10]:
pd.Series(region_mean)

Western Europe                     6.689619
North America                      7.273000
Australia and New Zealand          7.285000
Middle East and Northern Africa    5.406900
Latin America and Caribbean        6.144682
Southeastern Asia                  5.317444
Central and Eastern Europe         5.332931
Eastern Asia                       5.626167
Sub-Saharan Africa                 4.202800
Southern Asia                      4.580857
dtype: float64

## Groupby
    The groupby() function in pandas is used to group rows in a DataFrame based on one or more columns. It allows you to split the data into groups based on specific criteria, such as unique values in a column, and perform calculations or operations on each group.

In [11]:
region_group = happiness_2015.groupby("Region")
region_group

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000002299E6F8400>

In [12]:
region_group.get_group("Southern Asia")["Happiness Score"].mean()

4.580857142857143

In [13]:
region_group.groups

{'Australia and New Zealand': [8, 9], 'Central and Eastern Europe': [30, 43, 44, 51, 53, 54, 55, 58, 59, 61, 63, 68, 69, 72, 76, 79, 82, 85, 86, 88, 92, 94, 95, 103, 105, 110, 126, 129, 133], 'Eastern Asia': [37, 45, 46, 71, 83, 99], 'Latin America and Caribbean': [11, 13, 15, 22, 24, 26, 29, 31, 32, 39, 40, 41, 42, 47, 50, 52, 56, 57, 64, 97, 104, 118], 'Middle East and Northern Africa': [10, 19, 21, 27, 34, 38, 48, 62, 67, 75, 81, 91, 102, 106, 107, 109, 111, 134, 135, 155], 'North America': [4, 14], 'Southeastern Asia': [23, 33, 60, 73, 74, 89, 98, 128, 144], 'Southern Asia': [78, 80, 108, 116, 120, 131, 152], 'Sub-Saharan Africa': [70, 77, 84, 90, 93, 96, 100, 112, 113, 114, 115, 117, 119, 121, 122, 123, 124, 125, 127, 130, 132, 136, 137, 138, 139, 140, 141, 142, 143, 145, 146, 147, 148, 149, 150, 151, 153, 154, 156, 157], 'Western Europe': [0, 1, 2, 3, 5, 6, 7, 12, 16, 17, 18, 20, 25, 28, 35, 36, 49, 65, 66, 87, 101]}

In [14]:
region_mean = {}

for i in region:
    ser = region_group.get_group("Southern Asia")["Happiness Score"]
    region_mean[i] = ser.mean()

In [15]:
region_mean

{'Western Europe': 4.580857142857143,
 'North America': 4.580857142857143,
 'Australia and New Zealand': 4.580857142857143,
 'Middle East and Northern Africa': 4.580857142857143,
 'Latin America and Caribbean': 4.580857142857143,
 'Southeastern Asia': 4.580857142857143,
 'Central and Eastern Europe': 4.580857142857143,
 'Eastern Asia': 4.580857142857143,
 'Sub-Saharan Africa': 4.580857142857143,
 'Southern Asia': 4.580857142857143}

In [16]:
region_group["Happiness Score"]

<pandas.core.groupby.generic.SeriesGroupBy object at 0x000002299E716070>

In [17]:
region_group["Happiness Score"].mean()

Region
Australia and New Zealand          7.285000
Central and Eastern Europe         5.332931
Eastern Asia                       5.626167
Latin America and Caribbean        6.144682
Middle East and Northern Africa    5.406900
North America                      7.273000
Southeastern Asia                  5.317444
Southern Asia                      4.580857
Sub-Saharan Africa                 4.202800
Western Europe                     6.689619
Name: Happiness Score, dtype: float64

In [18]:
region_group.groups.keys()

dict_keys(['Australia and New Zealand', 'Central and Eastern Europe', 'Eastern Asia', 'Latin America and Caribbean', 'Middle East and Northern Africa', 'North America', 'Southeastern Asia', 'Southern Asia', 'Sub-Saharan Africa', 'Western Europe'])

In [19]:
region_group.size()

Region
Australia and New Zealand           2
Central and Eastern Europe         29
Eastern Asia                        6
Latin America and Caribbean        22
Middle East and Northern Africa    20
North America                       2
Southeastern Asia                   9
Southern Asia                       7
Sub-Saharan Africa                 40
Western Europe                     21
dtype: int64

## Aggregate Function
    Aggregate functions in pandas allow you to perform calculations on groups of data using the groupby() function. These functions help you summarize or derive insights from the grouped data.

In [20]:
hs = region_group["Happiness Score"]

In [21]:
hs.agg(["mean","max","min", "count"])

Unnamed: 0_level_0,mean,max,min,count
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Australia and New Zealand,7.285,7.286,7.284,2
Central and Eastern Europe,5.332931,6.505,4.218,29
Eastern Asia,5.626167,6.298,4.874,6
Latin America and Caribbean,6.144682,7.226,4.518,22
Middle East and Northern Africa,5.4069,7.278,3.006,20
North America,7.273,7.427,7.119,2
Southeastern Asia,5.317444,6.798,3.819,9
Southern Asia,4.580857,5.253,3.575,7
Sub-Saharan Africa,4.2028,5.477,2.839,40
Western Europe,6.689619,7.587,4.857,21


## Pivot Table
    A pivot table is a powerful feature in pandas that allows you to summarize and reshape your data based on one or more columns. It provides a way to group and aggregate data while creating a multidimensional table with hierarchical row and column labels.

In [22]:
happiness_2015.pivot_table(index = "Region", values = "Happiness Score", 
                    aggfunc=["mean","max","min", "count"])

Unnamed: 0_level_0,mean,max,min,count
Unnamed: 0_level_1,Happiness Score,Happiness Score,Happiness Score,Happiness Score
Region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Australia and New Zealand,7.285,7.286,7.284,2
Central and Eastern Europe,5.332931,6.505,4.218,29
Eastern Asia,5.626167,6.298,4.874,6
Latin America and Caribbean,6.144682,7.226,4.518,22
Middle East and Northern Africa,5.4069,7.278,3.006,20
North America,7.273,7.427,7.119,2
Southeastern Asia,5.317444,6.798,3.819,9
Southern Asia,4.580857,5.253,3.575,7
Sub-Saharan Africa,4.2028,5.477,2.839,40
Western Europe,6.689619,7.587,4.857,21


In [23]:
happiness_2015.pivot_table(index = "Region", values = "Happiness Score", 
                    aggfunc=["mean","max","min", "count"], margins = True)

Unnamed: 0_level_0,mean,max,min,count
Unnamed: 0_level_1,Happiness Score,Happiness Score,Happiness Score,Happiness Score
Region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Australia and New Zealand,7.285,7.286,7.284,2
Central and Eastern Europe,5.332931,6.505,4.218,29
Eastern Asia,5.626167,6.298,4.874,6
Latin America and Caribbean,6.144682,7.226,4.518,22
Middle East and Northern Africa,5.4069,7.278,3.006,20
North America,7.273,7.427,7.119,2
Southeastern Asia,5.317444,6.798,3.819,9
Southern Asia,4.580857,5.253,3.575,7
Sub-Saharan Africa,4.2028,5.477,2.839,40
Western Europe,6.689619,7.587,4.857,21


## concatenate
    The concat() function in pandas is used to concatenate (i.e., join or combine) two or more DataFrames along a particular axis. It allows you to merge DataFrames vertically or horizontally based on your requirements.

In [24]:
happiness_2015.columns

Index(['Country', 'Region', 'Happiness Rank', 'Happiness Score',
       'Standard Error', 'Economy (GDP per Capita)', 'Family',
       'Health (Life Expectancy)', 'Freedom', 'Trust (Government Corruption)',
       'Generosity', 'Dystopia Residual'],
      dtype='object')

In [25]:
happiness_2016.columns

Index(['Country', 'Region', 'Happiness Rank', 'Happiness Score',
       'Lower Confidence Interval', 'Upper Confidence Interval',
       'Economy (GDP per Capita)', 'Family', 'Health (Life Expectancy)',
       'Freedom', 'Trust (Government Corruption)', 'Generosity',
       'Dystopia Residual'],
      dtype='object')

In [26]:
happiness_2015["year"] = 2015
happiness_2016["year"] = 2016

In [27]:
happiness_2015.head()

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual,year
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738,2015
1,Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201,2015
2,Denmark,Western Europe,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204,2015
3,Norway,Western Europe,4,7.522,0.0388,1.459,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531,2015
4,Canada,North America,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176,2015


In [28]:
happiness_2016.head()

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual,year
0,Denmark,Western Europe,1,7.526,7.46,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939,2016
1,Switzerland,Western Europe,2,7.509,7.428,7.59,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463,2016
2,Iceland,Western Europe,3,7.501,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,2016
3,Norway,Western Europe,4,7.498,7.421,7.575,1.57744,1.1269,0.79579,0.59609,0.35776,0.37895,2.66465,2016
4,Finland,Western Europe,5,7.413,7.351,7.475,1.40598,1.13464,0.81091,0.57104,0.41004,0.25492,2.82596,2016


In [29]:
three_2015 = happiness_2015[["Country", "Happiness Score","year"]].head(3)
three_2016 = happiness_2016[["Country", "Happiness Score","year"]].head(3)

In [30]:
three_2015

Unnamed: 0,Country,Happiness Score,year
0,Switzerland,7.587,2015
1,Iceland,7.561,2015
2,Denmark,7.527,2015


In [31]:
three_2016

Unnamed: 0,Country,Happiness Score,year
0,Denmark,7.526,2016
1,Switzerland,7.509,2016
2,Iceland,7.501,2016


In [32]:
pd.concat([three_2015, three_2016], axis = 0)

Unnamed: 0,Country,Happiness Score,year
0,Switzerland,7.587,2015
1,Iceland,7.561,2015
2,Denmark,7.527,2015
0,Denmark,7.526,2016
1,Switzerland,7.509,2016
2,Iceland,7.501,2016


In [33]:
pd.concat([three_2015, three_2016], axis = 0, ignore_index = True)

Unnamed: 0,Country,Happiness Score,year
0,Switzerland,7.587,2015
1,Iceland,7.561,2015
2,Denmark,7.527,2015
3,Denmark,7.526,2016
4,Switzerland,7.509,2016
5,Iceland,7.501,2016


In [34]:
pd.concat([three_2015, three_2016], axis = 1)

Unnamed: 0,Country,Happiness Score,year,Country.1,Happiness Score.1,year.1
0,Switzerland,7.587,2015,Denmark,7.526,2016
1,Iceland,7.561,2015,Switzerland,7.509,2016
2,Denmark,7.527,2015,Iceland,7.501,2016


In [35]:
three_2015 = happiness_2015[["Country", "Happiness Score","year","Standard Error"]].head(3)
three_2016 = happiness_2016[["Country", "Happiness Score","year"]].head(3)

In [36]:
pd.concat([three_2015, three_2016], axis = 0, ignore_index = True)

Unnamed: 0,Country,Happiness Score,year,Standard Error
0,Switzerland,7.587,2015,0.03411
1,Iceland,7.561,2015,0.04884
2,Denmark,7.527,2015,0.03328
3,Denmark,7.526,2016,
4,Switzerland,7.509,2016,
5,Iceland,7.501,2016,


## Merge
    The merge() function in pandas is used to merge two or more DataFrames based on a common column or multiple columns. It allows you to combine DataFrames by performing various types of joins, similar to SQL.

In [37]:
three_2015 = happiness_2015[["Country", "Happiness Score","year"]].head(3)
three_2016 = happiness_2016[["Country", "Happiness Score","year"]].iloc[1:4]

In [38]:
pd.merge(three_2015, three_2016, on = "Country", how = "inner", suffixes=(" 2015"," 2016"))

Unnamed: 0,Country,Happiness Score 2015,year 2015,Happiness Score 2016,year 2016
0,Switzerland,7.587,2015,7.509,2016
1,Iceland,7.561,2015,7.501,2016


In [39]:
pd.merge(three_2015, three_2016, on = "Country", how = "left", suffixes=(" 2015"," 2016"))

Unnamed: 0,Country,Happiness Score 2015,year 2015,Happiness Score 2016,year 2016
0,Switzerland,7.587,2015,7.509,2016.0
1,Iceland,7.561,2015,7.501,2016.0
2,Denmark,7.527,2015,,


In [40]:
pd.merge(three_2015, three_2016, on = "Country", how = "right", suffixes=(" 2015"," 2016"))

Unnamed: 0,Country,Happiness Score 2015,year 2015,Happiness Score 2016,year 2016
0,Switzerland,7.587,2015.0,7.509,2016
1,Iceland,7.561,2015.0,7.501,2016
2,Norway,,,7.498,2016


In [41]:
pd.merge(three_2015, three_2016, on = "Country", how = "outer", suffixes=(" 2015"," 2016"))

Unnamed: 0,Country,Happiness Score 2015,year 2015,Happiness Score 2016,year 2016
0,Switzerland,7.587,2015.0,7.509,2016.0
1,Iceland,7.561,2015.0,7.501,2016.0
2,Denmark,7.527,2015.0,,
3,Norway,,,7.498,2016.0


# Assignment

In [42]:
wh_2015 = pd.read_csv("wh_2015.csv")
wh_2016 = pd.read_csv("wh_2016.csv")
wh_2017 = pd.read_csv("wh_2017.csv")
region = pd.read_csv("region.csv")

In [43]:
wh_2015.columns

Index(['Country', 'Region', 'Happiness Rank', 'Happiness Score',
       'Standard Error', 'Economy (GDP per Capita)', 'Family',
       'Health (Life Expectancy)', 'Freedom', 'Trust (Government Corruption)',
       'Generosity', 'Dystopia Residual', 'Year'],
      dtype='object')

In [44]:
wh_2016.columns

Index(['Country', 'Region', 'Happiness Rank', 'Happiness Score',
       'Lower Confidence Interval', 'Upper Confidence Interval',
       'Economy (GDP per Capita)', 'Family', 'Health (Life Expectancy)',
       'Freedom', 'Trust (Government Corruption)', 'Generosity',
       'Dystopia Residual', 'Year'],
      dtype='object')

In [45]:
wh_2017.columns

Index(['Country', 'Happiness.Rank', 'Happiness.Score', 'Whisker.high',
       'Whisker.low', 'Economy..GDP.per.Capita.', 'Family',
       'Health..Life.Expectancy.', 'Freedom', 'Generosity',
       'Trust..Government.Corruption.', 'Dystopia.Residual', 'Year'],
      dtype='object')

In [46]:
region.columns

Index(['COUNTRY', 'REGION'], dtype='object')

In [47]:
region.rename(columns = {"COUNTRY":"Country","REGION":"Region"}, inplace = True)

In [48]:
merge_1516 = pd.merge(wh_2015, wh_2016, on = ["Country","Region"], how = "inner", suffixes=(" 2015"," 2016"))

In [49]:
merge_1516

Unnamed: 0,Country,Region,Happiness Rank 2015,Happiness Score 2015,Standard Error,Economy (GDP per Capita) 2015,Family 2015,Health (Life Expectancy) 2015,Freedom 2015,Trust (Government Corruption) 2015,...,Lower Confidence Interval,Upper Confidence Interval,Economy (GDP per Capita) 2016,Family 2016,Health (Life Expectancy) 2016,Freedom 2016,Trust (Government Corruption) 2016,Generosity 2016,Dystopia Residual 2016,Year 2016
0,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,...,7.428,7.590,1.52733,1.14524,0.86303,0.58557,0.41203,0.28083,2.69463,2016
1,Iceland,Western Europe,2.0,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,...,7.333,7.669,1.42666,1.18326,0.86733,0.56624,0.14975,0.47678,2.83137,2016
2,Denmark,Western Europe,3.0,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,...,7.460,7.592,1.44178,1.16374,0.79504,0.57941,0.44453,0.36171,2.73939,2016
3,Norway,Western Europe,4.0,7.522,0.03880,1.45900,1.33095,0.88521,0.66973,0.36503,...,7.421,7.575,1.57744,1.12690,0.79579,0.59609,0.35776,0.37895,2.66465,2016
4,Canada,North America,5.0,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,...,7.335,7.473,1.44015,1.09610,0.82760,0.57370,0.31329,0.44834,2.70485,2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,Rwanda,Sub-Saharan Africa,154.0,3.465,0.03464,0.22208,0.77370,0.42864,0.59201,0.55191,...,3.444,3.586,0.32846,0.61586,0.31865,0.54320,0.50521,0.23552,0.96819,2016
147,Benin,Sub-Saharan Africa,155.0,3.340,0.03656,0.28665,0.35386,0.31910,0.48450,0.08010,...,3.404,3.564,0.39499,0.10419,0.21028,0.39747,0.06681,0.20180,2.10812,2016
148,Syria,Middle East and Northern Africa,156.0,3.006,0.05015,0.66320,0.47489,0.72193,0.15684,0.18906,...,2.936,3.202,0.74719,0.14866,0.62994,0.06912,0.17233,0.48397,0.81789,2016
149,Burundi,Sub-Saharan Africa,157.0,2.905,0.08658,0.01530,0.41587,0.22396,0.11850,0.10062,...,2.732,3.078,0.06831,0.23442,0.15747,0.04320,0.09419,0.20290,2.10404,2016


In [50]:
merge_1617 = pd.merge(merge_1516, wh_2017, on = "Country", how = "inner" ,suffixes=(" 2015-16"," 2017"))

In [51]:
merge_1617

Unnamed: 0,Country,Region,Happiness Rank 2015,Happiness Score 2015,Standard Error,Economy (GDP per Capita) 2015,Family 2015,Health (Life Expectancy) 2015,Freedom 2015,Trust (Government Corruption) 2015,...,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual,Year
0,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,...,7.561772,7.426227,1.564980,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716,2017
1,Iceland,Western Europe,2.0,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,...,7.622030,7.385970,1.480633,1.610574,0.833552,0.627163,0.475540,0.153527,2.322715,2017
2,Denmark,Western Europe,3.0,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,...,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.355280,0.400770,2.313707,2017
3,Norway,Western Europe,4.0,7.522,0.03880,1.45900,1.33095,0.88521,0.66973,0.36503,...,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027,2017
4,Canada,North America,5.0,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,...,7.384403,7.247597,1.479204,1.481349,0.834558,0.611101,0.435540,0.287372,2.187264,2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,Rwanda,Sub-Saharan Africa,154.0,3.465,0.03464,0.22208,0.77370,0.42864,0.59201,0.55191,...,3.543030,3.398970,0.368746,0.945707,0.326425,0.581844,0.252756,0.455220,0.540061,2017
147,Benin,Sub-Saharan Africa,155.0,3.340,0.03656,0.28665,0.35386,0.31910,0.48450,0.08010,...,3.745784,3.568217,0.431085,0.435300,0.209930,0.425963,0.207948,0.060929,1.885631,2017
148,Syria,Middle East and Northern Africa,156.0,3.006,0.05015,0.66320,0.47489,0.72193,0.15684,0.18906,...,3.663669,3.260331,0.777153,0.396103,0.500533,0.081539,0.493664,0.151347,1.061574,2017
149,Burundi,Sub-Saharan Africa,157.0,2.905,0.08658,0.01530,0.41587,0.22396,0.11850,0.10062,...,3.074690,2.735310,0.091623,0.629794,0.151611,0.059901,0.204435,0.084148,1.683024,2017


In [52]:
final_merge = pd.merge(region,merge_1617,  on = ["Country","Region"] , how = "outer")

In [53]:
final_merge

Unnamed: 0,Country,Region,Happiness Rank 2015,Happiness Score 2015,Standard Error,Economy (GDP per Capita) 2015,Family 2015,Health (Life Expectancy) 2015,Freedom 2015,Trust (Government Corruption) 2015,...,Whisker.high,Whisker.low,Economy..GDP.per.Capita.,Family,Health..Life.Expectancy.,Freedom,Generosity,Trust..Government.Corruption.,Dystopia.Residual,Year
0,Switzerland,Western Europe,1.0,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,...,7.561772,7.426227,1.564980,1.516912,0.858131,0.620071,0.290549,0.367007,2.276716,2017.0
1,Iceland,Western Europe,2.0,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,...,7.622030,7.385970,1.480633,1.610574,0.833552,0.627163,0.475540,0.153527,2.322715,2017.0
2,Denmark,Western Europe,3.0,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,...,7.581728,7.462272,1.482383,1.551122,0.792566,0.626007,0.355280,0.400770,2.313707,2017.0
3,Norway,Western Europe,4.0,7.522,0.03880,1.45900,1.33095,0.88521,0.66973,0.36503,...,7.594445,7.479556,1.616463,1.533524,0.796667,0.635423,0.362012,0.315964,2.277027,2017.0
4,Canada,North America,5.0,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,...,7.384403,7.247597,1.479204,1.481349,0.834558,0.611101,0.435540,0.287372,2.187264,2017.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,South Sudan,Sub-Saharan Africa,,,,,,,,,...,,,,,,,,,,
162,Congo (Kinshasa),Central Africa,,,,,,,,,...,,,,,,,,,,
163,Congo (Brazzaville),Central Africa,,,,,,,,,...,,,,,,,,,,
164,Congo (Kinshasa),Sub-Saharan Africa,120.0,4.517,0.03680,0.00000,1.00120,0.09806,0.22605,0.07625,...,4.357811,4.202190,0.092102,1.229023,0.191407,0.235961,0.246456,0.060241,2.224959,2017.0


In [54]:
final_merge.to_csv("output.csv",index=False)