# Joining happy data with API data - world_hapiness_joined

    01 Import Data via API
    02 Data Wrangling
    03 Join Data
    04 Export world_hapiness_joined.csv

In [1]:
import quandl
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import statsmodels.api as sm # Using .api imports the public access version of statsmodels, which is a library that handles 
# statistical models.
import os
import warnings # This is a library that handles warnings.

warnings.filterwarnings("ignore") # Disable deprecation warnings that could indicate, for instance, a suspended library or 
# feature. These are more relevant to developers and very seldom to analysts.

plt.style.use('fivethirtyeight') # This is a styling option for how your plots will appear. More examples here:
# https://matplotlib.org/3.2.1/tutorials/introductory/customizing.html
# https://matplotlib.org/3.1.0/gallery/style_sheets/fivethirtyeight.html

In [2]:
path = r'C:\Users\Lenovo\OneDrive\Dokumente\CareerFoundry\Data Immersion\6. Advanced Analytics and Dashboard Design'

# Import Data world_hapiness
df_happy = pd.read_csv(os.path.join(path, 'BRICS', '02 Data', 'Prepared Data', 'world_hapiness_cleaned1.csv'))


In [3]:
df_happy.head()

Unnamed: 0.1,Unnamed: 0,country,year,continent,least_developed,life_expectancy,population,co2_emissions,health_expenditure,electric_power_consumption,forest_area,gdp_per_capita,internet_users,military_exp,open_defecation,drinking_water,obesity_among_adults,beer_cons_per_capita
0,0,Albania,2000,Europe,False,73.955,3089027,1.026213,7.23337,1414.703784,28.076642,3860.804627,0.114097,1.24636,0.888853,86.754471,12.8,1.33431
1,1,Albania,2001,Europe,False,74.288,3060173,1.055496,7.139524,1449.647413,28.123248,4299.546493,0.325798,1.309291,0.836397,86.90407,13.3,1.48995
2,2,Albania,2002,Europe,False,74.579,3051010,1.232379,6.909341,1351.230796,28.169854,4661.402695,0.390081,1.320034,0.781899,87.451635,13.9,1.28697
3,3,Albania,2003,Europe,False,74.828,3039616,1.338985,7.06349,1578.165919,28.21646,5000.049363,0.9719,1.336843,0.728191,87.987194,14.4,1.4483
4,4,Albania,2004,Europe,False,75.039,3026939,1.404059,6.773372,1469.264539,28.263066,5427.732662,2.420388,1.381158,0.675281,88.510583,15.0,1.37617


In [4]:
from datetime import timedelta

In [5]:
df_happy.dtypes

Unnamed: 0                      int64
country                        object
year                            int64
continent                      object
least_developed                  bool
life_expectancy               float64
population                      int64
co2_emissions                 float64
health_expenditure            float64
electric_power_consumption    float64
forest_area                   float64
gdp_per_capita                float64
internet_users                float64
military_exp                  float64
open_defecation               float64
drinking_water                float64
obesity_among_adults          float64
beer_cons_per_capita          float64
dtype: object

## 01 Import Data via API

### Import Data

In [6]:
import nasdaqdatalink

In [7]:
nasdaqdatalink


<module 'nasdaqdatalink' from 'C:\\Users\\Lenovo\\anaconda3\\Lib\\site-packages\\nasdaqdatalink\\__init__.py'>

In [8]:
# Configure API key 

nasdaqdatalink.ApiConfig.api_key = 'bp2zW4s1iBsxAXLXoVxY'

In [9]:
# Liste der Ländercodes, die du abrufen möchtest
countries = ['USA', 'BRA', 'CHN', 'IND', 'RUS', 'ZAF', 'FRA', 'DEU', 'CAN', 'JAP', 'ITA', 'GBR']  # Füge hier alle Länder hinzu, die du benötigst

# Liste der Indikatoren, die du abrufen möchtest
indicators = ['LE', 'GGXWDG_NGDP', 'GGXWDN', 'PPPSH', 'NGDPD', 'LP', 'BCA_NGDPD', 'BCA']  # Füge hier alle gewünschten Indikatoren hinzu

# LE - Employment, Millions
# NGDPD - GDP
# LP - Population
# GGXWDG_NGDP  - General Government Net Debt, % of GDP	 Staatsverschuldung
# GGXWDN - Debt
# BCA_NGDPD - Current Account Balance, % of GDP	 Außenhandelsbilanz
#  BCA-  Current Account Balance, USD Billions	




# DataFrame erstellen, um die Daten zu speichern
df= pd.DataFrame()

# Daten für jedes Land und jeden Indikator abrufen
for country in countries:
    for indicator in indicators:
        indicator_code = f'{country}_{indicator}'
        data = nasdaqdatalink.get_table('QDL/ODA', indicator=indicator_code)
        
        # Wenn Daten vorhanden sind und die Spalte 'date' vorhanden ist
        if not data.empty and 'date' in data.columns:
            # Daten auf den Zeitraum zwischen 2000 und 2020 filtern
            data = data[data['date'].between('1999-01-01', '2019-12-31')]
            
            # Füge die gefilterten Daten zum Ergebnis-DataFrame hinzu
            df = pd.concat([df, data], ignore_index=True)

# Zeige den aktualisierten DataFrame an
print(df)

     indicator       date    value
0       USA_LE 2019-12-31  157.534
1       USA_LE 2018-12-31  155.763
2       USA_LE 2017-12-31  153.335
3       USA_LE 2016-12-31  151.436
4       USA_LE 2015-12-31  148.845
...        ...        ...      ...
1843   GBR_BCA 2003-12-31  -38.644
1844   GBR_BCA 2002-12-31  -36.481
1845   GBR_BCA 2001-12-31  -29.891
1846   GBR_BCA 2000-12-31  -31.156
1847   GBR_BCA 1999-12-31  -35.694

[1848 rows x 3 columns]


In [10]:
# Splitte die Spalte 'indicator' am Unterstrich
df[['country', 'indicator1']] = df['indicator'].str.split('_', n=1, expand=True)

# Zeige den aktualisierten DataFrame an
print(df)

     indicator       date    value country indicator1
0       USA_LE 2019-12-31  157.534     USA         LE
1       USA_LE 2018-12-31  155.763     USA         LE
2       USA_LE 2017-12-31  153.335     USA         LE
3       USA_LE 2016-12-31  151.436     USA         LE
4       USA_LE 2015-12-31  148.845     USA         LE
...        ...        ...      ...     ...        ...
1843   GBR_BCA 2003-12-31  -38.644     GBR        BCA
1844   GBR_BCA 2002-12-31  -36.481     GBR        BCA
1845   GBR_BCA 2001-12-31  -29.891     GBR        BCA
1846   GBR_BCA 2000-12-31  -31.156     GBR        BCA
1847   GBR_BCA 1999-12-31  -35.694     GBR        BCA

[1848 rows x 5 columns]


In [11]:
df['country'].value_counts()

country
USA    168
BRA    168
CHN    168
IND    168
RUS    168
ZAF    168
FRA    168
DEU    168
CAN    168
ITA    168
GBR    168
Name: count, dtype: int64

In [12]:
df.columns

Index(['indicator', 'date', 'value', 'country', 'indicator1'], dtype='object')

In [13]:
# Die Spalte 'indicator' vor dem Mergen entfernen
df.drop('indicator', axis=1, inplace=True)


### Create a pivot

In [14]:


# Pivot-Tabelle erstellen
pivot_df = df.pivot_table(index=['date', 'country'], columns='indicator1', values='value', aggfunc='first').reset_index()



# Zeige den aktualisierten DataFrame an
print(pivot_df)


indicator1       date country      BCA  BCA_NGDPD  GGXWDG_NGDP     GGXWDN  \
0          1999-12-31     BRA  -26.784     -4.467          NaN        NaN   
1          1999-12-31     CAN    1.212      0.179       88.966    509.011   
2          1999-12-31     CHN   21.114      1.940       21.863        NaN   
3          1999-12-31     DEU  -31.251     -1.422       60.387    988.376   
4          1999-12-31     FRA   66.260      4.433       60.493    707.300   
..                ...     ...      ...        ...          ...        ...   
226        2019-12-31     IND  -24.549     -0.866       75.040        NaN   
227        2019-12-31     ITA   66.607      3.311      134.136   2186.421   
228        2019-12-31     RUS   65.650      3.872       13.748        NaN   
229        2019-12-31     USA -445.958     -2.086      108.745  17757.971   
230        2019-12-31     ZAF   -9.976     -2.568       56.217   2846.119   

indicator1       LE        LP      NGDPD   PPPSH  
0               NaN   17

In [15]:
pivot_df.shape


(231, 10)

In [16]:
# LE - Employment, Millions
# NGDPD - GDP
# LP - Population
# GGXWDG_NGDP  - General Government Net Debt, % of GDP	 Staatsverschuldung
# GGXWDN - Debt
# BCA_NGDPD - Current Account Balance, % of GDP	 Außenhandelsbilanz
#  BCA-  Current Account Balance, USD Billions	

# renaming columns
pivot_df.rename(columns={'GGXWDG_NGDP' : 'debt_GDP', 'GGXWDN' : 'debt', 'LE' : 'employment', 'PPPSH' : 'share_of_world_GDP_ppp', 'LP':'population1', 'NGDPD': 'gdp', 'BCA_NGDPD':'account_balance_GDP', 'BCA':'account_balance'}, inplace = True)

In [17]:
pivot_df['country'].value_counts()

country
BRA    21
CAN    21
CHN    21
DEU    21
FRA    21
GBR    21
IND    21
ITA    21
RUS    21
USA    21
ZAF    21
Name: count, dtype: int64

In [18]:
pivot_df.tail()

indicator1,date,country,account_balance,account_balance_GDP,debt_GDP,debt,employment,population1,gdp,share_of_world_GDP_ppp
226,2019-12-31,IND,-24.549,-0.866,75.04,,,1383.112,2835.606,7.028
227,2019-12-31,ITA,66.607,3.311,134.136,2186.421,23.109,59.817,2011.524,1.97
228,2019-12-31,RUS,65.65,3.872,13.748,,,146.749,1695.724,3.081
229,2019-12-31,USA,-445.958,-2.086,108.745,17757.971,157.534,328.547,21380.95,15.751
230,2019-12-31,ZAF,-9.976,-2.568,56.217,2846.119,,58.775,388.446,0.618


#### +1 day für pivot_df

In [19]:
# Einen Tag zu den Datumswerten in der 'date'-Spalte hinzufügen
pivot_df['date'] = pd.to_datetime(pivot_df['date']) + timedelta(days=1)

# Zeige den aktualisierten DataFrame an
print(pivot_df.head())



indicator1       date country  account_balance  account_balance_GDP  debt_GDP  \
0          2000-01-01     BRA          -26.784               -4.467       NaN   
1          2000-01-01     CAN            1.212                0.179    88.966   
2          2000-01-01     CHN           21.114                1.940    21.863   
3          2000-01-01     DEU          -31.251               -1.422    60.387   
4          2000-01-01     FRA           66.260                4.433    60.493   

indicator1     debt  employment  population1       gdp  share_of_world_GDP_ppp  
0               NaN         NaN      171.256   599.642                   3.144  
1           509.011      14.408       30.367   678.417                   1.809  
2               NaN         NaN     1257.860  1088.346                   6.996  
3           988.376      35.932       81.423  2197.125                   4.836  
4           707.300      24.017       58.497  1494.634                   3.354  


## 02 Data Wrangling

In [20]:
pivot_df.shape

(231, 10)

In [21]:
pivot_df.columns

Index(['date', 'country', 'account_balance', 'account_balance_GDP', 'debt_GDP',
       'debt', 'employment', 'population1', 'gdp', 'share_of_world_GDP_ppp'],
      dtype='object', name='indicator1')

In [22]:
pivot_df.describe()

indicator1,date,account_balance,account_balance_GDP,debt_GDP,debt,employment,population1,gdp,share_of_world_GDP_ppp
count,231,231.0,231.0,227.0,164.0,126.0,231.0,231.0,231.0
mean,2009-12-31 20:34:17.142857216,-23.855727,0.111532,66.40904,2284.679128,46.07531,322.611078,3376.527615,5.080139
min,2000-01-01 00:00:00,-816.647,-5.911,7.446,196.033,14.408,30.367,129.385,0.618
25%,2005-01-01 00:00:00,-47.5275,-2.7935,41.522,672.0285,22.33575,59.3165,1246.195,2.4255
50%,2010-01-01 00:00:00,-8.978,-0.735,67.546,1352.503,27.3235,81.423,1949.657,3.116
75%,2015-01-01 00:00:00,32.1165,1.919,85.1765,1860.28325,37.235,292.061,2842.022,4.732
max,2020-01-01 00:00:00,420.569,16.309,135.367,17757.971,157.534,1410.08,21380.95,20.437
std,,179.737512,3.884077,29.606677,3389.149838,44.38859,459.850148,4349.807245,5.10416


#### drop columns

In [23]:
# Check for columns with missing values
pivot_df.isnull().sum()


indicator1
date                        0
country                     0
account_balance             0
account_balance_GDP         0
debt_GDP                    4
debt                       67
employment                105
population1                 0
gdp                         0
share_of_world_GDP_ppp      0
dtype: int64

In [24]:
# Dropping column
pivot_df = pivot_df.drop(['debt'], axis=1)
pivot_df = pivot_df.drop(['employment'], axis=1)


### missing values

In [25]:
pivot_df['country'].value_counts()

country
BRA    21
CAN    21
CHN    21
DEU    21
FRA    21
GBR    21
IND    21
ITA    21
RUS    21
USA    21
ZAF    21
Name: count, dtype: int64

In [26]:
missing_data = pivot_df[pivot_df.isnull().any(axis=1)]
missing_data

indicator1,date,country,account_balance,account_balance_GDP,debt_GDP,population1,gdp,share_of_world_GDP_ppp
0,2000-01-01,BRA,-26.784,-4.467,,171.256,599.642,3.144
9,2000-01-01,USA,-286.609,-2.976,,279.195,9631.175,20.437
10,2000-01-01,ZAF,-0.68,-0.449,,44.23,151.426,0.76
20,2001-01-01,USA,-401.917,-3.921,,282.296,10250.95,20.297


In [27]:
#  just elder data missing

In [28]:
df_happy.columns


Index(['Unnamed: 0', 'country', 'year', 'continent', 'least_developed',
       'life_expectancy', 'population', 'co2_emissions', 'health_expenditure',
       'electric_power_consumption', 'forest_area', 'gdp_per_capita',
       'internet_users', 'military_exp', 'open_defecation', 'drinking_water',
       'obesity_among_adults', 'beer_cons_per_capita'],
      dtype='object')

In [29]:
df_happy.shape

(2373, 18)

In [30]:
df_happy.head()

Unnamed: 0.1,Unnamed: 0,country,year,continent,least_developed,life_expectancy,population,co2_emissions,health_expenditure,electric_power_consumption,forest_area,gdp_per_capita,internet_users,military_exp,open_defecation,drinking_water,obesity_among_adults,beer_cons_per_capita
0,0,Albania,2000,Europe,False,73.955,3089027,1.026213,7.23337,1414.703784,28.076642,3860.804627,0.114097,1.24636,0.888853,86.754471,12.8,1.33431
1,1,Albania,2001,Europe,False,74.288,3060173,1.055496,7.139524,1449.647413,28.123248,4299.546493,0.325798,1.309291,0.836397,86.90407,13.3,1.48995
2,2,Albania,2002,Europe,False,74.579,3051010,1.232379,6.909341,1351.230796,28.169854,4661.402695,0.390081,1.320034,0.781899,87.451635,13.9,1.28697
3,3,Albania,2003,Europe,False,74.828,3039616,1.338985,7.06349,1578.165919,28.21646,5000.049363,0.9719,1.336843,0.728191,87.987194,14.4,1.4483
4,4,Albania,2004,Europe,False,75.039,3026939,1.404059,6.773372,1469.264539,28.263066,5427.732662,2.420388,1.381158,0.675281,88.510583,15.0,1.37617


### country names pivot

In [31]:
print(df_happy['country'].unique())
print(df_happy['year'].unique())
print(pivot_df['country'].unique())
print(pivot_df['date'].dt.year.unique())

['Albania' 'Algeria' 'Angola' 'Argentina' 'Armenia' 'Australia' 'Austria'
 'Bahrain' 'Bangladesh' 'Belarus' 'Belgium' 'Benin' 'Bolivia' 'Botswana'
 'Brazil' 'Brunei Darussalam' 'Bulgaria' 'Cambodia' 'Cameroon' 'Canada'
 'Chile' 'China' 'Colombia' 'Costa Rica' "Cote d'Ivoire" 'Croatia'
 'Cyprus' 'Czechia' 'Denmark' 'Ecuador' 'El Salvador' 'Eritrea' 'Estonia'
 'Ethiopia' 'Finland' 'France' 'Gabon' 'Georgia' 'Germany' 'Ghana'
 'Greece' 'Guatemala' 'Honduras' 'Hungary' 'India' 'Indonesia' 'Iraq'
 'Ireland' 'Israel' 'Italy' 'Jamaica' 'Jordan' 'Kazakhstan' 'Kenya'
 'Kuwait' 'Kyrgyz Republic' 'Latvia' 'Libya' 'Lithuania' 'Luxembourg'
 'Malaysia' 'Malta' 'Mauritius' 'Mexico' 'Moldova' 'Mongolia' 'Montenegro'
 'Morocco' 'Mozambique' 'Myanmar' 'Namibia' 'Nepal' 'Netherlands'
 'New Zealand' 'Nicaragua' 'Niger' 'Nigeria' 'North Macedonia' 'Norway'
 'Oman' 'Pakistan' 'Panama' 'Paraguay' 'Peru' 'Philippines' 'Poland'
 'Portugal' 'Qatar' 'Romania' 'Russian Federation' 'Saudi Arabia'
 'Senegal' 'Serbi

In [32]:
print(pivot_df['country'].unique())


['BRA' 'CAN' 'CHN' 'DEU' 'FRA' 'GBR' 'IND' 'ITA' 'RUS' 'USA' 'ZAF']


#### mapping der pivot

In [33]:
# Liste der Ländercodes und vollständigen Namen
country_mapping = {
    'USA': 'United States',
    'BRA': 'Brazil',
    'CHN': 'China',
    'IND': 'India',
    'RUS': 'Russian Federation',
    'ZAF': 'South Africa',
    'FRA': 'France',
    'DEU': 'Germany',
    'CAN': 'Canada',
    'JAP': 'Japan',
    'ITA': 'Italy',
    'GBR': 'United Kingdom'
}

# Setze die 'country'-Spalte in pivot_df zurück und setze die Ländercodes direkt
pivot_df['country'] = pivot_df['country'].apply(lambda x: country_mapping.get(x, x))



In [34]:
pivot_df['country'].value_counts()

country
Brazil                21
Canada                21
China                 21
Germany               21
France                21
United Kingdom        21
India                 21
Italy                 21
Russian Federation    21
United States         21
South Africa          21
Name: count, dtype: int64

In [35]:
df_happy['country'].value_counts()

country
Albania     21
Libya       21
Peru        21
Paraguay    21
Panama      21
            ..
Finland     21
Ethiopia    21
Estonia     21
Eritrea     21
Zimbabwe    21
Name: count, Length: 113, dtype: int64

### Create filtered df_happy

In [36]:
selected_countries = ['United States', 'Brazil', 'China', 'India', 'Russian Federation', 'South Africa', 'France', 'Germany', 'Canada', 'Japan', 'Italy', 'United Kingdom']

# Filtere df_happy basierend auf den ausgewählten Ländern
df_happy_filtered = df_happy[df_happy['country'].isin(selected_countries)]



In [37]:
df_happy['country'].value_counts()

country
Albania     21
Libya       21
Peru        21
Paraguay    21
Panama      21
            ..
Finland     21
Ethiopia    21
Estonia     21
Eritrea     21
Zimbabwe    21
Name: count, Length: 113, dtype: int64

In [38]:
df_happy_filtered['country'].value_counts()

country
Brazil                21
Canada                21
China                 21
France                21
Germany               21
India                 21
Italy                 21
Russian Federation    21
South Africa          21
United Kingdom        21
United States         21
Name: count, dtype: int64

In [39]:

# Schritt 3: Überprüfen der ausgewählten Länder in beiden DataFrames
print("Ausgewählte Länder in df_happy:", set(df_happy['country']))
print("Ausgewählte Länder in pivot_df:", set(pivot_df['country']))
print("Ausgewählte Länder in df_happy_filtered:", set(df_happy_filtered['country']))


Ausgewählte Länder in df_happy: {'France', 'Turkiye', 'Eritrea', 'United States', 'Namibia', 'Sudan', 'Brazil', 'India', 'United Kingdom', 'Tanzania', 'Benin', 'Panama', 'Germany', 'Kuwait', 'Peru', 'Romania', 'Tajikistan', 'Brunei Darussalam', 'Finland', 'Morocco', 'Kyrgyz Republic', 'Nicaragua', 'Jamaica', 'Netherlands', 'Ethiopia', 'Ukraine', 'Italy', 'Switzerland', 'Ireland', 'Honduras', 'Bolivia', 'Estonia', 'Mexico', 'Canada', 'Belarus', 'Portugal', 'Lithuania', 'Gabon', 'Spain', 'Kazakhstan', 'Pakistan', 'Cameroon', 'Denmark', 'Norway', 'Iraq', 'Belgium', 'El Salvador', 'Albania', 'Armenia', 'Montenegro', 'Moldova', 'Kenya', 'Argentina', 'Austria', 'Israel', 'Togo', 'Nigeria', 'Ecuador', 'Mauritius', 'Angola', 'Sweden', 'Jordan', 'Poland', 'Nepal', 'Saudi Arabia', 'Malaysia', 'Indonesia', 'Czechia', 'Oman', 'New Zealand', 'Bangladesh', 'Slovenia', 'Latvia', 'Malta', 'Mongolia', 'Russian Federation', 'Algeria', 'Luxembourg', 'Ghana', 'Philippines', 'Sri Lanka', 'Cyprus', 'Zimbabw

In [40]:
df_happy_filtered.columns


Index(['Unnamed: 0', 'country', 'year', 'continent', 'least_developed',
       'life_expectancy', 'population', 'co2_emissions', 'health_expenditure',
       'electric_power_consumption', 'forest_area', 'gdp_per_capita',
       'internet_users', 'military_exp', 'open_defecation', 'drinking_water',
       'obesity_among_adults', 'beer_cons_per_capita'],
      dtype='object')

#### check der typen

In [41]:
df_happy_filtered.dtypes


Unnamed: 0                      int64
country                        object
year                            int64
continent                      object
least_developed                  bool
life_expectancy               float64
population                      int64
co2_emissions                 float64
health_expenditure            float64
electric_power_consumption    float64
forest_area                   float64
gdp_per_capita                float64
internet_users                float64
military_exp                  float64
open_defecation               float64
drinking_water                float64
obesity_among_adults          float64
beer_cons_per_capita          float64
dtype: object

In [42]:
pivot_df.dtypes

indicator1
date                      datetime64[ns]
country                           object
account_balance                  float64
account_balance_GDP              float64
debt_GDP                         float64
population1                      float64
gdp                              float64
share_of_world_GDP_ppp           float64
dtype: object

In [43]:
pivot_df['date'] = pd.to_datetime(pivot_df['date'])
df_happy_filtered.rename(columns={'year': 'date'}, inplace=True)
df_happy_filtered['date'] = pd.to_datetime(df_happy_filtered['date'], format='%Y')

## 03 Join Data

In [44]:


# Führe den Join durch
joined_df = df_happy_filtered.set_index(['country', 'date']).join(pivot_df.set_index(['country', 'date']), how='inner')

# Setze die Indexspalten zurück, falls gewünscht
joined_df.reset_index(inplace=True)

print("Joined DataFrame:")
print(joined_df)

# Führe den Join erneut durch
#merged_df = pd.merge(df_happy_filtered, pivot_df, how='inner', on=['country', 'date'])
#print("Merged DataFrame:")
#print(merged_df)

Joined DataFrame:
           country       date  Unnamed: 0      continent  least_developed  \
0           Brazil 2000-01-01         294  South America            False   
1           Brazil 2001-01-01         295  South America            False   
2           Brazil 2002-01-01         296  South America            False   
3           Brazil 2003-01-01         297  South America            False   
4           Brazil 2004-01-01         298  South America            False   
..             ...        ...         ...            ...              ...   
226  United States 2016-01-01        2390  North America            False   
227  United States 2017-01-01        2391  North America            False   
228  United States 2018-01-01        2392  North America            False   
229  United States 2019-01-01        2393  North America            False   
230  United States 2020-01-01        2394  North America            False   

     life_expectancy  population  co2_emissions  health_e

In [45]:
# Überprüfe Spaltennamen und -reihenfolge
print("Spalten in df_happy_filtered:", df_happy_filtered.columns)
print("Spalten in pivot_df:", pivot_df.columns)


Spalten in df_happy_filtered: Index(['Unnamed: 0', 'country', 'date', 'continent', 'least_developed',
       'life_expectancy', 'population', 'co2_emissions', 'health_expenditure',
       'electric_power_consumption', 'forest_area', 'gdp_per_capita',
       'internet_users', 'military_exp', 'open_defecation', 'drinking_water',
       'obesity_among_adults', 'beer_cons_per_capita'],
      dtype='object')
Spalten in pivot_df: Index(['date', 'country', 'account_balance', 'account_balance_GDP', 'debt_GDP',
       'population1', 'gdp', 'share_of_world_GDP_ppp'],
      dtype='object', name='indicator1')


In [46]:
joined_df.shape


(231, 24)

In [47]:
joined_df.head()

Unnamed: 0.1,country,date,Unnamed: 0,continent,least_developed,life_expectancy,population,co2_emissions,health_expenditure,electric_power_consumption,...,open_defecation,drinking_water,obesity_among_adults,beer_cons_per_capita,account_balance,account_balance_GDP,debt_GDP,population1,gdp,share_of_world_GDP_ppp
0,Brazil,2000-01-01,294,South America,False,70.116,174790339,1.79455,8.334593,1832.378306,...,8.910026,93.471528,13.5,3.3405,-26.784,-4.467,,171.256,599.642,3.144
1,Brazil,2001-01-01,295,South America,False,70.462,177196051,1.80241,8.549606,1897.353148,...,8.36775,93.814337,14.0,3.35201,-26.531,-4.048,65.561,173.766,655.454,3.132
2,Brazil,2002-01-01,296,South America,False,70.813,179537523,1.769881,8.696857,1747.939631,...,7.845655,94.142485,14.5,3.31994,-24.89,-4.445,70.054,176.209,559.982,3.101
3,Brazil,2003-01-01,297,South America,False,71.17,181809244,1.709539,8.188999,1806.669684,...,7.334106,94.463523,15.0,3.17642,-9.407,-1.845,78.798,178.499,509.798,3.108
4,Brazil,2004-01-01,298,South America,False,71.531,184006479,1.785372,8.12492,1882.264028,...,6.833189,94.777359,15.5,3.21237,2.193,0.393,73.823,180.708,558.232,3.021


In [48]:
joined_df.describe()

Unnamed: 0.1,date,Unnamed: 0,life_expectancy,population,co2_emissions,health_expenditure,electric_power_consumption,forest_area,gdp_per_capita,internet_users,...,open_defecation,drinking_water,obesity_among_adults,beer_cons_per_capita,account_balance,account_balance_GDP,debt_GDP,population1,gdp,share_of_world_GDP_ppp
count,231,231.0,231.0,231.0,231.0,231.0,176.0,231.0,231.0,231.0,...,231.0,231.0,187.0,220.0,231.0,231.0,227.0,231.0,231.0,231.0
mean,2009-12-31 20:34:17.142857216,1241.545455,74.918736,324831600.0,8.191583,8.653155,6442.636663,31.693807,27225.486909,51.933188,...,5.065497,95.8476,19.459893,3.244569,-23.855727,0.111532,66.40904,322.611078,3376.527615,5.080139
min,2000-01-01 00:00:00,294.0,53.444,30685730.0,0.887014,2.858494,392.039693,12.210143,2093.26599,0.527532,...,0.0,80.461195,1.5,0.06578,-816.647,-5.911,7.446,30.367,129.385,0.618
25%,2005-01-01 00:00:00,456.5,70.778329,59489280.0,5.019614,5.440282,3207.73614,22.471796,12565.277085,27.631058,...,0.0,93.439704,17.6,1.82,-47.5275,-2.7935,41.522,59.3165,1246.195,2.4255
50%,2010-01-01 00:00:00,982.0,77.992683,82348670.0,7.662605,8.696857,5715.97871,31.075092,29062.12387,58.141735,...,0.0,99.074887,21.2,3.60488,-8.978,-0.735,67.546,81.423,1949.657,3.116
75%,2015-01-01 00:00:00,2063.5,80.781707,294160900.0,10.961313,10.401273,7285.766275,38.714226,40013.20145,76.99,...,1.944971,99.934327,24.55,4.2,32.1165,1.919,85.1765,292.061,2842.022,4.732
max,2020-01-01 00:00:00,2394.0,83.497561,1411100000.0,20.469807,18.815826,17264.73674,65.934359,65120.39466,94.818201,...,74.102948,100.000003,37.3,7.16115,420.569,16.309,135.367,1410.08,21380.95,20.437
std,,762.840314,7.39177,462319700.0,4.967648,3.333857,4535.839559,14.003919,16264.370068,29.212535,...,13.75829,5.496441,8.767501,1.652558,179.737512,3.884077,29.606677,459.850148,4349.807245,5.10416


In [49]:
joined_df.columns

Index(['country', 'date', 'Unnamed: 0', 'continent', 'least_developed',
       'life_expectancy', 'population', 'co2_emissions', 'health_expenditure',
       'electric_power_consumption', 'forest_area', 'gdp_per_capita',
       'internet_users', 'military_exp', 'open_defecation', 'drinking_water',
       'obesity_among_adults', 'beer_cons_per_capita', 'account_balance',
       'account_balance_GDP', 'debt_GDP', 'population1', 'gdp',
       'share_of_world_GDP_ppp'],
      dtype='object')

## 04 EXPORT world_hapiness_joined.csv

In [50]:
# save the dataframe as a csv in prepeared data folder
joined_df.to_csv(os.path.join(path, 'BRICS', '02 Data', 'Prepared Data', 'world_hapiness_joined.csv'))