# Tratamiento Base

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw country-level land temperature table.
df = pd.read_csv(filepath_or_buffer='GlobalLandTemperaturesByCountry.csv')

In [3]:
# Parse the 'dt' strings into timestamps. Despite its name, the 'year' column
# holds the full parsed timestamp, not just the year number.
df = df.assign(year=pd.to_datetime(df['dt']))

In [4]:
# Keep only observations dated strictly after 1970-01-01.
idx = df['year'].gt(pd.Timestamp('1970-01-01'))
df_temp = df.loc[idx]

In [5]:
# Mean and median of AverageTemperature per country and calendar year.
# The aggregations are given by name ('mean', 'median') rather than as NumPy
# callables: recent pandas versions deprecate passing np.mean / np.median to
# agg, while the string form yields the same values and the same
# ('AverageTemperature', 'mean' / 'median') column labels.
# NOTE(review): pandas >= 2.2 also deprecates the 'Y' frequency alias in favour
# of 'YE'; it is kept here so the cell still runs on older pandas versions.
df_t_avg = (
    df_temp
    .groupby(['Country', pd.Grouper(key='year', freq='1Y')])
    .agg({'AverageTemperature': ['mean', 'median']})
)

In [6]:
# Keep only the yearly median and turn the (Country, year) index back into
# ordinary columns.
df_t = (
    df_t_avg['AverageTemperature']
    .loc[:, ['median']]
    .reset_index()
)

In [7]:
# Extract the calendar year from the timestamp column; 'date' is the key later
# used to merge with the indicator tables.
df_t = df_t.assign(date=df_t['year'].dt.year)

In [8]:
# Give the median column a descriptive name.
df_t = df_t.rename(columns={'median': 'temperature'})

In [9]:
# Load the five indicator tables. header=2 takes row index 2 (0-based) of each
# file as the column header, discarding the lines above it.
df_agri, df_fore, df_elec, df_co2e, df_popu = (
    pd.read_csv(csv_name, header=2)
    for csv_name in (
        'AGRI.K2_DS2.csv',
        'LND.FRST.K2_DS2.csv',
        'USE.ELEC.KH.PC_DS2.csv',
        'ATM.CO2E.KT_DS2.csv',
        'POP.TOTL_DS2.csv',
    )
)

In [10]:
# Columns to extract from each indicator table: the two country identifiers
# followed by every year from 1971 through 2014 as a string label.
cols = ['Country Name', 'Country Code'] + [str(year) for year in range(1971, 2015)]

In [11]:
def fun_format(df, col='agriculture', columns=None):
    """Reshape a wide indicator table (one column per year) into long format.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table containing 'Country Name', 'Country Code' and one column
        per year label.
    col : str, default 'agriculture'
        Name given to the column that holds the melted values.
    columns : list of str, optional
        Columns to keep before melting; must include 'Country Name' and
        'Country Code'. When omitted, the notebook-level ``cols`` list is
        used, which is the original behaviour.

    Returns
    -------
    pandas.DataFrame
        Long table with columns 'Country', 'name', 'date' and ``col``, where
        'date' holds the former year column labels (still as strings).
    """
    id_columns = ['Country Name', 'Country Code']
    # Fall back to the global selection only when the caller gives none, so
    # existing two-argument calls keep working unchanged.
    selected = cols if columns is None else list(columns)
    # Name the melted columns explicitly: melt's default variable label
    # follows the name of the column index, so renaming 'variable' afterwards
    # would silently miss it whenever that index is named.
    long_df = df.loc[:, selected].melt(
        id_vars=id_columns,
        var_name='date',
        value_name=col,
    )
    return long_df.rename(columns={'Country Name': 'Country',
                                   'Country Code': 'name'})

In [12]:
# Reshape every indicator table to long format, naming the value column after
# the indicator it holds. Each name is rebound to its reshaped frame, so this
# cell cannot be re-run without first reloading the wide tables.
df_agri, df_fore, df_elec, df_co2e, df_popu = (
    fun_format(wide, col=label)
    for wide, label in (
        (df_agri, 'agriculture'),
        (df_fore, 'forest'),
        (df_elec, 'electricprod'),
        (df_co2e, 'co2'),
        (df_popu, 'population'),
    )
)

In [13]:
# Melting leaves the year labels as strings. Cast them to integers (not
# floats): they are whole calendar years, and the temperature table's 'date'
# key comes from .dt.year, which is an integer, so both sides of the merges
# share an integer key.
for frame in (df_popu, df_fore, df_agri, df_elec, df_co2e):
    frame['date'] = frame['date'].astype(int)

In [14]:
# Join the yearly temperature table with each indicator table in turn.
# The first join uses (Country, date) because the temperature table has no
# country code; it brings in 'name', which the later joins add to the key.
# Every join is an inner join, so only country/year pairs present in all
# tables remain.
df_merge = (
    df_t[['Country', 'temperature', 'date']]
    .merge(df_popu, on=['Country', 'date'], how='inner')
    .merge(df_co2e, on=['Country', 'name', 'date'], how='inner')
    .merge(df_fore, on=['Country', 'name', 'date'], how='inner')
    .merge(df_elec, on=['Country', 'name', 'date'], how='inner')
    .merge(df_agri, on=['Country', 'name', 'date'], how='inner')
)

In [15]:
# Inspect the merged frame before rows with missing values are dropped.
df_merge

Unnamed: 0,Country,temperature,date,name,population,co2,forest,electricprod,agriculture
0,Afghanistan,14.9340,1971,AFG,11475445.0,1895.839,,,380360.0
1,Afghanistan,13.8725,1972,AFG,11791215.0,1532.806,,,380460.0
2,Afghanistan,14.9130,1973,AFG,12108963.0,1639.149,,,380480.0
3,Afghanistan,13.6160,1974,AFG,12412950.0,1917.841,,,380480.0
4,Afghanistan,13.5130,1975,AFG,12689160.0,2126.860,,,380480.0
...,...,...,...,...,...,...,...,...,...
7219,Zimbabwe,22.4860,2009,ZWE,12526968.0,5603.176,159510.000000,585.456912,163000.0
7220,Zimbabwe,23.3040,2010,ZWE,12697723.0,7803.376,156240.000000,606.644199,162000.0
7221,Zimbabwe,22.9420,2011,ZWE,12894316.0,9622.208,153115.996094,636.249337,164000.0
7222,Zimbabwe,23.0455,2012,ZWE,13115131.0,7873.049,149992.001953,608.762505,162000.0


In [16]:
# Discard every row that has a missing value in any column.
df_climate = df_merge.dropna(axis='index', how='any')

In [19]:
# Output location for the cleaned table. The directory is specific to the
# author's home folder; only the file name is substituted into the template.
path_template = '~/Documents/Estadistica/Multivariado/Taller 3/{}'
dir_pandas = path_template.format('CCA.csv')
dir_pandas

'~/Documents/Estadistica/Multivariado/Taller 3/CCA.csv'

In [18]:
# Inspect the frame left after dropping rows with missing values.
df_climate

Unnamed: 0,Country,temperature,date,name,population,co2,forest,electricprod,agriculture
62,Albania,13.4490,1990,ALB,3286542.0,5515.168,7887.999878,552.252185,11210.0
63,Albania,11.5885,1991,ALB,3266790.0,4286.723,7868.499756,418.453589,11270.0
64,Albania,13.5850,1992,ALB,3247039.0,2515.562,7849.000244,453.644074,11270.0
65,Albania,13.6500,1993,ALB,3227287.0,2335.879,7829.500122,536.363825,11260.0
66,Albania,13.2390,1994,ALB,3207536.0,1925.175,7810.000000,596.407959,11260.0
...,...,...,...,...,...,...,...,...,...
7219,Zimbabwe,22.4860,2009,ZWE,12526968.0,5603.176,159510.000000,585.456912,163000.0
7220,Zimbabwe,23.3040,2010,ZWE,12697723.0,7803.376,156240.000000,606.644199,162000.0
7221,Zimbabwe,22.9420,2011,ZWE,12894316.0,9622.208,153115.996094,636.249337,164000.0
7222,Zimbabwe,23.0455,2012,ZWE,13115131.0,7873.049,149992.001953,608.762505,162000.0


In [20]:
# Write the cleaned table to disk without the row index.
df_climate.to_csv(path_or_buf=dir_pandas, index=False)