**I work on the base environment**

In [7]:
from pathlib import Path

import numpy as np
import pandas as pd

### Load all GPI data with the corresponding FIPS 10-4 codes, as created in the Jupyter notebook named "Dataframe_GPI_score_fips_10_4_codes_journal".

In [11]:
# Load the wide GPI table: one row per country, one column per year.
gpi_csv_path = 'data/Gpi_fips_10_4.csv'
gpifips = pd.read_csv(gpi_csv_path)

In [12]:
# Preview the first three rows of the wide table.
gpifips.head(n=3)

Unnamed: 0,Country,FIPS_10_4,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Afghanistan,AF,3.35,3.426,3.485,3.392,3.442,3.353,3.321,3.293,3.391,3.347,3.35,3.3,3.644,3.631
1,Albania,AL,1.943,1.916,1.912,1.938,1.976,1.995,1.96,1.948,1.9,1.92,1.843,1.821,1.872,1.824
2,Algeria,AG,2.347,2.347,2.37,2.512,2.355,2.314,2.283,2.261,2.197,2.188,2.188,2.219,2.287,2.31


Delete the column with the full country names

In [13]:
# Drop the full country names; the reshape below keys rows on FIPS_10_4.
# Rebinding with errors='ignore' keeps this cell safe to re-run: a second
# `del gpifips['Country']` would raise KeyError once the column is gone.
gpifips = gpifips.drop(columns='Country', errors='ignore')

Reshape the dataframe from wide to long format so that I have one row per country and year.<br>  

In [14]:
# Wide -> long reshape, done in three explicit steps:
# 1. add the FIPS code to the row index, next to the original positional index;
# 2. stack the remaining columns into a single column of values;
# 3. turn the index levels back into columns and discard the positional one.
indexed_by_code = gpifips.set_index('FIPS_10_4', append=True)
stacked_scores = indexed_by_code.stack()
Ngpifips = stacked_scores.reset_index().drop(columns='level_0')

Rename columns

In [15]:
# Give the three columns of the long table readable names.
long_column_names = ['country', 'year', 'gpi_score']
Ngpifips.columns = long_column_names

In [16]:
# Show the first row to confirm the new column names.
Ngpifips.head(n=1)

Unnamed: 0,country,year,gpi_score
0,AF,2008,3.35


In [17]:
# Distinct country codes, in order of first appearance in the long table.
countries = Ngpifips.country.unique()
countries

array(['AF', 'AL', 'AG', 'AO', 'AR', 'AM', 'AS', 'AU', 'AJ', 'BA', 'BG',
       'BO', 'BE', 'BN', 'BT', 'BL', 'BK', 'BC', 'BR', 'BU', 'UV', 'BY',
       'CB', 'CM', 'CA', 'CT', 'CD', 'CI', 'CH', 'CO', 'CS', 'IV', 'HR',
       'CU', 'CY', 'EZ', 'CG', 'DA', 'DJ', 'DR', 'TT', 'EC', 'EG', 'ES',
       'EK', 'ER', 'EN', 'WZ', 'ET', 'FI', 'FR', 'GB', 'GA', 'GG', 'GM',
       'GH', 'GR', 'GT', 'GV', 'PU', 'GY', 'HA', 'HO', 'HU', 'IC', 'IN',
       'ID', 'IR', 'IZ', 'EI', 'IS', 'IT', 'JM', 'JA', 'JO', 'KZ', 'KE',
       'KV', 'KU', 'KG', 'LA', 'LG', 'LE', 'LT', 'LI', 'LY', 'LH', 'MA',
       'MI', 'MY', 'ML', 'MR', 'MP', 'MX', 'MD', 'MG', 'MJ', 'MO', 'MZ',
       'BM', 'WA', 'NP', 'NL', 'NZ', 'NU', 'NG', 'NI', 'KN', 'MK', 'NO',
       'MU', 'PK', 'WE', 'PM', 'PP', 'PA', 'PE', 'RP', 'PL', 'PO', 'QA',
       'CF', 'RO', 'RS', 'RW', 'SA', 'SG', 'RI', 'SL', 'SN', 'LO', 'SI',
       'SO', 'SF', 'KS', 'OD', 'SP', 'CE', 'SU', 'SW', 'SZ', 'SY', 'TW',
       'TI', 'TZ', 'TH', 'TO', 'TD', 'TS', 'TU', 'T

Check whether the year data are datetime data, which we need for the interpolation

In [18]:
# Inspect the dtype of the year column before converting it.
Ngpifips['year'].dtype

dtype('O')

They are object data, so let's convert them to datetime.

In [19]:
# The year labels are bare four-digit strings such as '2008'. Passing the
# format explicitly avoids relying on pandas' format inference and makes any
# label that is not a plain year raise instead of being guessed at.
# Each year is parsed as 1 January of that year.
Ngpifips['year'] = pd.to_datetime(Ngpifips['year'], format='%Y')

In [20]:
# Re-check the dtype of the year column after the conversion.
Ngpifips['year'].dtype

dtype('<M8[ns]')

Verify that my machine treats '<M8[ns]' as the same dtype as 'datetime64[ns]'

In [21]:
# '<M8[ns]' is the dtype string reported above; compare it with 'datetime64[ns]'.
np.dtype('<M8[ns]') == np.dtype('datetime64[ns]')

True

### Interpolation for each country

**Upsample** (= increase the frequency of the samples, e.g. from minutes to seconds, as opposed to **downsample**) each country's data in order to create NaNs, and then interpolate them.<br>

In [23]:
# The parsed year timestamps fall on 1 January; the yearly GPI value is
# assigned to March, i.e. two months later.
GPI_MONTH_OFFSET = 2
OUTPUT_DIR = Path('data/interpolated_gpi')


def interpolate_monthly(yearly_scores):
    """Turn one country's yearly GPI scores into a monthly series.

    Parameters
    ----------
    yearly_scores : pd.DataFrame
        Rows of the long table for a single country, with the columns
        ``country``, ``year`` (datetime) and ``gpi_score``.

    Returns
    -------
    pd.DataFrame
        Two columns: ``MonthYear`` (``YYYYMM`` strings, convenient as a merge
        key) and ``GPI_score`` (linearly interpolated monthly values).
    """
    # Upsampling to month starts creates NaN rows between the yearly values;
    # linear interpolation then fills them.
    monthly = (
        yearly_scores
        .drop(columns='country')
        .set_index('year')
        .resample('MS')
        .interpolate(method='linear')
    )
    # Move every timestamp forward so the yearly value lands on March.
    # Shifting the index (freq=...) keeps all values and extends the series
    # by the same number of months at the end.
    shifted = monthly.shift(GPI_MONTH_OFFSET, freq='MS').dropna()
    shifted.index = shifted.index.strftime('%Y%m')
    result = shifted.reset_index()
    result.columns = ['MonthYear', 'GPI_score']
    return result


# Create the output folder up front so the export also works on a fresh checkout.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# A single groupby pass replaces re-filtering the whole table once per country;
# sort=False keeps the countries in order of first appearance.
for code, yearly_scores in Ngpifips.groupby('country', sort=False):
    interpolate_monthly(yearly_scores).to_csv(
        OUTPUT_DIR / ('interpolated_gpi_%s.csv' % code), index=False
    )