In [None]:
import pandas as pd
import requests
import json
import getpass

In [None]:
#Read the country-code table from its CSV file using pandas' Python parsing engine
#NOTE(review): an earlier comment attributed these codes to WeatherBit although the file name says OECD - confirm the source
oecd_codes_path = 'country_codes_OECD.csv'
country_codes_OECD = pd.read_csv(oecd_codes_path, engine='python')

In [None]:
#Preview the first rows of the freshly loaded table
country_codes_OECD.head()

In [None]:
#Delete every space character from the column labels (plain-text match, no regular expression involved)
country_codes_OECD.columns = country_codes_OECD.columns.str.replace(' ', '', regex=False)

In [None]:
#Look at shape of the dataframe as (number of rows, number of columns)
country_codes_OECD.shape

In [None]:
#Investigate null values: count the missing entries in each column
country_codes_OECD.isna().sum()

In [None]:
#'Unnamed:2' is not needed, so remove that column
country_codes_OECD = country_codes_OECD.drop(columns='Unnamed:2')

In [None]:
#Investigate the solitary non-null value in 'Unnamed:3' column
#Grouping by 'Unnamed:3' skips rows whose key is null (the groupby default), so the output
#only contains the row(s) that actually carry a value in that column
country_codes_OECD.groupby(by=["Unnamed:3"]).sum()

In [None]:
#'Unnamed:3' is not needed either, so remove that column as well
country_codes_OECD = country_codes_OECD.drop(columns='Unnamed:3')

In [None]:
#Inspect the 'code' column on its own
country_codes_OECD['code']

In [None]:
#Materialise the 'code' column as a plain Python list and print it
code_list = country_codes_OECD['code'].to_list()
print(code_list)

In [None]:
#Print every code that is longer than three characters
for code in code_list:
    if len(code) <= 3:
        #Codes of three characters or fewer are skipped
        continue
    print(code)

In [None]:
#Codes that are exactly 4 characters long are continents or former countries, so they can be dropped
code_lengths = country_codes_OECD['code'].str.len()
four_codes = code_lengths[code_lengths == 4].index
country_codes_OECD = country_codes_OECD.drop(index=four_codes)

#'KOREA-SO' can be deleted
is_korea_so = country_codes_OECD['code'].eq('KOREA-SO')
row_drop = country_codes_OECD[is_korea_so].index
country_codes_OECD = country_codes_OECD.drop(index=row_drop)

#'KOREA-NO' can be changed to 'PRK' and 'KOREA-NS' can be changed to 'KOR'
#(the replacement is applied to every cell of the dataframe that equals one of these values)
korea_code_fixes = {'KOREA-NO': 'PRK', 'KOREA-NS': 'KOR'}
country_codes_OECD = country_codes_OECD.replace(to_replace=korea_code_fixes)

#Reset index so the rows are numbered consecutively again after the drops
country_codes_OECD = country_codes_OECD.reset_index(drop=True)

In [None]:
#Every code that is still eight characters long carries a prefix that can be removed;
#collect the index labels of those rows and display them
has_prefix = country_codes_OECD['code'].str.len().eq(8)
prefix_codes = country_codes_OECD[has_prefix].index
prefix_codes

In [None]:
#Remove the first five characters (the prefix) from the codes whose rows are listed in prefix_codes.
#The write goes through a single .loc call on the dataframe itself. The earlier chained form,
#country_codes_OECD["code"].iloc[...] = ..., assigned into an intermediate Series: pandas warns about
#that pattern, and with Copy-on-Write enabled it leaves the dataframe unchanged.
#prefix_codes holds index labels, so the label-based .loc accessor is the matching one.
country_codes_OECD.loc[prefix_codes, "code"] = country_codes_OECD.loc[prefix_codes, "code"].str.slice(start=5)

In [None]:
#Display the cleaned codes as a plain list to check the result
country_codes_OECD["code"].tolist()

In [None]:
#Export data to CSV file
#to_csv returns None when it is given a path, so its result is deliberately not assigned back:
#doing so would replace the dataframe with None and make this cell fail when run a second time
country_codes_OECD.to_csv('country_codes_final.csv', index=False)

In [None]:
#Import and clean 'protected_areas' CSV file

In [None]:
#Load the protected-areas data from its CSV file
#NOTE(review): a later cell writes its result back to this same path, so after one full run this file
#no longer holds the raw data - confirm a pristine copy is kept elsewhere
protected_areas = pd.read_csv('protected_areas.csv')

In [None]:
#Preview the first rows of the loaded data
protected_areas.head()

In [None]:
#Investigate null values: count the missing entries in each column
protected_areas.isna().sum()

In [None]:
#The 'Flag Codes' column is not needed; remove it from the dataframe in place
protected_areas.drop(columns=['Flag Codes'], inplace=True)

In [None]:
#Collect the columns that hold exactly one distinct value (a missing value counts as a value here),
#then remove all of them from the dataframe in a single in-place drop
constant_columns = [
    name
    for name in protected_areas.columns
    if len(protected_areas[name].unique()) == 1
]
protected_areas.drop(columns=constant_columns, inplace=True)

In [None]:
#Display the dataframe after the column clean-up
protected_areas

In [None]:
#Use pivot function to create new columns for each year in the dataframe
#(one row per LOCATION, one column per TIME value; no `values` argument is passed, so every remaining
#column is pivoted and the resulting column labels are hierarchical)
protected_areas = protected_areas.pivot(index='LOCATION', columns='TIME')
protected_areas.head()

In [None]:
#Turn the LOCATION index back into an ordinary column so the country codes are part of the data again
protected_areas = protected_areas.reset_index()

In [None]:
#Preview the dataframe now that the index has been moved into a column
protected_areas.head()

In [None]:
#Inspect the column labels produced by the pivot and index reset
protected_areas.columns

In [None]:
#First step towards removing the multiindex: list the column labels as they currently stand
list(protected_areas.columns)

In [None]:
#Preview the flattened names: keep only the second entry of each column label
[x[1] for x in protected_areas.columns]

In [None]:
#Replace each column label with its second entry, flattening the labels to a single level
#NOTE(review): if more than one value column was pivoted, this yields duplicate column names - confirm only one remained
protected_areas.columns = [x[1] for x in protected_areas.columns]

In [None]:
#Give the first column, which holds the country codes, the name "country_code"
first_column = protected_areas.columns[0]
protected_areas.rename(columns={first_column: "country_code"}, inplace=True)

In [None]:
#Preview the final layout before exporting
protected_areas.head()

In [None]:
#Export the reshaped data to CSV
#to_csv returns None when it is given a path, so its result is deliberately not assigned back:
#doing so would replace the dataframe with None and make this cell fail when run a second time
#NOTE(review): this overwrites 'protected_areas.csv', the same file the notebook reads earlier - confirm that is intended
protected_areas.to_csv('protected_areas.csv', index=False)