In [81]:
import os
import pandas as pd

In [82]:
# Saves as dataframes data pulled from OECD
pry_itic = pd.read_csv("PRY ITIC.csv", encoding='unicode_escape')
brz_itic = pd.read_csv("BRZ ITIC.csv", encoding='unicode_escape')


In [83]:
# Displays column names in both datasets for comparison
print(pry_itic.columns, brz_itic.columns)

Index(['ï»¿"REPORTER"', 'Reporter Country', 'PARTNER', 'Partner Country',
       'COM_H3', 'Commodity', 'MEASURE', 'Measure', 'TIME', 'Time', 'Value',
       'Flag Codes', 'Flags'],
      dtype='object') Index(['ï»¿"REPORTER"', 'Reporter Country', 'PARTNER', 'Partner Country',
       'COM_H3', 'Commodity', 'MEASURE', 'Measure', 'TIME', 'Time', 'Value',
       'Flag Codes', 'Flags'],
      dtype='object')


In [84]:
# Measure and MEASURE tell what is shown in Value column.
# We only want the values, not the methods used. 
print(pry_itic['MEASURE'].value_counts(),brz_itic['MEASURE'].value_counts())
print(pry_itic['Measure'].value_counts(),brz_itic['Measure'].value_counts())

MEASURE
1    5435
2    5435
Name: count, dtype: int64 MEASURE
1    2898
2    2898
Name: count, dtype: int64
Measure
Value          5435
Method used    5435
Name: count, dtype: int64 Measure
Value          2898
Method used    2898
Name: count, dtype: int64


In [85]:
# Subsets datasets to only include values in value column, not method used. 
pry_itic = pry_itic[pry_itic['Measure']=='Value']
brz_itic = brz_itic[brz_itic['Measure']=='Value']

In [86]:
# defines which columns we want to keep
# Reporter country for clarity of data
# COM_H3 for merging purposes
# Value - Int'l Transport and Insurance Costs of Merchandise Trade - the data we need
columns_to_keep = ['Reporter Country','COM_H3','Time','Value']

# Saves the dataframes with only those columns that have useful data
pry_itic = pry_itic.loc[:,columns_to_keep]
brz_itic= brz_itic.loc[:,columns_to_keep]

In [87]:
# Renames columns for clarity of information
pry_itic.rename(columns = {'Reporter Country':'reporter',
                     'COM_H3':'hs_4_code',
                     'Value':'itic_rate'},
                     inplace=True)

brz_itic.rename(columns = {'Reporter Country':'reporter',
                     'COM_H3':'hs_4_code',
                     'Value':'itic_rate'},
                     inplace=True)

In [88]:
# Converts itic_rate to be a proper multiplier for later use.
pry_itic['itic_rate'] = pry_itic['itic_rate']+1
brz_itic['itic_rate'] = brz_itic['itic_rate']+1

In [90]:
# Groups each df by the hs_4_code, keeps the first country name, takes the average of itic rates
pry_ave_itic = pry_itic.groupby('hs_4_code').agg({'reporter':'first','itic_rate':'mean'}).reset_index()
brz_ave_itic = brz_itic.groupby('hs_4_code').agg({'reporter':'first','itic_rate':'mean'}).reset_index()

In [91]:
# Renames columns in both dataframes for clarity
pry_ave_itic = pry_ave_itic.rename(columns={'itic_rate':'ave_itic_rate'})
brz_ave_itic = brz_ave_itic.rename(columns={'itic_rate':'ave_itic_rate'})

In [93]:
# Saves cleaned dataframe as a new .csv file for future use. 
pry_ave_itic.to_csv('pry_ave_itic.csv',index=False)
brz_ave_itic.to_csv('brz_ave_itic.csv',index=False)