In [2]:
import pandas as pd
import os

# Project root for the current platform: the Windows Dropbox folder when
# os.name reports 'nt', otherwise the Linux home-directory Dropbox folder.
# Every other path in the notebook is built by appending to this string,
# so it must keep its trailing slash.
base_dir = (
    "D:/Dropbox/Dropbox (Personal)/College/DR_Paper/"
    if os.name == 'nt'
    else "/home/j/Dropbox/College/DR_Paper/"
)

In [4]:
## Build the HS1996 -> SITC3 lookup table (dict keyed by HS96 code)
tconvdir = base_dir + 'Tariff Conversion/'
hs96sitc3 = tconvdir + 'HS1996 to SITC3.xls'
texcelfile = pd.ExcelFile(hs96sitc3)

# Both code columns are passed through str() while the sheet is parsed.
tcdf = texcelfile.parse("Conversion Table",
                        converters={'HS96': str, 'S3': str})
tconv = dict(zip(tcdf['HS96'], tcdf['S3']))

# Codes that are forced to map to themselves, overriding (or adding to)
# whatever the spreadsheet provided for them.
_identity_codes = [
    '710820', '711890', '070513', '090510', '090590',
    '090710', '090790', '121290', '200999', '293490',
    '370192', '390312', '620362', '701091', '701092',
    '701093', '701094', '731430', '851740', '852840',
]
tconv.update({code: code for code in _identity_codes})

In [12]:
## Build the HS1996 -> ISIC3 lookup table (dict keyed by HS96 code)
isic3convdir = base_dir + 'Tariff Conversion/HS1996toISIC3/'
hs96isic3 = isic3convdir + 'HS1996toISIC3.csv'

# The two product-code columns are passed through str() on load.
_code_converters = {
    'HS 1996 Product Code': str,
    'ISIC Revision 3 Product Code': str,
}
isic3 = pd.read_csv(hs96isic3, converters=_code_converters)

# The file is expected to have exactly four columns; give them short names
# and keep only the two code columns.
isic3.columns = ['HS96', 'HSdesc', 'ISIC3', 'ISICdesc']
isic3 = isic3[['HS96', 'ISIC3']]

isic3conv = dict(zip(isic3['HS96'], isic3['ISIC3']))

In [8]:
## Merge together WTO provided data and CAFTA DR treaty data
tardir = base_dir + 'Tariffs/HS6/'
caftadir = base_dir + 'Treaties/'

tardf = pd.read_csv(tardir + 'TariffsCombined.csv')
caftadf = pd.read_csv(caftadir + 'Tariffsagg.csv')

# Left join: every row of the WTO table is kept; rows with no matching
# HS1996 code in the treaty table get NaN in the treaty columns.
df = tardf.merge(caftadf, left_on='HS6', right_on='HS1996', how='left')

# `axis` must be passed by keyword: pandas 2.0 made every argument of
# DataFrame.drop other than `labels` keyword-only.
df = df.drop(['HS1996', 'DutyCode'], axis=1)

# Treat a missing average rate as a rate of 0.
cols = list(df.columns)
for col in cols:
    if 'AvgRate' in col:
        df[col] = df[col].fillna(0)

In [29]:
compdir = base_dir + "DirectoryofCompaniesandEstablishments/"
compoutcsv = base_dir + 'companiestariffs.csv'

## Merge together tariff data and Empresas by municipality

# Start from the merged tariff table without its '...Lines...' columns.
# drop() returns a new frame, so `df` itself is left untouched.
lines = [a for a in df.columns if 'Lines' in a]
compdirdf = df.drop(lines, axis=1)

# Convert HS6 codes into ISIC3 codes via the isic3conv lookup (built from
# the HS1996 -> ISIC3 correspondence file). The leading apostrophe on the
# HS6 value is stripped for the lookup and one is prepended to the result.
# A code missing from the lookup raises KeyError.
compdirdf['ISIC'] = compdirdf['HS6'].apply(
    lambda x: "'" + isic3conv[x.replace("'", "")])

# The result of drop() must be assigned back; otherwise HS6 stays in the frame.
compdirdf = compdirdf.drop('HS6', axis=1)

# Average only the numeric columns within each ISIC code. Selecting them
# explicitly avoids asking for the mean of string columns or of the
# grouping key itself.
value_cols = [c for c in compdirdf.select_dtypes(include=['number']).columns
              if c != 'ISIC']
compdirdf = compdirdf.groupby('ISIC', as_index=False)[value_cols].mean()

# Merge with municipality level company data; company rows whose ISIC code
# has no tariff data keep NaN in the tariff columns (left join).
companiesdf = pd.read_csv(compdir + 'empresasbymunici.csv')

companiesdf = companiesdf.merge(compdirdf, on="ISIC", how='left')
companiesdf.to_csv(compoutcsv, index=False)

In [None]:
## NOTE: the cell below was copied from "area share companies" on 1/27/16. TODO: this script still needs to be fixed.

In [None]:
## Compute weighted average of import competing tariff for each
## municipality/province

# Build the paths from base_dir (instead of a hard-coded Linux home
# directory) so this cell reads the companiestariffs.csv written under
# base_dir on either platform.
comptarcsv = base_dir + 'companiestariffs.csv'
municicsv = base_dir + 'municitariffs.csv'
provcsv = base_dir + 'provtariffs.csv'

compdf = pd.read_csv(comptarcsv)

# Reduces data frame down to only "import competing" (wink) industries,
# i.e. rows that have a duty2007 value.
# You want to look into this later, of course
compdf = compdf[compdf['duty2007'].notna()]

#Calculate "new" number of companies in province and municipality
# The counts that came with the file are discarded and recomputed from the
# rows that survived the filtering above. `axis` is passed by keyword
# because pandas 2.0 no longer accepts it positionally in drop().
compdf = compdf.drop(['MUNICINUM', 'PROVNUM'], axis=1)

# Select the grouping keys and EMPRESAS by name rather than by position, and
# sum EMPRESAS explicitly so that no other column can end up in the result
# (the renames below require exactly three and two columns respectively).
calcdf = compdf[['PROVINCE', 'MUNICIPIO', 'EMPRESAS']]
municidf2 = calcdf.groupby(['PROVINCE', 'MUNICIPIO'],
                           as_index=False)['EMPRESAS'].sum()
provdf2 = calcdf.groupby('PROVINCE', as_index=False)['EMPRESAS'].sum()
municidf2.columns = ['PROVINCE', 'MUNICIPIO', 'MUNICINUM']
provdf2.columns = ['PROVINCE', 'PROVNUM']

# Attach the totals to every row and express each row's EMPRESAS as a share
# of its municipality total and of its province total.
compdf = compdf.merge(municidf2, on=['PROVINCE', 'MUNICIPIO'], how='left')
compdf = compdf.merge(provdf2, on=['PROVINCE'], how='left')
compdf['MUNSHARE'] = compdf['EMPRESAS'] / compdf['MUNICINUM']
compdf['PROVSHARE'] = compdf['EMPRESAS'] / compdf['PROVNUM']

## Calculate weighted tariff for each province and municipality
# The columns to weight are picked by position (index 4 up to, not including,
# 25). NOTE(review): presumably these are the tariff columns -- confirm
# against the actual column order of compdf.
tariff_cols = list(compdf.columns)[4:25]
for tariff_col in tariff_cols:
    tariff = compdf[tariff_col]
    # 'wptr' = weighted by province share, 'wmtr' = weighted by municipality share
    compdf['wptr' + tariff_col] = tariff * compdf['PROVSHARE']
    compdf['wmtr' + tariff_col] = tariff * compdf['MUNSHARE']
    
#df.loc[i,'duty'+str(year)]=ctariff(df['Base'][i]

## Sum down to municipality and province tariff averages

# Province level: add up the province-weighted ('wptr') columns within each
# PROVINCE and write one row per province.
provcols = [name for name in compdf.columns if 'wptr' in name]
provtariffdf = compdf.groupby('PROVINCE', as_index=False)[provcols].sum()
provtariffdf.to_csv(provcsv, index=False)

# Municipality level: add up the municipality-weighted ('wmtr') columns
# within each MUNICIPIO and write one row per municipality. The grouping is
# on MUNICIPIO alone (PROVINCE is not part of the key).
municicols = [name for name in compdf.columns if 'wmtr' in name]
municitariffdf = compdf.groupby('MUNICIPIO', as_index=False)[municicols].sum()
municitariffdf.to_csv(municicsv, index=False)

In [None]:
## Merge together tariff data and U.S. Import/Export data
## This is for the paper I wrote this summer
## DEPRECATED

outputcsv = base_dir + 'tariffsacrossyears.csv'
impcsv = base_dir + 'ValueExportsImportsDR-US/allyears.csv'

# Start from the merged tariff table without its '...Lines...' columns.
# drop() returns a new frame, so `df` itself is left untouched.
lines = [a for a in df.columns if 'Lines' in a]
importdf = df.drop(lines, axis=1)

## Aggregate data down to SITC v3 3 digit level
# HS6 has to be read *before* it is dropped: strip its apostrophe, look the
# code up in tconv and keep the first three characters of the SITC3 code.
# A code missing from tconv raises KeyError.
importdf['S3'] = importdf['HS6'].apply(lambda x: tconv[x.replace("'", "")][:3])
importdf = importdf.drop(['HS6'], axis=1)

# Average only the numeric columns within each 3-digit code; the grouping
# key and any string columns are left out of the mean.
value_cols = [c for c in importdf.select_dtypes(include=['number']).columns
              if c != 'S3']
importdf = importdf.groupby('S3', as_index=False)[value_cols].mean()

## Merge with U.S. Import/Export data
impdf = pd.read_csv(impcsv)
impdf['SITC'] = impdf['SITC'].apply(lambda x: x.replace("'", ""))
importdf = importdf.merge(impdf, left_on='S3', right_on='SITC', how='left')
importdf = importdf.drop(['SITC'], axis=1)

# Put an apostrophe back in front of the code before writing the CSV.
importdf['S3'] = importdf['S3'].apply(lambda x: "'" + x)

importdf.to_csv(outputcsv, index=False)

In [26]:
## Merge income numbers with municipality level tariff data
## Only used for test conducted on 9/13/2015
# (An earlier variant also merged in
# DirectoryofCompaniesandEstablishments/municipalitycodecorrespondence.csv;
# that step is disabled.)

municitarf = base_dir + "municitariffs.csv"
income = base_dir + "MUNICIoccinc.csv"

municitarfdf = pd.read_csv(municitarf)
incomedf = pd.read_csv(income)

# Build the municipality key as <PROV> + '0' + <MUN>, both rendered as strings.
incomedf['MUNICIPIO'] = (incomedf['PROV'].astype(str) + '0'
                         + incomedf['MUN'].astype(str))

# `axis` is passed by keyword (pandas 2.0 rejects it positionally) and the
# result is re-assigned instead of mutating in place.
incomedf = incomedf.drop(['PROV', 'MUN'], axis=1)

# The rename requires exactly three remaining columns.
# NOTE(review): assumes the two income columns come first, in 2007-then-2013
# order -- confirm against MUNICIoccinc.csv.
incomedf.columns = ['INC07', 'INC13', 'MUNICIPIO']

# Compare keys as strings on both sides; municipalities with no income row
# keep NaN in the income columns (left join).
municitarfdf['MUNICIPIO'] = municitarfdf['MUNICIPIO'].astype(str)
municitarfdf = municitarfdf.merge(incomedf, on='MUNICIPIO', how='left')
municitarfdf.to_csv(base_dir + 'municitarincocc.csv', index=False, encoding='utf-8')

In [38]:
## Attach income numbers to the province level tariff data
## Only used for test conducted on 9/13/2015

provtarf = base_dir + "provtariffs.csv"
income = base_dir + "PROVoccinc.csv"

# The income file must have exactly three columns; they are renamed so the
# first one can serve as the PROVINCE join key.
incomedf = pd.read_csv(income)
incomedf.columns = ['PROVINCE', 'INC07', 'INC13']

# Left join: every province in the tariff table is kept, with NaN income
# where the income file has no matching PROVINCE.
provtarfdf = pd.read_csv(provtarf).merge(incomedf, on='PROVINCE', how='left')
provtarfdf.to_csv(base_dir + 'provtarincocc.csv', index=False, encoding='utf-8')