Purpose: 
Used to take HS6 tariff data from the WTO, provided in separate Excel files,
and convert it into a usable CSV file.

Also used to take the already processed CAFTA-DR treaty categories (DRTariff.csv created using tariffschedulemerging.py), and compute estimated Dominican tariffs on U.S. imports for 2007-on.

In [2]:
import os 
import pandas as pd
import string
import xlrd
import csv
from openpyxl import load_workbook
from unidecode import unidecode

In [3]:
# Tariff-code conversion dictionary, read from the "HS2002 to HS1996"
# workbook: keys come from its 'HS 2002' column, values from 'HS 1996'.
tconvdir = ("D:/Dropbox/Dropbox (Personal)/College/DR_Paper/Tariff Conversion/"
            if os.name == 'nt'
            else "/home/j/Dropbox/College/DR_Paper/Tariff Conversion/")

hs02to96 = tconvdir + 'HS2002 to HS1996.xls'
texcelfile = pd.ExcelFile(hs02to96)
# Both code columns are run through str() while the sheet is parsed.
tcdf = texcelfile.parse("Conversion Table",
                        header=1,
                        converters={'HS 2002': str, 'HS 1996': str})

tconv = dict(zip(tcdf['HS 2002'], tcdf['HS 1996']))

# Codes that are added to (or overridden in) the table as mapping to
# themselves.
selfmapped = ['710820', '711890', '070513', '090510', '090590',
              '090710', '090790', '121290', '200999', '293490',
              '370192', '390312', '620362', '701091', '701092',
              '701093', '701094', '731430', '851740', '852840']
tconv.update(zip(selfmapped, selfmapped))

# Distinct HS 1996 codes from the table, plus two codes appended by hand.
hs6 = list(tcdf['HS 1996'].unique())
hs6.extend(['711890', '710820'])

In [96]:
# HS6 WTO tariff data.
# Merges the yearly WTO workbooks for years before 2007 into one table
# keyed on (HS6, DutyCode) and writes it to TariffsCombined.csv.
# Needs hs6 and tconv from the conversion-dictionary cell.
if os.name == 'nt':
    tdir ="D:/Dropbox/Dropbox (Personal)/College/DR_Paper/Tariffs/HS6/"
else:
    tdir ="/home/j/Dropbox/College/DR_Paper/Tariffs/HS6/"

# Start with one row per code in hs6; every yearly table is left-merged
# onto this frame.
maindf = pd.DataFrame(hs6, columns=['HS6'])
maindf['DutyCode'] = "02'"

# Columns of the "DutyDetails" sheet that are discarded.
dropcols = [u'Base Nomenclature',u'Query Name',u'Classification',u'Reporter',u'Reporter Code',
            u'Partner',u'Specific Duty Rate',u'Duty Description',
            u'Sub Heading',u'Duty National Description',
            'Minimum Rate','Maximum Rate']

for path, dirs, files in os.walk(tdir):
    for inputfl in sorted(files):
        if '.xls' not in inputfl:
            continue

        # The text before the first underscore of the file name is parsed
        # as the year.
        year = inputfl.split('_')[0]
        if int(year) >= 2007:
            continue

        # Open the workbook from the directory os.walk is currently
        # visiting, so files found in sub-folders of tdir resolve correctly
        # (tdir + name only works for the top-level folder).
        excelfile = pd.ExcelFile(os.path.join(path, inputfl))
        df = excelfile.parse("DutyDetails")

        # axis is passed by keyword: the positional form is not accepted
        # by current pandas releases.
        df = df.drop(dropcols, axis=1)

        # NOTE(review): assumes the five remaining sheet columns arrive in
        # exactly this order -- confirm against the workbook layout.
        df.columns = ['HS6', 'DutyCode',
                      year+'AvgRate', year+'NumLines', 'DutyStatus']

        # 1 for "Dutiable" rows, 0 for everything else.
        df['DutyStatus'] = df['DutyStatus'].apply(lambda x: 1 if x=="Dutiable" else 0)
        # Strip apostrophes from the product code.
        df['HS6'] = df['HS6'].apply(lambda x: x.replace("'",""))

        # Split the table into duty-free and dutiable items.
        dutyfree = df[df['DutyStatus'] == 0]
        dutyfree = dutyfree.drop([year+'AvgRate','DutyStatus'], axis=1)
        dutyfree.columns = ['HS6', 'DutyCode', year+'DFLines']
        df = df[df['DutyStatus'] == 1]
        df = df.drop(['DutyStatus'], axis=1)

        if int(year) >= 2003:
            # For 2003-2006 the codes are translated through tconv.
            # Several source codes can land on the same target code, so
            # rates are averaged and line counts summed per target code.
            df['HS6'] = df['HS6'].apply(lambda x: tconv[x])
            dutyfree['HS6'] = dutyfree['HS6'].apply(lambda x: tconv[x])

            comdf = df.groupby(['HS6','DutyCode'], as_index=False)[year+'AvgRate'].mean()
            linesdf = df.groupby(['HS6','DutyCode'], as_index=False)[year+'NumLines'].sum()
            df = comdf.merge(linesdf, on=['HS6','DutyCode'], how='left')

            dutyfree = dutyfree.groupby(['HS6','DutyCode'], as_index=False)[year+'DFLines'].sum()

        maindf = maindf.merge(dutyfree, on=['HS6','DutyCode'], how='left')
        maindf = maindf.merge(df, on=['HS6','DutyCode'], how='left')


# DataFrame.sort(columns=...) no longer exists in current pandas; use
# sort_values where available and keep the old call for legacy installs.
if hasattr(maindf, 'sort_values'):
    maindf = maindf.sort_values(by=['HS6'])
else:
    maindf = maindf.sort(columns=['HS6'])
# Put an apostrophe in front of every code before writing the CSV.
maindf['HS6'] = maindf['HS6'].apply(lambda x: "'"+str(x))
maindf.to_csv(tdir+'TariffsCombined.csv',index=False)

1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006


In [46]:
# CAFTA-DR tariff calculation.
# Restarting the notebook before running this cell avoids clashes with the
# same-named variables (tdir, inputfl, df) left over from the WTO cell.
# The tconv dictionary from the conversion-dictionary cell is still used
# further down in this cell, so re-run that cell after a restart.
tdir = ("D:/Dropbox/Dropbox (Personal)/College/DR_Paper/Treaties/"
        if os.name == 'nt'
        else "/home/j/Dropbox/College/DR_Paper/Treaties/")

# Treaty schedule produced earlier, read as UTF-8.
inputfl = tdir + "DRTariff.csv"
df = pd.read_csv(inputfl, encoding="utf-8")

def reductionform(base,numstages,year):
    """Return the tariff left after a straight-line phase-out.

    Each annual cut equals ``base / (numstages + 1)``; in zero-based
    schedule year ``year`` a total of ``year + 1`` cuts have been applied.
    """
    cuts_applied = year + 1
    # float() keeps this a true division even for integer inputs.
    per_cut = base / float(numstages + 1)
    return base - cuts_applied * per_cut

def ctariff(base,year,category):
    """Return the estimated tariff for one schedule line in a given year.

    Parameters
    ----------
    base : number
        Base tariff rate of the line.
    year : int
        Zero-based schedule year (the driver loop in this file passes
        ``calendar_year - 2007``).
    category : str
        Staging category: A, B, C, D, G, M, N, SP, V, W, X or Y.

    Returns
    -------
    number or str
        The remaining tariff, or the string "SP" for category SP (specific
        conditions such as quotas are not computed here).

    Raises
    ------
    ValueError
        If ``category`` is not recognised, or if it is "G" while ``base``
        is not zero. ValueError is a subclass of Exception, so callers that
        catch Exception keep working.
    """
    # Categories cut in N equal annual stages via reductionform; the rate
    # is 0 from schedule year N onward.
    equal_stages = {"B": 5, "C": 10, "D": 15, "N": 12, "W": 4}

    if category == "A":
        return 0 # Duty free for all years forward

    if category in equal_stages:
        numstages = equal_stages[category]
        if year < numstages:
            return reductionform(base,numstages,year)
        return 0

    if category == "G":
        # Good remains duty free; a non-zero base means the data is wrong.
        if base != 0:
            raise ValueError(
                "category G requires a zero base rate, got %r" % (base,))
        return 0

    if category == "M": # NEED TO UNIT TEST THIS
        if year < 2:
            # 2% of the base rate removed per year
            rbase = base*(.02)
            return base - (year+1)*rbase
        elif year < 8:
            # then 8% of the base rate removed per year
            rbase = base*(.08)
            return ((.96)*base)-((year-1)*rbase)
        elif year < 10:
            # then 16% of the base rate removed per year
            return ((.32)*base)-((year-8)*(.16)*base)
        return 0 # Duty free from schedule year 10 on

    if category == "SP":
        return "SP" # Specific conditions outlined in appendix; i.e. quotas

    if category == "V":
        return base # Remains at base rate until 2017, not coding this

    if category == "X":
        if year == 0:
            return base # No cut in the first schedule year
        elif year < 5:
            # Four equal stages, starting one year late
            return reductionform(base,4,year-1)
        return 0 # Duty free from schedule year 5 on

    if category == "Y": # NEED TO UNIT TEST THIS
        if year < 5:
            # 15% of the base rate removed per year
            rbase = base*(.15)
            return base - (year+1)*rbase
        elif year < 10:
            # then 5% of the base rate removed per year
            rbase = base*(.05)
            return ((.25)*base)-((year-4)*rbase)
        return 0 # Duty free from schedule year 10 on

    # Name the offending value so the bad row can be located.
    raise ValueError("unrecognized staging category: %r" % (category,))

# Estimate the tariff of every schedule line for each year 2007-2015.
# ctariff expects the number of years since 2007 (2007 -> 0).
for year in range(2007,2015+1):
    # Build the whole column in one assignment. Filling an integer column
    # cell by cell with floats and "SP" strings is slow and depends on
    # pandas silently changing the column dtype on assignment.
    df['duty'+str(year)] = [ctariff(b, year-2007, c)
                            for b, c in zip(df['Base'], df['Cat'])]

# Translate each HS6 code through tconv (apostrophes removed for the
# lookup) and store the result with a leading apostrophe.
df['HS1996']=df['HS6'].apply(lambda x: "'"+tconv[x.replace("'","")])
outputfl = tdir+"DRTariffallyears.csv"
df.to_csv(outputfl,encoding="utf-8", index=False)

In [6]:
# CAFTA-DR tariff aggregation.
# Averages every numeric column over all schedule lines that share the
# same HS1996 code and writes the result to Tariffsagg.csv.
if os.name == 'nt':
    tdir ="D:/Dropbox/Dropbox (Personal)/College/DR_Paper/Treaties/"
else:
    tdir ="/home/j/Dropbox/College/DR_Paper/Treaties/"

df = pd.read_csv(tdir+"DRTariffallyears.csv", encoding="utf-8")

# Category "SP" lines hold the text "SP" instead of a rate, so they cannot
# be averaged and are left out.
df = df[df['Cat'] != "SP"]
dropcols = [u'HS8', u'Description',u'Category',u'HS4',u'HS6',u'Cat']
# axis is passed by keyword: the positional form is not accepted by
# current pandas releases.
df = df.drop(dropcols, axis=1)

# Every remaining column except the grouping key is averaged.
cols = list(df.columns)
cols.remove('HS1996')

# Force each value to float before averaging.
for col in cols:
    df[col] = df[col].apply(float)

df = df.groupby(['HS1996'], as_index=False)[cols].mean()

df.to_csv(tdir+'Tariffsagg.csv',index=False)