In [None]:
import urllib3
import csv
import re

In [None]:
# Module-level urllib3 pool manager; httpGET() issues its requests through it.
http = urllib3.PoolManager()

In [None]:
# A query URL is assembled as: prefixURL + <country code> + suffixURL.
prefixURL = "http://nisatapps.prio.org/Results_SQL.aspx?C1="
suffixURL = "&C2=-2&p=Exports&Dep1=0&Dep2=False&r=True&W=100&dtl=2&Y=All%20Years&d=99&t=3&dls=True&csv=True&EY=All%20Years&scp=3"

# Comma-separated column header line: used to locate the data inside a
# downloaded page and written as the first line of the output file.
header = 'Reporter_Code,Reporter_Name,Partner_Code,Partner_Name,ImportOrExport,Year,Period_Start,Period_End,Weapons_Type,Units,Value,Currency,Licenses_Issued,Weight,Licenses_Refused,AuthOrDel,GovtOrInd,Data_Source,Reliability,Accuracy,SmallArmsOnly,Comment,GlobalComment'
# Number of columns: one more than the number of separating commas.
nbrCols = header.count(",") + 1

def httpGET(countryCode):
    """Download the raw results page for one country code.

    The URL is built as ``prefixURL + str(countryCode) + suffixURL`` and
    requested through the module-level ``http`` pool manager.

    Args:
        countryCode: Value placed after ``C1=`` in the query string.

    Returns:
        The response body decoded as UTF-8, or ``None`` when the request
        raises a urllib3 error, the status is not 200, or the body is not
        valid UTF-8. Each failure is printed with an ``ERROR: `` prefix.
    """
    url = prefixURL + str(countryCode) + suffixURL

    # Connection failures, timeouts and exhausted retries all derive from
    # urllib3.exceptions.HTTPError. Report them through the same None
    # channel as a bad status so one unreachable page does not abort a run
    # over many countries.
    try:
        r = http.request('GET', url)
    except urllib3.exceptions.HTTPError as exc:
        print("ERROR: " + str(exc))
        return None

    if r.status != 200:
        print("ERROR: " + str(r.status))
        return None

    # Convert the raw bytes to text; an undecodable body counts as a failure.
    try:
        return r.data.decode('utf-8')
    except UnicodeDecodeError as exc:
        print("ERROR: " + str(exc))
        return None
        
        
def parseContent(html):
    """Extract the data rows that follow the column header in a page.

    Args:
        html: Page text as returned by httpGET().

    Returns:
        ``None`` when ``header`` does not occur in ``html``. Otherwise a
        string of newline-joined rows: every line found after the first
        header occurrence that, once HTML tags and surrounding whitespace
        are removed, is non-empty and has exactly ``nbrCols``
        comma-separated fields. The string is empty when no line qualifies.
    """
    pieces = html.split(header)

    # Header never seen: nothing to extract.
    if len(pieces) <= 1:
        return None

    # Drop everything before the first header; any repeated header acts as
    # a line break, exactly like the <BR> separators between rows.
    text = "<BR>".join(pieces[1:]).replace("<BR>", "\n")

    # Strip the remaining HTML tags.
    text = re.sub('<[^<]+?>', '', text)

    # Keep only non-blank lines whose field count matches the header's.
    trimmed = (line.strip() for line in text.split("\n"))
    return "\n".join(
        line for line in trimmed
        if line and line.count(",") + 1 == nbrCols
    )


def getPage(countryCode):
    """Fetch the page for ``countryCode`` and return its parsed rows.

    Returns:
        Whatever parseContent() yields for the downloaded page, or ``None``
        when httpGET() returned ``None``.
    """
    page = httpGET(countryCode)
    return None if page is None else parseContent(page)

In [None]:
# Path of the CSV file that getCountryCodes() reads; being a relative path,
# it is resolved against the current working directory.
countryCodesFile = "COW country codes.csv"

def getCountryCodes(path=None):
    """Read the country-code CSV into a ``{CCode: StateNme}`` dictionary.

    Args:
        path: CSV file to read. When omitted, the module-level
            ``countryCodesFile`` is used.

    Returns:
        dict mapping each ``CCode`` value (kept as the string read from the
        file) to the ``StateNme`` of the same row. When a code appears on
        several rows, the last row wins.

    Raises:
        ValueError: If the header row has no ``CCode`` or ``StateNme`` column.
    """
    if path is None:
        path = countryCodesFile

    # "utf-8-sig" also reads plain UTF-8, but drops a leading byte-order
    # mark that would otherwise be glued to the first column name and make
    # the header lookup fail.
    with open(path, 'r', newline='', encoding="utf-8-sig") as csvfile:

        reader = csv.reader(csvfile, delimiter=',')

        # Locate the two useful columns from the header row.
        columns = next(reader)
        ind_cc = columns.index('CCode')
        ind_name = columns.index('StateNme')

        # csv.reader yields an empty list for a blank line; skip those
        # instead of failing on the index lookup.
        return {row[ind_cc]: row[ind_name] for row in reader if row}

In [None]:
# Country code dict, as returned by getCountryCodes(): {CCode: StateNme}
dictionary = getCountryCodes()

output_path = "arms_trades_1.csv"

# Download the page of every country and collect all rows in one CSV file.
with open(output_path, 'w+', newline='', encoding="utf-8") as outfile:

    # write header
    outfile.write(header + "\n")

    # go through countries
    for key, name in dictionary.items():

        # get data for that page
        data = getPage(key)

        # if failed
        if data is None:
            print("ERROR: " + str(name))
            continue

        # getPage() returns ONE newline-joined string, not a list of rows.
        # Iterating over it would write a single character per line, so
        # write it in one piece. An empty string means no rows were found.
        if data:
            outfile.write(data + "\n")

        print("DONE: " + str(name))
            
