## INTRASTAT COMMODITY CODE CHECK

Overview: Intrastat is a system that collects information relating to the trade of goods. This script sends a request to the UK Trade Tariff API to check whether the commodity code under test is valid.

Task:

The steps to be performed are outlined below:

1) Import the necessary libraries for the project.
2) Define the functions that will facilitate the commodity check.
3) Send request to API and test for successful response.
4) Parse the API response content into a pandas dataframe.
5) Verify script with test data.


In [12]:
import requests
import json
import pandas as pd

# Base endpoint for commodity lookups; the 10-character commodity query ID is
# appended directly to this URL.
API_URL = 'https://www.trade-tariff.service.gov.uk/api/v2/commodities/'
# Column names for the per-commodity result rows built by request().
COLUMNS_RESP = ["CN8","Response Code", "Valid", "Description", "SU"]

def get_response(cn8, timeout=10):
    """Query the commodity API for the 10-character ID matching ``cn8``.

    The code is right-padded with zeros to 8 characters, then each suffix is
    appended in turn to form the 10-character query ID. Probing stops at the
    first response with status 200. If no suffix succeeds, the response to the
    final ('XX') query is returned so the caller can inspect its status code.

    Args:
        cn8: Commodity code as a string of up to 8 characters.
        timeout: Seconds to wait on each HTTP request. Without a timeout a
            stalled connection would block the whole check indefinitely.

    Returns:
        The ``requests.Response`` of the first query that returned 200, or of
        the last query attempted when none did.

    Raises:
        requests.exceptions.RequestException: On connection failure or when a
            request exceeds ``timeout``.
    """
    # Format input data to 8-digit commodity code.
    cn8 = cn8.ljust(8, '0')

    # Commodity API requires 10 digit query ID. Append suffixes to query string.
    # 'XX' - default suffix for unidentified commodity queries; it is tried
    # last, so its response is what gets returned when nothing matched.
    cn8_suffix = ('00','10','20','30','40','50','60','70','80','90','99','XX')

    # Test suffixes until 10 digit commodity query is identified.
    for suffix in cn8_suffix:
        response_API = requests.get(API_URL + cn8 + suffix, timeout=timeout)
        if response_API.status_code == 200:
            break
    return response_API
   
def get_description(json_file):
    """Return the formatted description from a decoded commodity response.

    Args:
        json_file: Decoded JSON response (a dict) holding a "data" object
            whose "attributes" mapping contains "formatted_description".

    Returns:
        The value stored at data -> attributes -> formatted_description.

    Raises:
        KeyError: If any of those keys is absent from the response.
    """
    # Read the value straight from the nested dicts. Building a DataFrame from
    # the whole 'data' section made this lookup depend on the shape of
    # unrelated sibling fields.
    return json_file["data"]["attributes"]["formatted_description"]

def get_supplementary(json_file):
    """Return the supplementary unit of a commodity, or '-' if it has none.

    Scans the 'included' section of the decoded response for entries of type
    "duty_expression" and returns the "base" attribute of the first one whose
    "formatted_base" does not contain 'span' and whose "base" is non-empty.

    Args:
        json_file: Decoded JSON response (a dict). Its "included" entry, when
            present, is a list of dicts with "type" and "attributes" keys.

    Returns:
        The supplementary unit string, or '-' when no entry qualifies. This
        includes responses with no 'included' section or no duty expressions.
    """
    for entry in json_file.get("included") or []:
        if entry.get("type") != "duty_expression":
            continue

        attributes = entry.get("attributes") or {}
        base = attributes.get("base")
        formatted_base = attributes.get("formatted_base") or ''

        # Entries whose formatted text carries 'span' markup are not treated
        # as supplementary units (presumably they are duty rates - confirm
        # against the API documentation).
        if 'span' in formatted_base:
            continue
        if base:
            return base

    # Assign hyphen symbol to commodities without supplementary unit.
    return '-'

def request(cn8):
    """Check one commodity code and summarise the outcome as a result row.

    Args:
        cn8: Commodity code to look up, passed through to get_response().

    Returns:
        A list ``[cn8, status_code, valid, description, su]`` where ``valid``
        is True only for a 200 response. For any other status the description
        is a fixed explanatory message and ``su`` is None.
    """
    reply = get_response(cn8)
    status = reply.status_code

    # Failure path first: no payload is parsed for non-200 responses.
    if status != 200:
        if status == 404:
            reason = 'Invalid commodity code - no description'
        else:
            reason = 'Unexpected request error'
        return [cn8, status, False, reason, None]

    # Successful lookup: pull description and supplementary unit from the body.
    payload = json.loads(reply.text)
    return [
        cn8,
        status,
        True,
        get_description(payload),
        get_supplementary(payload),
    ]

def run_check(df, column_name):
    """Run the commodity check on every row of ``df`` and append the results.

    Args:
        df: Input dataframe containing the commodity codes.
        column_name: Name of the column in ``df`` that holds the codes.

    Returns:
        A new dataframe with the columns of ``df`` followed by the response
        columns (COLUMNS_RESP), one row per input row, carrying the same
        index as ``df``.
    """
    # Run validity check on each row of dataframe. Append response.
    data = [request(c) for c in df[column_name]]
    df_out = pd.DataFrame(data, columns=COLUMNS_RESP)

    # The results are in input row order, so pair them with the input by
    # position. Merging on df's own index would drop or mismatch rows whenever
    # that index is not the default 0..n-1 (e.g. a filtered dataframe).
    merged = pd.merge(
        df.reset_index(drop=True), df_out, left_index=True, right_index=True
    )
    merged.index = df.index
    return merged

def main():
    """Run the commodity check on a small set of sample codes and show it."""
    data = ['46012110','61041990','40151200','01022910','22021000','99999999']
    df = pd.DataFrame(data,columns=['CC'])
    result = run_check(df,'CC')

    # display() is only defined inside IPython/Jupyter sessions; fall back to
    # print so the script also works when run with a plain interpreter.
    try:
        show = display
    except NameError:
        show = print
    show(result)


if __name__ == '__main__':
    main()

Unnamed: 0,CC,CN8,Response Code,Valid,Description,SU
0,46012110,46012110,200,True,Of plaits or similar products of plaiting mate...,-
1,61041990,61041990,200,True,Of artificial fibres,p/st
2,40151200,40151200,200,True,"Of a kind used for medical, surgical, dental o...",pa
3,1022910,1022910,200,True,"Young male bovine animals, intended for fattening",p/st
4,22021000,22021000,200,True,"Waters, including mineral waters and aerated w...",l
5,99999999,99999999,404,False,Invalid commodity code - no description,
