## Delay Report

In [1]:
# Imports
import pandas as pd
import numpy as np
import random
import os
import json
import requests
import time

from tqdm.auto import tqdm
from pathlib import Path
from datetime import datetime, timedelta

In [2]:
from delay_report import OOCLExtractor, MSCExtractor, G2Extractor, DelayReport, write_json, read_config

In [3]:
# Never truncate columns when a DataFrame is displayed
pd.options.display.max_columns = None
# pd.options.display.max_rows = None

In [4]:
# Load the run configuration
config = json.loads(Path("data/config.json").read_text())

# Carrier-name lookup: translates carrier names to the ones BigSchedule uses and supports
carrier_mapping = json.loads(Path("data/carrier_mapping.json").read_text())

# # Prepare base information
# # UNLOCODE to port name mapping
# port_mapping = (
#     pd.concat([pd.read_csv(p, usecols=[1, 2, 4, 5], engine='python', names=[
#               'country', 'port', 'name', 'subdiv']) for p in Path('data').glob("*UNLOCODE CodeListPart*")])
#     .query('port == port')
#     .assign(
#         uncode=lambda x: x.country.str.cat(x.port),
#         full_name=lambda x: np.where(
#             x.subdiv.notnull(), x.name.str.cat(x.subdiv, sep=", "), x.name)
#     )
#     .drop_duplicates('uncode')
#     .set_index('uncode')
#     .to_dict('index')
# )

# Open the vessel delay tracking workbook (individual sheets are parsed in later cells)
xl = pd.ExcelFile('Vessel Delay Tracking.xlsx')

In [11]:
# Sheet names are parsed as dd.mm.yyyy dates; names that do not parse become NaT
sheet_dates = pd.to_datetime(xl.sheet_names, errors='coerce', format='%d.%m.%Y')
# Name of the most recent dated sheet, rebuilt in the same dd.mm.yyyy form
latest_sheet_name = sheet_dates.max().date().strftime('%d.%m.%Y')

# Every carrier alias that carrier_mapping resolves to 'HAMBURG'
hamburg_aliases = [k for k, v in carrier_mapping.items() if v == 'HAMBURG']

# Keep only the Hamburg rows of the latest sheet and normalise the agent name
hamburg_sheet = (
    xl.parse(latest_sheet_name, parse_dates=True)
    .query(f"`Fwd Agent` in {hamburg_aliases}")
    .replace({'Fwd Agent': carrier_mapping})
)

In [14]:
# Display the filtered Hamburg rows built in the previous cell
hamburg_sheet

Unnamed: 0,Plnt,Req. Delivery Date,Shipment,Term,Sold-to-Party Name,Ship-to-Pty,Sales Ord.,Delivery,Description,Product Type,Vessel,Voyage,ETD Date,Disport ETA,Gross Weight,Port of Loading,Port of discharge,Incoterms Part2,No. of Containers,Container Type,MetPro Status,Fwd Agent,Booking Ref.,Reason for rejection description,No. of bundles,Item Status Information,Incoterms Part1,Shipping Cond,BOL Date,updated_etd,updated_eta,No. of days delayed ETD,No. of days delayed ETA,Reason of Delay
2,2503,2020-08-05,30012761,1750,ALCAN PRIMARY PRODUCTS COMPANY LLC,VISTASALES,15018166,802094769,IO 23KG B356.2 CONTAINER 44 N,IO,MAERSK IZMIR,030N,2020-08-01,2020-08-26,19.577,NZPOE,USNHD,"NORTH HOLLYWOOD, LA, USA",1,TEU,SHIPPED,HAMBURG,0DUD006943,,19,ETA 01.09.20,CIP,31,2020-08-01,2020-08-01,2020-08-26,,,
3,2503,2020-08-05,30012761,1750,ALCAN PRIMARY PRODUCTS COMPANY LLC,VISTASALES,15018167,802094770,IH 23KG P0404A CONTAINER 44 N,IH,MAERSK IZMIR,030N,2020-08-01,2020-08-26,19.617,NZPOE,USNHD,"NORTH HOLLYWOOD, LA, USA",1,TEU,SHIPPED,HAMBURG,0DUD006944,,19,ETA 01.09.20,CIP,31,2020-08-01,2020-08-01,2020-08-26,,,
40,2501,2020-08-05,30012768,1005,MCKECHNIE ALUMINIUM SOLUTIONS LTD,CMCKMETAL,15018180,802094824,BT 228 SF6060 5800 5/5 H,BT,MAERSK GARONNE,028S,2020-08-04,2020-08-09,135.16,AUBNE,NZNPL,"NEW PLYMOUTH CT, NEW ZEALAND",6,CNB,SHIPPED,HAMBURG,0BNE006501,,42,,DAT,31,2020-08-04,2020-08-04,2020-08-09,0.0,,
59,2504,2020-07-30,30012752,4050,BLUEQUEST RESOURCES (OVERSEAS) LTD.,BLUEQUESBR,15018156,802094236,BT 178 6063T 5801 6/6 H3,BT,MAERSK DANUBE,031N,2020-08-06,2020-10-13,103.37,AUSYD,BRIOA,"ITAPOA, BRAZIL",4,B26,SHIPPED,HAMBURG,0BNE006032,,44,O,CFR,31,2020-08-06,2020-08-06,2020-10-13,,,
60,2504,2020-07-30,30012752,4050,BLUEQUEST RESOURCES (OVERSEAS) LTD.,BLUEQUESBR,15018157,802094237,BT 178 6063T 5801 6/6 H3,BT,MAERSK DANUBE,031N,2020-08-06,2020-10-13,103.45,AUSYD,BRIOA,"ITAPOA, BRAZIL",4,B26,SHIPPED,HAMBURG,0BNE006034,,44,O,CFR,31,2020-08-06,2020-08-06,2020-10-13,,,
61,2504,2020-07-30,30012752,4050,BLUEQUEST RESOURCES (OVERSEAS) LTD.,BLUEQUESBR,15018158,802094238,BT 178 6063T 5801 6/6 H3,BT,MAERSK DANUBE,031N,2020-08-06,2020-10-13,103.402,AUSYD,BRIOA,"ITAPOA, BRAZIL",4,B26,SHIPPED,HAMBURG,0BNE006035,,44,O,CFR,31,2020-08-06,2020-08-06,2020-10-13,,,
62,2504,2020-07-30,30012752,4050,BLUEQUEST RESOURCES (OVERSEAS) LTD.,BLUEQUESBR,15018159,802094239,BT 178 6063T 5801 6/6 H3,BT,MAERSK DANUBE,031N,2020-08-06,2020-10-13,103.442,AUSYD,BRIOA,"ITAPOA, BRAZIL",4,B26,SHIPPED,HAMBURG,0BNE006036,,44,O,CFR,31,2020-08-06,2020-08-06,2020-10-13,,,
63,2504,2020-07-30,30012752,4050,BLUEQUEST RESOURCES (OVERSEAS) LTD.,BLUEQUESBR,15018160,802094240,BT 178 6063T 5801 6/6 H3,BT,MAERSK DANUBE,031N,2020-08-06,2020-10-13,103.442,AUSYD,BRIOA,"ITAPOA, BRAZIL",4,B26,SHIPPED,HAMBURG,0BNE006037,,44,O,CFR,31,2020-08-06,2020-08-06,2020-10-13,,,
64,2504,2020-07-30,30012752,4050,BLUEQUEST RESOURCES (OVERSEAS) LTD.,BLUEQUESBR,15018161,802094241,BT 178 6063T 5801 6/6 H3,BT,MAERSK DANUBE,031N,2020-08-06,2020-10-13,103.446,AUSYD,BRIOA,"ITAPOA, BRAZIL",4,B26,SHIPPED,HAMBURG,0BNE006038,,44,O,CFR,31,2020-08-06,2020-08-06,2020-10-13,,,
91,2501,2020-08-15,30012769,4050,"HIHO METAL CO., LTD.",HIHO,15018173,802094823,IE 22KG BA170.1 CNTR 44 N,IE,MAERSK DANUBE,031N,2020-08-14,2020-09-02,413.75,AUBNE,KRBNP,"ANY BUSAN PORT, KOREA",17,CNO,SHIPPED,HAMBURG,0BNE006562,,408,,CIF,31,2020-08-14,2020-08-14,2020-09-02,0.0,,


In [None]:
# Request headers for the Hamburg Sud schedules API.
# The API key is read from the HAMBURG_SUD_API_KEY environment variable so the secret
# is not stored in the notebook; when the variable is unset the header value is empty.
# NOTE(review): a key was previously hard-coded in this cell in plain text — it should
# be treated as leaked and rotated.
headers = {
    'x-api-key': os.environ.get('HAMBURG_SUD_API_KEY', '')
}

In [None]:
url = "https://api.hamburgsud-line.com/v1/schedules/point-to-point"

def set_params(df, index, search_date='2020-07-16',
               pol_col='port_of_loading', pod_col='port_of_discharge'):
    """
    Build the query parameters for one point-to-point schedule search.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding one shipment per row.
    index : int
        Positional row number (used with ``df.iloc``) of the shipment to search for.
    search_date : str, optional
        Value sent as ``searchDate``. Defaults to '2020-07-16', the date that was
        previously hard-coded here.
    pol_col, pod_col : str, optional
        Names of the columns holding the loading and discharge ports. The defaults keep
        the original column names; pass e.g. 'Port of Loading' / 'Port of discharge'
        when the frame uses the delay sheet's own headers.

    Returns
    -------
    dict
        ``{"searchDate": ..., "from": ..., "to": ...}``

    Raises
    ------
    KeyError
        If ``pol_col`` or ``pod_col`` is not a column of ``df``.
    IndexError
        If ``index`` is out of range for ``df``.
    """
    row = df.iloc[index]
    return {
        "searchDate": search_date,
        "from": row[pol_col],
        "to": row[pod_col],
    }

In [None]:
class OOCLExtractor:
    """
    Fills in updated ETD/ETA dates for the OOCL lines of the vessel delay sheet.

    The methods are meant to be called in this order, each one reading attributes set by
    the previous one: ``get_location_id`` -> ``prepare`` -> ``call_api`` -> ``extract``.

    NOTE(review): this notebook-level class shadows the ``OOCLExtractor`` imported from
    ``delay_report`` in an earlier cell.
    """

    def __init__(self, main_delay_sheet: pd.DataFrame, interval: tuple, carrier_mapping: dict,
                 port_mapping_path: str = '../../data/OOCL Port Code Mapping.xlsx'):
        """
        Parameters
        ----------
        main_delay_sheet : pd.DataFrame
            Full delay sheet; must contain 'Fwd Agent', 'Port of Loading',
            'Port of discharge' and the five result columns dropped below.
        interval : tuple
            ``(low, high)`` integer bounds, unpacked into ``random.randint`` to pick the
            number of seconds to sleep after each schedule request.
        carrier_mapping : dict
            Maps the carrier names found in 'Fwd Agent' to canonical carrier names.
        port_mapping_path : str, optional
            Excel workbook with 'Port Code' and 'Port Name' columns. Defaults to the
            path that was previously hard-coded.
        """
        # Get the carrier mapping
        self.carrier_mapping = carrier_mapping

        # Get the OOCL delay sheet: keep rows whose agent maps to 'OOCL', normalise the
        # agent name, and drop the result columns that extract() recomputes
        self.delay_sheet = (main_delay_sheet.query(f"`Fwd Agent` in {[k for k, v in self.carrier_mapping.items() if v == 'OOCL']}")
                            .replace({'Fwd Agent': self.carrier_mapping})
                            .drop(['updated_etd', 'updated_eta', 'No. of days delayed ETD',
                                   'No. of days delayed ETA', 'Reason of Delay'], axis=1)
                            .copy())

        # Get the OOCL-specific port names from the port codes
        port_codes = pd.read_excel(port_mapping_path)
        self.port_mapping = dict(zip(port_codes['Port Code'], port_codes['Port Name']))

        # Get port name (None when the code is not in the mapping)
        self.delay_sheet = self.delay_sheet.assign(
            pol_name=lambda x: x['Port of Loading'].apply(lambda y: self.port_mapping.get(y)),
            pod_name=lambda x: x['Port of discharge'].apply(lambda y: self.port_mapping.get(y))).copy()

        self.interval = interval
        self.oocl_port_id = {}
        self.session = requests.Session()

    def get_location_id(self):
        """
        Populates ``self.oocl_port_id`` (port name -> OOCL LocationID).

        If 'OOCL portID.json' already exists in the working directory it is loaded through
        ``read_config`` and no request is made. Otherwise every distinct, non-missing port
        name on the delay sheet is looked up on the OOCL autocomplete endpoint, the result
        is written to 'OOCL portID.json' (when non-empty), and the names that returned no
        ID are written to 'oocl_exceptions.txt'.
        """
        if os.path.exists('OOCL portID.json'):
            read_config(self, 'oocl_port_id', 'OOCL portID.json')
            return

        def get_id(response):
            # 'data' may be missing or null; treat that the same as "no results"
            results = (response.json().get('data') or {}).get('results')
            if results:
                return results[0].get('LocationID')
            return None

        def query_id(port: str):
            url = f"https://www.oocl.com/_catalogs/masterpage/AutoCompleteSailingSchedule.aspx?type=sailingSchedule&Pars={port}"
            headers = {
                'Sec-Fetch-User': '?1',
                'Accept-Encoding': 'gzip, deflate, br',
                'Sec-Fetch-Site': 'none',
                'Sec-Fetch-Mode': 'navigate',
                'Sec-Fetch-Dest': 'document',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
                'Accept-Language': 'en-GB,en;q=0.9',
                'Upgrade-Insecure-Requests': "1",
                'Cache-Control': 'max-age=0',
            }
            return self.session.get(url, headers=headers)

        # A port can be both a POL and a POD, and unmapped port codes yield a missing
        # name: query each real name exactly once
        oocl_locations = list(
            pd.concat([self.delay_sheet.pol_name, self.delay_sheet.pod_name]).dropna().unique())
        self.oocl_port_id = {location: get_id(query_id(location))
                             for location in tqdm(oocl_locations)}
        if len(self.oocl_port_id):
            write_json(self.oocl_port_id, 'OOCL portID.json')

        # Port names for which no LocationID was found
        exception_cases = [k for k, v in self.oocl_port_id.items() if v is None]
        write_json(exception_cases, 'oocl_exceptions.txt')

    def prepare(self):
        """
        Builds ``self.reduced_df``: the distinct (pol_name, pod_name) pairs on the delay
        sheet together with their OOCL location IDs (``pol_code`` / ``pod_code``), i.e. the
        smallest set of searches needed to cover every line of ``self.delay_sheet``.
        """
        key = ['pol_name', 'pod_name']
        self.reduced_df = (self.delay_sheet[key]
                           .drop_duplicates()
                           .sort_values(key)
                           .assign(pol_code=lambda x: x.pol_name.map(self.oocl_port_id),
                                   pod_code=lambda x: x.pod_name.map(self.oocl_port_id))
                           # A pair cannot be searched without both location IDs; drop those lines
                           .dropna())

    def call_api(self):
        """
        Fetches the OOCL schedule for every port pair in ``self.reduced_df`` and collects
        the parsed JSON bodies in ``self.response_jsons``.

        A non-empty response is cached in the working directory as
        'OOCL <pol_code>-<pod_code>.json'; when that file already exists it is loaded
        instead of calling the API. After each real request the method sleeps for
        ``random.randint(*self.interval)`` seconds.
        """
        def get_schedules(pol_locationID: str, pod_locationID: str, pol_name: str, pod_name: str):
            url = "http://moc.oocl.com/nj_prs_wss/mocss/secured/supportData/nsso/searchHubToHubRoute"
            headers = {
                'Accept': 'application/json, text/plain, */*',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',
                'Origin': 'http://moc.oocl.com',
                'Referer': 'http://moc.oocl.com/nj_prs_wss/',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'en-GB,en;q=0.9',
                # NOTE(review): looks like a cookie captured from a browser session — confirm it is still needed/valid
                'Cookie': 'userSearchHistory=%5B%7B%22origin%22%3A%22Brisbane%2C%20Queensland%2C%20Australia%22%2C%22destination%22%3A%22Bangkok%2C%20Thailand%22%2C%22originId%22%3A%22461802935875046%22%2C%22destinationId%22%3A%22461802935876800%22%2C%22originCountryCode%22%3A%22%22%2C%22destinationCountryCode%22%3A%22%22%2C%22transhipment_PortId%22%3Anull%2C%22transhipment_Port%22%3Anull%2C%22service%22%3Anull%2C%22port_of_LoadId%22%3Anull%2C%22port_of_Load%22%3Anull%2C%22port_of_DischargeId%22%3Anull%2C%22port_of_Discharge%22%3Anull%2C%22origin_Haulage%22%3A%22cy%22%2C%22destination_Haulage%22%3A%22cy%22%2C%22cargo_Nature%22%3A%22dry%22%2C%22sailing%22%3A%22sailing.from%22%2C%22weeks%22%3A%222%22%7D%5D; AcceptCookie=yes; BIGipServeriris4-wss=1597103762.61451.0000; BIGipServerpool_ir4moc=590470802.20480.0000; BIGipServerpool_moc_8011=2022663115.19231.0000'
            }

            # The search starts from tomorrow's date
            search_date = (datetime.today() + timedelta(days=1)).strftime('%Y-%m-%d')
            payload = {
                "date": search_date,
                "displayDate": search_date,
                "transhipment_Port": None,
                "port_of_Load": None,
                "port_of_Discharge": None,
                "sailing": "sailing.from",
                "weeks": "2",
                "transhipment_PortId": None,
                "service": None,
                "port_of_LoadId": None,
                "port_of_DischargeId": None,
                "origin_Haulage": "cy",
                "destination_Haulage": "cy",
                "cargo_Nature": "dry",
                "originId": f"{pol_locationID}",
                "originCountryCode": "",
                "destinationCountryCode": "",
                "destinationId": f"{pod_locationID}",
                "origin": f"{pol_name}",
                "destination": f"{pod_name}",
                "weeksSymbol": "+"
            }

            return self.session.post(url, headers=headers, data=payload)

        self.response_jsons = []
        for row in tqdm(self.reduced_df.itertuples(), total=len(self.reduced_df)):
            pol_id, pod_id = int(row.pol_code), int(row.pod_code)
            response_filename = f'OOCL {pol_id}-{pod_id}.json'

            # Reuse a cached response when one exists
            if os.path.exists(response_filename):
                with open(response_filename, 'r') as f:
                    self.response_jsons.append(json.load(f))
                continue

            # Parse the body once and reuse it for both the result list and the cache
            body = get_schedules(pol_id, pod_id, row.pol_name, row.pod_name).json()
            self.response_jsons.append(body)
            if len(body):
                write_json(body, response_filename)
            time.sleep(random.randint(*self.interval))

    def extract(self):
        """
        Extracts vessel, voyage, ETD and ETA from the JSON responses collected by
        ``call_api`` and merges ``updated_eta`` / ``updated_etd`` (as datetimes) onto
        ``self.delay_sheet``, matching on port names, vessel and voyage.

        For each route the voyage reference, vessel name and ETD come from the FIRST leg of
        type "Voyage" and the ETA from the LAST leg of type "Voyage". Routes without any
        voyage leg are skipped. When no route is available at all, both columns are added
        as NaT.
        """
        def get_relevant_fields(route):
            legs = route['Legs']
            voyage_legs = [leg for leg in legs if leg.get('Type') == "Voyage"]
            if not voyage_legs:
                # Nothing to match against the delay sheet without a vessel/voyage
                return None
            first_voyage, last_voyage = voyage_legs[0], voyage_legs[-1]
            return {
                'pol_code': legs[0]['City']['ID'],
                'pod_code': legs[-1]['City']['ID'],
                'Voyage': first_voyage['ExternalVoyageReference'],
                'Vessel': first_voyage['VesselName'],
                'updated_etd': first_voyage['FromETDLocalDateTime']['dateStr'],
                'updated_eta': last_voyage['ToETALocalDateTime']['dateStr'],
            }

        routes = [route
                  for response in self.response_jsons
                  if response
                  for route in ((response.get('data') or {}).get('standardRoutes') or [])]
        records = [record for record in map(get_relevant_fields, routes) if record is not None]

        # Explicit columns keep the frame well-formed even when there are no records
        self.response_df = pd.DataFrame(
            records,
            columns=['pol_code', 'pod_code', 'Voyage', 'Vessel', 'updated_etd', 'updated_eta'])

        if self.response_df.empty:
            self.delay_sheet = self.delay_sheet.assign(updated_eta=pd.NaT, updated_etd=pd.NaT)
            return

        # Create reverse mapping from port_code to name
        # NOTE(review): assumes the route City IDs have the same type as the LocationIDs
        # stored in oocl_port_id; otherwise no name is found — confirm
        oocl_port_id_reversed = {v: k for k, v in self.oocl_port_id.items()}

        self.response_df['pol_name'] = self.response_df.pol_code.map(
            oocl_port_id_reversed)
        self.response_df['pod_name'] = self.response_df.pod_code.map(
            oocl_port_id_reversed)

        # For repeated sailings of the same vessel/voyage keep the earliest ETA
        self.response_df = self.response_df.sort_values('updated_eta').drop_duplicates(
            ['pol_code', 'pod_code', 'Voyage', 'Vessel'])

        merge_key = ['pol_name', 'pod_name', 'Vessel', 'Voyage']
        # reset_index/set_index keeps the delay sheet's original index through the merge
        self.delay_sheet = (self.delay_sheet.reset_index().
                            merge(self.response_df[merge_key + ['updated_eta', 'updated_etd']],
                                  on=merge_key, how='left')
                            .set_index('index')
                            .copy())

        # The first 8 characters of dateStr are the YYYYMMDD date
        self.delay_sheet['updated_eta'] = pd.to_datetime(
            self.delay_sheet['updated_eta'].str[:8], format='%Y%m%d')
        self.delay_sheet['updated_etd'] = pd.to_datetime(
            self.delay_sheet['updated_etd'].str[:8], format='%Y%m%d')


In [None]:
# Port code -> port name lookup, read from the OOCL port code workbook
oocl_ports = pd.read_excel('data/OOCL Port Code Mapping.xlsx')
port_mapping = dict(zip(oocl_ports['Port Code'], oocl_ports['Port Name']))

# Attach the loading/discharge port names; codes absent from the lookup become None
hamburg_sheet = hamburg_sheet.assign(
    pol_name=hamburg_sheet['Port of Loading'].apply(lambda code: port_mapping.get(code)),
    pod_name=hamburg_sheet['Port of discharge'].apply(lambda code: port_mapping.get(code)),
)

In [None]:
# Show the current working directory; the relative paths used in this notebook
# (e.g. "data/config.json") are resolved against it
os.getcwd()

In [None]:
# os.chdir('../..')
# Delay report skeleton
# Drives the report through DelayReport (imported from delay_report): one run_* call
# per carrier (OOCL, MSC, G2), then calculate_deltas() and output().
# NOTE(review): presumably each run_* step fetches that carrier's updated ETD/ETA and
# calculate_deltas() derives the "No. of days delayed" columns — confirm against
# delay_report, whose source is not visible here.
delay_report = DelayReport()
delay_report.run_oocl()
delay_report.run_msc()
delay_report.run_g2()
delay_report.calculate_deltas()
delay_report.output()