In [97]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm

from joblib import Parallel, delayed

In [16]:
# Read the delivery-orders CSV and convert the frame to a NumPy array; later
# cells iterate over `npar` row by row and index each row positionally.
df = pd.read_csv('logistics-shopee-code-league/delivery_orders_march.csv')
npar = np.array(df)


In [98]:
# SLA lookup table, read as sla_mat[seller_idx][buyer_idx] (see get_sla).
# Each index is the position returned by deststr2idx:
#   0 = Metro Manila, 1 = Luzon, 2 = Visayas, 3 = Mindanao.
# process_order compares the looked-up value against a day count.
sla_mat = [[3, 5, 7, 7], [5, 5, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]]

def deststr2idx(s):
    """Map an address string to the index of the first known region it mentions.

    NOTE: this name is defined a second time further down in the same cell;
    when the cell runs, the later definition replaces this one.

    Args:
        s: Address text. Matching is a case-insensitive substring test.

    Returns:
        0 for 'Metro Manila', 1 for 'Luzon', 2 for 'Visayas', 3 for 'Mindanao'
        (regions are tried in that order), or ``False`` when none is present.
        Beware that ``False`` behaves like index 0 when used as a subscript.
    """
    dests = ['Metro Manila', 'Luzon', 'Visayas', 'Mindanao']
    haystack = s.lower()
    for idx, dest in enumerate(dests):
        # Previously tested `i.lower()`; `i` is not defined in this function,
        # so the lookup raised NameError (or silently used a stray global).
        if dest.lower() in haystack:
            return idx
    return False

def timestamp2date(ts):
    """Convert a Unix timestamp to a naive datetime at midnight of that day.

    Args:
        ts: Seconds since the epoch (int or float).

    Returns:
        datetime: The calendar day of ``ts`` with the time-of-day zeroed.

    NOTE(review): ``datetime.fromtimestamp`` without ``tz`` converts using the
    machine's local timezone, so the resulting day depends on where this runs.
    Confirm that matches the timezone the data is meant to be read in.
    """
    dt = datetime.fromtimestamp(ts)
    # Clear microseconds as well: a fractional timestamp would otherwise leave
    # a sub-second remainder and break equality/ordering against midnight dates.
    return dt.replace(hour=0, minute=0, second=0, microsecond=0)

def deststr2idx(s):
    """Return the position of the first region name found inside ``s``.

    Regions are tried in the order Metro Manila, Luzon, Visayas, Mindanao and
    compared case-insensitively as substrings. ``False`` is returned when no
    region name occurs in ``s``; note that ``False`` acts like 0 if it is
    later used as a list index.
    """
    regions = ('Metro Manila', 'Luzon', 'Visayas', 'Mindanao')
    hits = (pos for pos, region in enumerate(regions) if region.lower() in s.lower())
    return next(hits, False)

def get_sla(seller_add, buyer_add):
    """Look up the SLA value for a seller/buyer address pair.

    Both addresses are converted to region indices with ``deststr2idx``; the
    seller index selects the row of ``sla_mat`` and the buyer index the column.
    """
    row_pos, col_pos = deststr2idx(seller_add), deststr2idx(buyer_add)
    return sla_mat[row_pos][col_pos]

# Dates of interest in March 2020:
#   2020-03-08 (Sunday)
#   2020-03-25 (Wednesday)
#   2020-03-30 (Monday)
#   2020-03-31 (Tuesday)
# Only the three weekday dates are listed below; 2020-03-08 is left out
# (presumably because Sundays are already counted separately by count_sundays).

public_holidays = [
    datetime.strptime(day, '%Y-%m-%d')
    for day in ('2020-03-25', '2020-03-30', '2020-03-31')
]

def count_holidays(start, end):
    """Count the entries of ``public_holidays`` lying in ``[start, end]``.

    Both bounds are inclusive.
    """
    return sum(1 for holiday in public_holidays if holiday >= start and holiday <= end)

def count_sundays(start, end):
    """Count the Sundays among the days ``start``, ``start + 1 day``, ... up to ``end``.

    Both bounds are inclusive, matching ``count_holidays``.

    Args:
        start: First day (datetime).
        end: Last day (datetime). If it is earlier than ``start`` the result is 0.

    Returns:
        int: Number of stepped days whose ``weekday()`` is 6 (Sunday).
    """
    # The previous loop always tested `start + timedelta(days=1)`, so it never
    # advanced: it returned either 0 or 10000 and could spin 10000 times per call.
    span = (end - start).days + 1  # how many whole days are stepped through
    if span <= 0:
        return 0
    first_sunday = (6 - start.weekday()) % 7  # day offset of the first Sunday
    if first_sunday >= span:
        return 0
    # One Sunday at `first_sunday`, then one more for every further full week.
    return (span - 1 - first_sunday) // 7 + 1

def process_order(row):
    """Classify one order row, returning 1 or 0 (collected as ``islate``).

    Row layout as used here: ``row[1]`` pick timestamp, ``row[2]`` first
    delivery-attempt timestamp, ``row[3]`` second-attempt timestamp or NaN,
    ``row[-2]`` buyer address, ``row[-1]`` seller address.

    Returns 1 when a second attempt exists and falls 3 or more days after the
    first, or when the days from pick to the last attempt (minus holidays and
    Sundays in that range) exceed the SLA; returns 0 otherwise.

    NOTE(review): the gap between attempts is measured in calendar days while
    the SLA check nets out holidays and Sundays, and when a second attempt
    exists the SLA is checked against it rather than the first attempt.
    Confirm both are intended.
    """
    sla = get_sla(row[-1], row[-2])
    picked_on = timestamp2date(row[1])
    first_try = timestamp2date(row[2])

    if np.isnan(row[3]):
        # No second attempt: the first attempt is the one measured against the SLA.
        final_try = first_try
    else:
        final_try = timestamp2date(row[3])
        if (final_try - first_try).days >= 3:
            return 1

    non_working = count_holidays(picked_on, final_try) + count_sundays(picked_on, final_try)
    elapsed = (final_try - picked_on).days - non_working
    return 0 if elapsed <= sla else 1

# Apply process_order to every row of npar using 8 joblib workers; tqdm wraps the
# row iterator to show progress. The per-row 0/1 results are collected in islate.
islate = Parallel(n_jobs=8)(delayed(process_order)(row) for row in tqdm(npar))


  0%|          | 0/3176313 [00:00<?, ?it/s][A
  0%|          | 1/3176313 [00:00<206:29:41,  4.27it/s][A
  0%|          | 2/3176313 [00:00<180:22:02,  4.89it/s][A
  0%|          | 16/3176313 [00:01<145:04:17,  6.08it/s][A
  0%|          | 17/3176313 [00:01<128:51:05,  6.85it/s][A
  0%|          | 27/3176313 [00:01<93:07:05,  9.48it/s] [A
  0%|          | 38/3176313 [00:01<67:35:45, 13.05it/s][A
  0%|          | 66/3176313 [00:01<48:26:28, 18.21it/s][A
  0%|          | 158/3176313 [00:01<34:12:30, 25.79it/s][A
  0%|          | 238/3176313 [00:02<24:39:42, 35.77it/s][A
  0%|          | 288/3176313 [00:02<17:48:48, 49.53it/s][A
  0%|          | 327/3176313 [00:02<13:42:30, 64.36it/s][A
  0%|          | 368/3176313 [00:02<10:16:19, 85.88it/s][A
  0%|          | 404/3176313 [00:02<8:25:38, 104.68it/s][A
  0%|          | 458/3176313 [00:02<6:27:46, 136.50it/s][A
  0%|          | 494/3176313 [00:02<5:43:58, 153.88it/s][A
  0%|          | 526/3176313 [00:03<4:58:52, 177.10it/s]

KeyboardInterrupt: 

In [65]:
# Sanity check: both sides are the same call with the same argument, so this only
# confirms that timestamp2date returns equal values for equal input.
timestamp2date(1583079143) == timestamp2date(1583079143)

True