# Tasking Data Processing
To pull from Splunk:

```
index="" host="" sourcetype=""
| search "Received Status Update"
| rex field=_raw "itemIds:\[(?<skuNo>\d+)\]"
| rex field=_raw "pickLocationEmptyConfidence:.*?statusCode:(?<statusCode>\d+), statusDetail:,"
| rex field=_raw "customerTaskId:(?<taskID>\w+), itemIds"
| rex field=_raw "customerTaskId:\d{3}_(?<stationID>\d+)_"
| rex field=_raw "\"suction_cup_name\":\"(?<suctionCupName>\w+)\""
| rex field=_raw "commandUtime:(?<ctime>\d+)\}"
| eval commandTime = strftime(ctime/1000000,"%Y-%m-%d %H:%M:%S")
| dedup skuNo, statusCode, taskID, ctime keepempty=true consecutive=false
| table skuNo, statusCode, stationID, taskID, suctionCupName, ctime, commandTime
```

# Process the raw tasking data
* Read the data from the CSV file
* Fill in a missing cup configuration from the previous task
* Remove tasks with UPC-error and pause statuses

In [264]:
import csv
import os
import pathlib
import datetime
# Input CSV (the Splunk export); it is looked up in the current working directory.
fname = 'rhr_tasks_0406_0706.csv'
fpath = pathlib.Path.cwd() / fname

In [265]:
class Task:
    """One status-update record from the tasking export (one CSV row).

    All fields are stored as passed in, except ``ctime`` (converted to ``int``)
    and ``commandTime`` (stored as a ``datetime.datetime``).
    """

    # Text format of the commandTime column, matching the strftime pattern
    # used in the Splunk query that produces the export.
    COMMAND_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, sku, status, stationId, taskId, suctionCup, ctime, commandTime):
        """Build a task record.

        Args:
            sku: SKU number of the item.
            status: Status code of the update.
            stationId: Station identifier.
            taskId: Customer task identifier.
            suctionCup: Suction cup name; may be '' or None when unknown.
            ctime: Command timestamp (the Splunk query divides it by 1e6 to get
                seconds, i.e. microseconds since the epoch); anything ``int()`` accepts.
            commandTime: Command time, either as a ``'%Y-%m-%d %H:%M:%S'`` string
                or as an already-parsed ``datetime.datetime``.

        Raises:
            ValueError: If ``ctime`` is not an integer or a ``commandTime``
                string does not match the expected format.
        """
        self.sku = sku
        self.status = status
        self.stationId = stationId
        self.taskId = taskId
        self.suctionCup = suctionCup
        self.ctime = int(ctime)
        # Accept a datetime directly so that copying a Task does not require
        # formatting the timestamp back to text first.
        if isinstance(commandTime, datetime.datetime):
            self.commandTime = commandTime
        else:
            self.commandTime = datetime.datetime.strptime(commandTime, self.COMMAND_TIME_FORMAT)

    def __str__(self):
        """Return all attributes as comma-separated ``name:value`` pairs."""
        return ','.join(k + ':' + str(v) for k, v in vars(self).items())

In [266]:
raws = []
# newline='' is what the csv module recommends, so that it can handle line
# endings (including newlines inside quoted fields) itself.
with open(fpath, 'r', newline='') as file:
    csv_reader = csv.reader(file)
    for row in csv_reader:
        # Skip blank lines (csv.reader yields []) and rows without an extracted SKU.
        if not row or row[0] == '':
            continue
        try:
            raws.append(Task(row[0], row[1], row[2], row[3], row[4], row[5], row[6]))
        except (IndexError, ValueError):
            # Too few columns, or a ctime/commandTime value that does not parse.
            # The header row lands here as well. Print the row so that skipped
            # input stays visible instead of silently disappearing.
            print(row)

['skuNo', 'statusCode', 'stationID', 'taskID', 'suctionCupName', 'ctime', 'commandTime']


In [267]:
# sort raw
def sort_raw(raw: Task):
    """Sort key for a Task: station first, then command timestamp.

    Note that stationId is kept as passed to Task, so when it is a string
    the station ordering is lexicographic; ctime is an int.
    """
    station, timestamp = raw.stationId, raw.ctime
    return station, timestamp


raws = sorted(raws, key=sort_raw)

In [268]:
# Number of Task records parsed from the CSV (sanity check on the load).
len(raws)

256092

In [269]:
# Preview the first few sorted records. A plain loop avoids building a
# throw-away list of None values that the cell would otherwise echo.
for x in raws[0:5]:
    print(x)

sku:1612062,status:200451,stationId:44,taskId:173_44_13433619_17801046_1_12539726_30007_1335673_1_346256_1_1,suctionCup:swappable_vs_25_nr,ctime:1681124534309755,commandTime:2023-04-10 07:02:14
sku:1612062,status:200302,stationId:44,taskId:173_44_13433619_17801046_1_12539726_30007_1335673_1_346256_1_1,suctionCup:swappable_vs_25_nr,ctime:1681124534309755,commandTime:2023-04-10 07:02:14
sku:1612062,status:200451,stationId:44,taskId:173_44_13433619_17801046_1_12539726_30007_1335673_1_346256_1_1,suctionCup:,ctime:1681124558989463,commandTime:2023-04-10 07:02:38
sku:1612062,status:200302,stationId:44,taskId:173_44_13433619_17801046_1_12539726_30007_1335673_1_346256_1_1,suctionCup:,ctime:1681124558989463,commandTime:2023-04-10 07:02:38
sku:1612062,status:200451,stationId:44,taskId:173_44_13433628_17797494_1_12539726_30007_1335673_1_346265_1_1,suctionCup:swappable_vs_25_nr,ctime:1681124572825103,commandTime:2023-04-10 07:02:52


[None, None, None, None, None]

In [271]:
# remove pause and UPC error tasks and take the suction cup from previous tasks
EXCLUDED_STATUSES = {'200302', '200451'}  # the pause / UPC-error status codes dropped from the analysis
COMMAND_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'  # text format Task expects for commandTime

tasks = []
suctionCup = None
# Last cup reported by each station. Keyed by station so that a cup seen on one
# station is never carried over to the first empty-cup rows of another station
# (raws is sorted by station, then time).
last_cup_by_station = {}
for task in raws:
    # Every row, including the excluded ones, may carry the cup configuration.
    if task.suctionCup != '':
        last_cup_by_station[task.stationId] = task.suctionCup
    if task.status in EXCLUDED_STATUSES:
        continue
    # None until this station has reported a cup at least once.
    suctionCup = last_cup_by_station.get(task.stationId)
    tasks.append(Task(task.sku, task.status, task.stationId, task.taskId, suctionCup,
                      task.ctime, task.commandTime.strftime(COMMAND_TIME_FORMAT)))

In [272]:
# Number of records left after dropping the '200302' and '200451' statuses.
len(tasks)

131143

In [273]:
# Preview the first few filtered records. A plain loop avoids building a
# throw-away list of None values that the cell would otherwise echo.
for x in tasks[0:10]:
    print(x)

sku:867474,status:200441,stationId:44,taskId:173_44_13433617_17801003_1_12539722_4628_1336428_1_346254_1_1,suctionCup:swappable_bgx_48,ctime:1681124654228828,commandTime:2023-04-10 07:04:14
sku:867474,status:200443,stationId:44,taskId:173_44_13433617_17801003_1_12539722_4628_1336428_1_346254_1_1,suctionCup:swappable_bgx_48,ctime:1681124697237222,commandTime:2023-04-10 07:04:57
sku:867474,status:200450,stationId:44,taskId:173_44_13433626_17801015_1_12539722_4628_1336428_1_346263_1_1,suctionCup:swappable_bgx_48,ctime:1681124714220196,commandTime:2023-04-10 07:05:14
sku:867474,status:200422,stationId:44,taskId:173_44_13433626_17801015_1_12539722_4628_1336428_1_346263_1_1,suctionCup:swappable_bgx_48,ctime:1681124735412548,commandTime:2023-04-10 07:05:35
sku:24430096,status:200450,stationId:44,taskId:173_44_13433618_17801041_1_12539725_30479_1342732_2_346255_1_1,suctionCup:swappable_vs_25_nr,ctime:1681125176255935,commandTime:2023-04-10 07:12:56
sku:24430096,status:200450,stationId:44,taskI

[None, None, None, None, None, None, None, None, None, None]

# Aggregate the data to the SKU/cup level for the A/B test

First, calculate the success rate for each SKU and cup combination.

In [274]:
import pandas as pd

In [275]:
# One row per Task, one column per Task attribute.
df_tasks = pd.DataFrame(list(map(vars, tasks)))

In [276]:
# Quick look at the first rows of the task table.
df_tasks.head()

Unnamed: 0,sku,status,stationId,taskId,suctionCup,ctime,commandTime
0,867474,200441,44,173_44_13433617_17801003_1_12539722_4628_13364...,swappable_bgx_48,1681124654228828,2023-04-10 07:04:14
1,867474,200443,44,173_44_13433617_17801003_1_12539722_4628_13364...,swappable_bgx_48,1681124697237222,2023-04-10 07:04:57
2,867474,200450,44,173_44_13433626_17801015_1_12539722_4628_13364...,swappable_bgx_48,1681124714220196,2023-04-10 07:05:14
3,867474,200422,44,173_44_13433626_17801015_1_12539722_4628_13364...,swappable_bgx_48,1681124735412548,2023-04-10 07:05:35
4,24430096,200450,44,173_44_13433618_17801041_1_12539725_30479_1342...,swappable_vs_25_nr,1681125176255935,2023-04-10 07:12:56


In [277]:
# A task counts as a success only when its status code is '200399'.
# Vectorized comparison instead of a row-by-row apply; the result is a boolean column.
df_tasks['success'] = df_tasks['status'] == '200399'

In [278]:
# Spot-check ten random rows. No random_state is passed, so the rows shown
# differ on every run.
df_tasks.sample(10)

Unnamed: 0,sku,status,stationId,taskId,suctionCup,ctime,commandTime,success
17259,105809,200399,44,173_44_13828445_18425521_1_12921789_34636_1384...,swappable_bgx_48,1682949293087092,2023-05-01 09:54:53,True
90212,504308,200399,45,174_45_14042330_18723983_1_13125613_3832_14062...,swappable_bgx_48,1683743979324663,2023-05-10 14:39:39,True
85457,815014,200399,45,174_45_13839267_18438677_1_12929661_29324_1346...,swappable_vs_25_nr,1682961052186310,2023-05-01 13:10:52,True
83624,650499,200450,45,174_45_13800437_18380103_1_12895644_24762_1378...,swappable_vs_25_nr,1682683890983776,2023-04-28 08:11:30,False
76731,24376645,200399,45,174_45_13672197_18173543_1_12768288_9677_13584...,swappable_bgx_48,1682084544455316,2023-04-21 09:42:24,True
55689,565436,200420,44,173_44_14902815_19951252_1_13976558_5441_15012...,swappable_bgx_48,1687875095226470,2023-06-27 10:11:35,False
18389,2681262,200450,44,173_44_13869796_18487770_1_12959364_32039_1387...,swappable_vs_25_nr,1683053279273177,2023-05-02 14:47:59,False
67630,716312,200450,45,174_45_13467716_17856401_1_12571489_26038_1337...,swappable_vs_25_nr,1681233514752513,2023-04-11 13:18:34,False
72557,108230,200399,45,174_45_13566840_18007040_1_12668287_28683_1351...,swappable_bgx_48,1681750743690290,2023-04-17 12:59:03,True
110416,1015828,200399,45,174_45_14509307_19400840_1_13584432_27667_1441...,swappable_vs_25_nr,1685993999935932,2023-06-05 15:39:59,True


In [279]:
# Success rate (p) and number of tasks (n) for every SKU / suction cup pair,
# largest sample first. The mean of the boolean success column is the success
# rate, so the built-in 'mean' replaces a Python-level sum(x)/len(x).
# Rows whose suctionCup is missing are left out, because groupby drops
# missing keys by default.
df = (
    df_tasks
    .groupby(['sku', 'suctionCup'], sort=True)
    .agg(
        p=pd.NamedAgg(column='success', aggfunc='mean'),
        n=pd.NamedAgg(column='success', aggfunc='count'),
    )
    .reset_index()
    .sort_values(by='n', ascending=False)
)

In [280]:
# SKU / cup pairs with the most recorded tasks.
df.head(20)

Unnamed: 0,sku,suctionCup,p,n
911,2030289,swappable_bgx_48,0.87551,1470
1612,24388284,swappable_vs_25_nr,0.892979,1467
1608,24388084,swappable_bgx_48,0.897872,1410
1056,2145184,swappable_vs_25_nr,0.927579,1367
4875,650499,swappable_vs_25_nr,0.801865,1287
5939,867474,swappable_bgx_48,0.5184,1250
4276,512663,swappable_vs_25_nr,0.998117,1062
5938,867473,swappable_bgx_48,0.655667,697
6177,917881,swappable_bgx_48,0.920923,607
505,1611435,swappable_vs_25_nr,0.831597,576


## Find the SKUs picked with more than one cup

In [281]:
# Keep only SKU / cup pairs with at least 20 recorded tasks.
has_enough_tasks = df['n'] >= 20
df_filtered = df.loc[has_enough_tasks]

In [282]:
# A SKU that appears in more than one remaining row was picked with more than
# one cup; keep=False marks every such row, not just the repeats.
picked_with_several_cups = df_filtered['sku'].duplicated(keep=False)
df_filtered = df_filtered[picked_with_several_cups]
df_filtered = df_filtered.sort_values(by='sku', ascending=False)

In [283]:
# SKU / cup pairs for SKUs picked with more than one cup (each pair has n >= 20).
df_filtered

Unnamed: 0,sku,suctionCup,p,n
6105,900110,swappable_vs_18_nr,0.948276,58
6106,900110,swappable_vs_25_nr,0.979592,49
6042,891312,swappable_vs_18_nr,1.000000,23
6043,891312,swappable_vs_25_nr,1.000000,26
5946,867590,swappable_vs_25_nr,1.000000,47
...,...,...,...,...
133,108985,swappable_vs_18_nr,0.841270,63
113,107250,swappable_vs_18_nr,0.973684,38
114,107250,swappable_vs_25_nr,1.000000,44
54,1017452,swappable_vs_18_nr,1.000000,24
