In [40]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [41]:
DATASET_PATH = "./cleaned_datasets/enlarged_dataset.csv"
data = pd.read_csv(DATASET_PATH)
data.head()

Unnamed: 0,PO_NUM,ITEM_NAME,PART_DESCRIPTION,ITEM_CODE,SUPPLIER_NAME,SUPPLIER_CODE,ORDERED_QUANTITY,FAULTED_PARTS,PRICE,PO_VALUE,DOWNPAYMENT_DATE,DELIVERY_DATE
0,100000,Gripper,Pneumatic gripper for robotic arms,MOT7793,MOTHERSON SUMI WIRING INDIA LIMITED,VD904014,39,0,3740.04,145861.56,2023-09-05,2023-09-26
1,100000,3D Printer,Compact 3D printer for prototyping,MOT1546,MOTHERSON SUMI WIRING INDIA LIMITED,VD904014,180,9,2547.14,458485.2,2023-10-22,2023-11-29
2,100001,Pressure Sensor,High-precision pressure sensor,KYO7240,"KYOWA MANUFACTURING CO., LTD",VD650721,67,0,220.82,14794.94,2023-09-30,2023-11-06
3,100001,Proximity Sensor,Inductive proximity sensor,KYO2884,"KYOWA MANUFACTURING CO., LTD",VD650721,179,0,4716.11,844183.69,2023-12-03,2023-12-30
4,100002,Control Cabinet,Industrial control cabinet with locks,IND4956,INDEANA ENGINEERING,VD671527,300,15,1526.58,457974.0,2023-09-02,2023-12-10


In [42]:
# randomly sample 50% of PO_NUMB and keep records for those
unique_pos = data['PO_NUM'].unique()
po_subset = np.random.choice(unique_pos, size=len(unique_pos)//5, replace = False)
print("data size : ", data.shape)
data_subset = data[data['PO_NUM'].isin(po_subset)]
print("data_susbset size : ", data_subset.shape)

data size :  (58667, 12)
data_susbset size :  (11651, 12)


In [43]:
baskets = data.groupby('PO_NUM')['ITEM_NAME'].apply(list).tolist()
# baskets = [ basket if len(basket) > 1 else None for basket in baskets ]
# remove_none = [ basket for basket in baskets if basket is not None ]
# baskets = remove_none
baskets

[['Gripper', '3D Printer'],
 ['Pressure Sensor', 'Proximity Sensor'],
 ['Control Cabinet'],
 ['Stepper Motor',
  'Servo Motor Driver',
  'Temperature Sensor',
  'Proximity Sensor'],
 ['Servo Motor Driver', '12V DC Motor'],
 ['Power Supply'],
 ['Proximity Sensor', 'Control Cabinet', 'PLC Controller'],
 ['VFD Drive'],
 ['Stepper Motor', 'Servo Motor Driver'],
 ['Resistor', 'Power Supply'],
 ['Temperature Sensor', 'PLC Controller'],
 ['Temperature Sensor', 'Pressure Sensor', 'VFD Drive'],
 ['Resistor'],
 ['12V DC Motor', 'Servo Motor Driver', 'Proximity Sensor'],
 ['Temperature Sensor', 'Pressure Sensor'],
 ['3D Printer', 'Robotic Arm'],
 ['12V DC Motor', 'Temperature Sensor'],
 ['Proximity Sensor'],
 ['Servo Motor Driver', 'Control Cabinet'],
 ['Servo Motor Driver'],
 ['VFD Drive'],
 ['Servo Motor Driver',
  'Stepper Motor',
  'Proximity Sensor',
  'Pressure Sensor'],
 ['12V DC Motor', 'Control Cabinet', 'VFD Drive'],
 ['12V DC Motor'],
 ['Servo Motor Driver', 'Pressure Sensor'],
 ['Prox

In [44]:
te = TransactionEncoder()
te_ary = te.fit(baskets).transform(baskets)
df = pd.DataFrame(te_ary, columns=te.columns_)
df.head()

Unnamed: 0,10A Relay Switch,12V DC Motor,3D Printer,Control Cabinet,Gripper,PLC Controller,Power Supply,Pressure Sensor,Proximity Sensor,Resistor,Robotic Arm,Servo Motor Driver,Stepper Motor,Temperature Sensor,VFD Drive
0,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False
2,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,True,False,False,True,True,True,False
4,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False


In [45]:
frequent_itemsets = apriori(df, min_support=0.01, use_colnames=True)



In [53]:
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(3D Printer),(10A Relay Switch),0.107567,0.108167,0.0201,0.186861,1.727527,0.008465,1.096778,0.471898
1,(10A Relay Switch),(3D Printer),0.108167,0.107567,0.0201,0.185824,1.727527,0.008465,1.096119,0.472216
2,(10A Relay Switch),(Gripper),0.108167,0.106567,0.0188,0.173806,1.630959,0.007273,1.081384,0.433785
3,(Gripper),(10A Relay Switch),0.106567,0.108167,0.0188,0.176415,1.630959,0.007273,1.082868,0.433008
4,(Power Supply),(10A Relay Switch),0.1069,0.108167,0.036433,0.340817,3.15085,0.02487,1.352937,0.764332
5,(10A Relay Switch),(Power Supply),0.108167,0.1069,0.036433,0.336826,3.15085,0.02487,1.346705,0.765418
6,(Resistor),(10A Relay Switch),0.103933,0.108167,0.035333,0.339962,3.142942,0.024091,1.351184,0.760911
7,(10A Relay Switch),(Resistor),0.108167,0.103933,0.035333,0.326656,3.142942,0.024091,1.330772,0.764523
8,(10A Relay Switch),(Robotic Arm),0.108167,0.105267,0.018767,0.173498,1.648173,0.00738,1.082554,0.440965
9,(Robotic Arm),(10A Relay Switch),0.105267,0.108167,0.018767,0.178277,1.648173,0.00738,1.085322,0.439536


In [52]:
rules.to_pickle('./models/apriori_ruleset.pkl')