In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

## Read the support counts calculated for high-/low-level command pairs

In [None]:
# Parquet file that holds the support counts loaded in the next cell.
path = '/data/support.parquet'

In [3]:
# Load the support table; the trailing bare expression displays it.
df = pd.read_parquet (path)
df

Unnamed: 0,tuple_commands,tool/menu,event,sup1,sup2,file_index
0,"[Tool: Text (-200), End Event: Modify Text (282)]",Tool: Text (-200),End Event: Modify Text (282),3,1,0
1,"[Tool: Text (-200), End Event: Drag (75)]",Tool: Text (-200),End Event: Drag (75),3,2,0
2,"[Tool: Text (-200), End Event: Set Active Laye...",Tool: Text (-200),End Event: Set Active Layer (32),3,1,0
3,"[Menu: Save - (-5) (0), End Event: Drag (75)]",Menu: Save - (-5) (0),End Event: Drag (75),87,37,0
4,"[Menu: Save - (-5) (0), End Event: Set Active...",Menu: Save - (-5) (0),End Event: Set Active Layer (32),87,23,0
...,...,...,...,...,...,...
10611517,"[Menu: Create Section Viewport - (-114) (0), ...",Menu: Create Section Viewport - (-114) (0),End Event: Create PolyLine (231),748,2,155
10611518,"[Menu: Force Select - (-346) (0), End Event: ...",Menu: Force Select - (-346) (0),End Event: Edit Record in Worksheet (349),7488,2,155
10611519,"[Menu: Change Plant Grouping - (350) (0), End...",Menu: Change Plant Grouping - (350) (0),End Event: Plug-in Event (166),2,1,155
10611520,"[Menu: Change Plant Grouping - (350) (0), End...",Menu: Change Plant Grouping - (350) (0),End Event: Reshape (279),2,1,155


## Merge the support counts calculated per sub-file

In [4]:
# The support calculation was run separately on each sub-file, so this step merges the per-file sup1 and sup2 values into totals
def process_data(df_sorted):
    """Merge per-sub-file support counts into totals for every command pair.

    Parameters
    ----------
    df_sorted : pandas.DataFrame
        Must contain the columns 'tool/menu', 'event', 'sup1', 'sup2' and
        'file_index'. The frame itself is not modified.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df_sorted`` arranged group by group (one group per
        'tool/menu' value; rows with a missing 'tool/menu' are skipped by
        ``groupby``), re-indexed 0..n-1, with two extra columns:

        * ``sup1_group_sum`` - sum of 'sup1' taken from the first row of each
          distinct 'file_index' inside the group.
        * ``sup2_sum`` - sum of 'sup2' over all rows of the group that share
          the row's 'event'.

        When there are no groups at all, an empty frame with the same columns
        is returned instead of raising.
    """
    processed_data = []
    grouped_df = df_sorted.groupby('tool/menu')

    for group_name, group_df in tqdm(grouped_df, desc='processing merging'):
        # Count 'sup1' once per file: only the first row of each file_index is used.
        # NOTE(review): this assumes 'sup1' repeats on every row of a file - confirm.
        sup1_group_sum = group_df.drop_duplicates('file_index')['sup1'].sum()

        # Total 'sup2' of each event across the files of this group.
        event_sums = group_df.groupby('event')['sup2'].sum().rename('sup2_sum')

        # .assign() builds a new frame rather than writing into the object
        # handed out by groupby, which avoids chained-assignment warnings.
        merged = (
            group_df
            .assign(sup1_group_sum=sup1_group_sum)
            .join(event_sums, on='event')
        )
        processed_data.append(merged)

    if not processed_data:
        # pd.concat([]) raises ValueError; return a correctly shaped empty frame.
        return (
            df_sorted.iloc[0:0]
            .assign(
                sup1_group_sum=df_sorted['sup1'].iloc[0:0],
                sup2_sum=df_sorted['sup2'].iloc[0:0],
            )
            .reset_index(drop=True)
        )

    # Combine all processed groups back into a single DataFrame
    return pd.concat(processed_data, ignore_index=True)

In [5]:
# Add the merged totals (sup1_group_sum, sup2_sum) to every row of the support table.
df_sum = process_data(df)

processing merging: 100%|█████████████████| 9155/9155 [00:05<00:00, 1547.01it/s]


In [6]:
# Display the merged frame for inspection.
df_sum

Unnamed: 0,tuple_commands,tool/menu,event,sup1,sup2,file_index,sup1_group_sum,sup2_sum
0,"[Menu: - (201) (0), End Event: Set Units (148)]",Menu: - (201) (0),End Event: Set Units (148),1,1,1,84,27
1,"[Menu: - (201) (0), End Event: Create linear...",Menu: - (201) (0),End Event: Create linear dimension (23),1,1,1,84,2
2,"[Menu: - (201) (0), End Event: Delete (58)]",Menu: - (201) (0),End Event: Delete (58),1,1,1,84,11
3,"[Menu: - (201) (0), End Event: Drag (75)]",Menu: - (201) (0),End Event: Drag (75),1,1,1,84,8
4,"[Menu: - (201) (0), End Event: Plug-in Event...",Menu: - (201) (0),End Event: Plug-in Event (166),1,1,3,84,16
...,...,...,...,...,...,...,...,...
10611517,"[Tool: ~ScaleMaker V2 (-232), End Event: Delet...",Tool: ~ScaleMaker V2 (-232),End Event: Delete (58),8,4,135,21,5
10611518,"[Tool: ~ScaleMaker V2 (-232), End Event: Chang...",Tool: ~ScaleMaker V2 (-232),End Event: Change Attributes (11),8,1,135,21,1
10611519,"[Tool: ~ScaleMaker V2 (-232), End Event: Resiz...",Tool: ~ScaleMaker V2 (-232),End Event: Resize (146),8,1,135,21,2
10611520,"[Tool: ~ScaleMaker V2 (-232), End Event: Mirro...",Tool: ~ScaleMaker V2 (-232),End Event: Mirror Duplicate (86),8,1,135,21,5


## Calculate Confidence

In [7]:
# Order rows by the command total, then by the pair total, both descending.
df_sum_1 = df_sum.sort_values(['sup1_group_sum', 'sup2_sum'], ascending=False)
# The per-file columns are superseded by the summed columns, so remove them.
df_sum_2 = df_sum_1.drop(['sup1', 'sup2', 'file_index'], axis=1)

In [8]:
# confidence = pair support (sup2_sum) / command support (sup1_group_sum)
df_sum_2['confidence'] = df_sum_2['sup2_sum'].div(df_sum_2['sup1_group_sum'])

In [9]:
# Display the frame with the new confidence column.
df_sum_2

Unnamed: 0,tuple_commands,tool/menu,event,sup1_group_sum,sup2_sum,confidence
9574073,"[Tool: Reshape (-214), End Event: Reshape (279)]",Tool: Reshape (-214),End Event: Reshape (279),29945982,21260047,0.709947
9574577,"[Tool: Reshape (-214), End Event: Reshape (279)]",Tool: Reshape (-214),End Event: Reshape (279),29945982,21260047,0.709947
9575104,"[Tool: Reshape (-214), End Event: Reshape (279)]",Tool: Reshape (-214),End Event: Reshape (279),29945982,21260047,0.709947
9575617,"[Tool: Reshape (-214), End Event: Reshape (279)]",Tool: Reshape (-214),End Event: Reshape (279),29945982,21260047,0.709947
9576138,"[Tool: Reshape (-214), End Event: Reshape (279)]",Tool: Reshape (-214),End Event: Reshape (279),29945982,21260047,0.709947
...,...,...,...,...,...,...
7038370,[Menu: VWRemoteClientMenu - Vectorworks Remote...,Menu: VWRemoteClientMenu - Vectorworks Remote....,End Event: Set Active Layer (59),5,1,0.200000
7038371,[Menu: VWRemoteClientMenu - Vectorworks Remote...,Menu: VWRemoteClientMenu - Vectorworks Remote....,End Event: Plug-in Event (166),5,1,0.200000
6305154,"[Menu: Shao Yi - (266) (0), End Event: The End...",Menu: Shao Yi - (266) (0),End Event: The End Event (-1),3,3,1.000000
6305155,"[Menu: Shao Yi - (266) (0), End Event: Mirror ...",Menu: Shao Yi - (266) (0),End Event: Mirror Duplicate (86),3,3,1.000000


In [11]:
# df_sum_2 still holds one row per (tool/menu, event, file) although the summed
# columns and the confidence are identical for every row of a pair, so keep a
# single row per (tool/menu, event) pair.
df_sum_3 = (
    df_sum_2
    # The former groupby('tool/menu') loop skipped rows without a tool/menu.
    .dropna(subset=['tool/menu'])
    # Same as dropping duplicate events inside each tool/menu group (first row wins).
    .drop_duplicates(subset=['tool/menu', 'event'])
    # Stable sort reproduces the group-by-group row order, hence the same index labels.
    .sort_values('tool/menu', kind='stable')
    .reset_index(drop=True)
    .sort_values(by=['sup1_group_sum', 'tool/menu', 'sup2_sum'], ascending=False)
)


processing merging: 100%|█████████████████| 9155/9155 [00:01<00:00, 6730.31it/s]


In [12]:
# Discard every pair whose confidence does not exceed 0.1.
df_sum_3 = df_sum_3.loc[df_sum_3['confidence'].gt(0.1)]
df_sum_3

Unnamed: 0,tuple_commands,tool/menu,event,sup1_group_sum,sup2_sum,confidence
655940,"[Tool: Reshape (-214), End Event: Reshape (279)]",Tool: Reshape (-214),End Event: Reshape (279),29945982,21260047,0.709947
655941,"[Tool: Reshape (-214), End Event: Delete (58)]",Tool: Reshape (-214),End Event: Delete (58),29945982,5156015,0.172177
655942,"[Tool: Reshape (-214), End Event: Drag (75)]",Tool: Reshape (-214),End Event: Drag (75),29945982,4247671,0.141844
655943,"[Tool: Reshape (-214), End Event: Resize (146)]",Tool: Reshape (-214),End Event: Resize (146),29945982,4002706,0.133664
655944,"[Tool: Reshape (-214), End Event: Multiple Tra...",Tool: Reshape (-214),End Event: Multiple Transformation (248),29945982,3157471,0.105439
...,...,...,...,...,...,...
51460,"[Menu: CExtMenu_TranslateLib - (299) (0), End...",Menu: CExtMenu_TranslateLib - (299) (0),End Event: Exit Group (17),5,1,0.200000
51461,"[Menu: CExtMenu_TranslateLib - (299) (0), End...",Menu: CExtMenu_TranslateLib - (299) (0),End Event: Update References (128),5,1,0.200000
443366,"[Menu: Shao Yi - (266) (0), End Event: The End...",Menu: Shao Yi - (266) (0),End Event: The End Event (-1),3,3,1.000000
443367,"[Menu: Shao Yi - (266) (0), End Event: Mirror ...",Menu: Shao Yi - (266) (0),End Event: Mirror Duplicate (86),3,3,1.000000


In [13]:
# End Events that appear frequently and are not triggered by the high-level
# command; pairs ending in one of them are left out.
frequent_events = [
    'End Event: Shape Pane Edit (89)',
    'End Event: Delete (58)',
    'End Event: Drag (75)',
    'End Event: Set Active Layer (32)',
    'End Event: Resize (146)',
]
is_frequent_event = df_sum_3['event'].isin(frequent_events)
df_sum_4 = df_sum_3.loc[~is_frequent_event]
df_sum_4

Unnamed: 0,tuple_commands,tool/menu,event,sup1_group_sum,sup2_sum,confidence
655940,"[Tool: Reshape (-214), End Event: Reshape (279)]",Tool: Reshape (-214),End Event: Reshape (279),29945982,21260047,0.709947
655944,"[Tool: Reshape (-214), End Event: Multiple Tra...",Tool: Reshape (-214),End Event: Multiple Transformation (248),29945982,3157471,0.105439
636155,"[Tool: Move by Points (-352), End Event: Move ...",Tool: Move by Points (-352),End Event: Move (346),21365110,21043027,0.984925
630595,"[Tool: Line (-201), End Event: Create Line (166)]",Tool: Line (-201),End Event: Create Line (166),17893527,14422995,0.806045
412558,"[Menu: Save - (-5) (0), End Event: Duplicate ...",Menu: Save - (-5) (0),End Event: Duplicate (206),17072882,1873481,0.109734
...,...,...,...,...,...,...
51460,"[Menu: CExtMenu_TranslateLib - (299) (0), End...",Menu: CExtMenu_TranslateLib - (299) (0),End Event: Exit Group (17),5,1,0.200000
51461,"[Menu: CExtMenu_TranslateLib - (299) (0), End...",Menu: CExtMenu_TranslateLib - (299) (0),End Event: Update References (128),5,1,0.200000
443366,"[Menu: Shao Yi - (266) (0), End Event: The End...",Menu: Shao Yi - (266) (0),End Event: The End Event (-1),3,3,1.000000
443367,"[Menu: Shao Yi - (266) (0), End Event: Mirror ...",Menu: Shao Yi - (266) (0),End Event: Mirror Duplicate (86),3,3,1.000000


In [None]:
# Export the filtered pairs so that frequently appearing commands can be checked manually.
# to_csv() writes the index by default, so the file gets an unnamed leading index column.
df_sum_4.to_csv('/Users/confidence_test2.csv')

In [None]:
# Read the exported file back; the saved index returns as the 'Unnamed: 0' column.
df_confidence = pd.read_csv('/Users/confidence_test2.csv')
df_confidence

Unnamed: 0.1,Unnamed: 0,tuple_commands,tool/menu,event,sup1_group_sum,sup2_sum,confidence
0,655940,['Tool: Reshape (-214)' 'End Event: Reshape (2...,Tool: Reshape (-214),End Event: Reshape (279),29945982,21260047,0.709947
1,655944,['Tool: Reshape (-214)' 'End Event: Multiple T...,Tool: Reshape (-214),End Event: Multiple Transformation (248),29945982,3157471,0.105439
2,636155,['Tool: Move by Points (-352)' 'End Event: Mov...,Tool: Move by Points (-352),End Event: Move (346),21365110,21043027,0.984925
3,630595,['Tool: Line (-201)' 'End Event: Create Line (...,Tool: Line (-201),End Event: Create Line (166),17893527,14422995,0.806045
4,412558,['Menu: Save - (-5) (0)' 'End Event: Duplicat...,Menu: Save - (-5) (0),End Event: Duplicate (206),17072882,1873481,0.109734
...,...,...,...,...,...,...,...
35810,51460,['Menu: CExtMenu_TranslateLib - (299) (0)' 'E...,Menu: CExtMenu_TranslateLib - (299) (0),End Event: Exit Group (17),5,1,0.200000
35811,51461,['Menu: CExtMenu_TranslateLib - (299) (0)'\n ...,Menu: CExtMenu_TranslateLib - (299) (0),End Event: Update References (128),5,1,0.200000
35812,443366,['Menu: Shao Yi - (266) (0)' 'End Event: The E...,Menu: Shao Yi - (266) (0),End Event: The End Event (-1),3,3,1.000000
35813,443367,['Menu: Shao Yi - (266) (0)' 'End Event: Mirro...,Menu: Shao Yi - (266) (0),End Event: Mirror Duplicate (86),3,3,1.000000


In [21]:
## Commands that appear fewer than 1300 times need a confidence above 0.4 for a pair to count as related

# Thresholds for the rule above, named so they can be tuned in one place.
RARE_COMMAND_MAX_SUPPORT = 1300
RARE_COMMAND_MIN_CONFIDENCE = 0.4

# pandas and tqdm are already imported in the first cell, so they are not re-imported here.
grouped_df = df_confidence.groupby('tool/menu')
processed_data = []

for group_name, group_df in tqdm(grouped_df, desc='processing commands'):
    # The group's support is read from its first row.
    # NOTE(review): sup1_group_sum is expected to be constant within a tool/menu group - confirm
    # this still holds if the CSV was edited by hand.
    if group_df['sup1_group_sum'].iloc[0] < RARE_COMMAND_MAX_SUPPORT:
        group_df = group_df[group_df['confidence'] > RARE_COMMAND_MIN_CONFIDENCE]

    processed_data.append(group_df)

# Combine all processed groups back into a single DataFrame
result_df = pd.concat(processed_data, ignore_index=True)
# 'Unnamed: 0' is the index column that the CSV round trip introduced.
result_df = result_df.drop(columns='Unnamed: 0')

result_df


processing commands: 100%|███████████████| 9061/9061 [00:00<00:00, 11491.53it/s]


Unnamed: 0,tuple_commands,tool/menu,event,sup1_group_sum,sup2_sum,confidence
0,['Menu: - (272) (0)' 'End Event: The Mantis ...,Menu: - (272) (0),End Event: The Mantis (-1),123,56,0.455285
1,['Menu: - (307) (0)' 'End Event: Create Data...,Menu: - (307) (0),End Event: Create Database Worksheet (19),30,20,0.666667
2,['Menu: Classes All On - (244) (0)' 'End Eve...,Menu: Classes All On - (244) (0),End Event: Classes All On (-1),49,44,0.897959
3,['Menu: Show All Classes - (344) (0)'\n 'End...,Menu: Show All Classes - (344) (0),End Event: Show All Classes (-1),238,225,0.945378
4,['Menu: Show All Objects - (356) (0)'\n 'End...,Menu: Show All Objects - (356) (0),End Event: Show All Objects (-1),14,12,0.857143
...,...,...,...,...,...,...
11144,['Tool: xplode - nudge Y- (-102)' 'End Event:...,Tool: xplode - nudge Y- (-102),End Event: xplode - nudge Y- (-1),44,44,1.000000
11145,['Tool: xplode - nudge Y- (-102)'\n 'End Even...,Tool: xplode - nudge Y- (-102),End Event: Definieerxplode - nudge Y- (-1),44,30,0.681818
11146,['Tool: xplode - nudge Z+ (-102)'\n 'End Even...,Tool: xplode - nudge Z+ (-102),End Event: Define xplode - nudge Z+ (-1),17,17,1.000000
11147,['Tool: xplode - nudge Z+ (-102)' 'End Event:...,Tool: xplode - nudge Z+ (-102),End Event: xplode - nudge Y+ (-1),17,9,0.529412


In [None]:
# Save the final command-pair collection (to_csv() also writes the index as the first column).
result_df.to_csv('/data/command_pairs_collections.csv')

## The exported command_pairs_collections file is then refined manually within the software environment.