In [14]:
# Execute the two shared project scripts inside this notebook's namespace so
# that every name they define becomes available to the cells below.
# NOTE(review): later cells use names such as pd, np, itertools, getpass, random
# and pick_occupation without importing/defining them in this notebook --
# presumably they come from these two files; confirm.
with open('libraries.py') as f:
    code = f.read()
exec(code)

# `code` is reused for the second script; after this cell it holds the text of functions.py.
with open('functions.py') as f:
    code = f.read()
exec(code)

In [15]:
import os
import shutil
import subprocess

# Keep the machine awake while the (long) sweep below is running.
# `caffeinate` only exists on macOS, so look it up instead of assuming it is there.
caffeinate_executable = shutil.which('caffeinate')
if caffeinate_executable is None:
    print('caffeinate not found: system sleep is not being prevented.')
else:
    # -i prevents idle sleep (what a bare `caffeinate` does by default);
    # -w <pid> makes caffeinate exit as soon as this kernel process exits, so the
    # helper is not left running (and blocking sleep) after the notebook is closed.
    caffeinate_process = subprocess.Popen([caffeinate_executable, '-i', '-w', str(os.getpid())])

<Popen: returncode: None args: ['caffeinate']>

In [16]:
# Resolve the project folders for the OS user running the notebook.
user = getpass.getuser()
if user == 'peymansh':
    main_folder_path = '/Users/peymansh/Dropbox (MIT)/Research/AI and Occupations/ai-exposure'
    data_path = f'{main_folder_path}/output'
elif 'data_path' not in globals():
    # Without this guard an unknown user only finds out much later, through a
    # NameError on `data_path` in the main cell. Fail here with an actionable message.
    raise RuntimeError(
        f"No project paths are configured for user '{user}'. "
        "Add a branch above that sets main_folder_path and data_path."
    )

In [17]:
def get_tasks(onet_data_path,
              occupation_code,
              year=2023):
    """Return the unique task descriptions of one occupation for one year.

    Parameters
    ----------
    onet_data_path : str
        Path of a CSV file with (at least) the columns ``year``, ``occ_code``,
        ``occ_title``, ``task_id`` and ``task``.
    occupation_code : str
        Occupation code to select; it is compared as a string against ``occ_code``.
    year : int, default 2023
        Year to select. The default reproduces the previously hard-coded value.

    Returns
    -------
    list
        Unique values of the ``task`` column for the selected rows, ordered by
        ``occ_title`` and then ``task_id``. Empty when nothing matches.
    """
    # Load the data
    onet = pd.read_csv(onet_data_path)

    # Select the rows first (one mask, applied once), then sort only what is left
    # instead of sorting the whole table and filtering on the year twice.
    selected = onet[(onet['occ_code'] == f'{occupation_code}') & (onet['year'] == year)]
    selected = selected.sort_values(by=['year', 'occ_code', 'occ_title', 'task_id'])

    # Get list of tasks
    return selected['task'].unique().tolist()

In [18]:
def find_neighbors(adjacency_matrix):
    """Map every node to the list of nodes it shares an edge with, ignoring direction.

    ``adjacency_matrix`` is indexed as ``adjacency_matrix[i, j]`` and its first
    dimension gives the number of nodes. Node ``j`` is a neighbor of node ``i``
    when either entry ``[i, j]`` or entry ``[j, i]`` equals 1 (a self-loop makes
    a node its own neighbor). Each neighbor list is in increasing node order and
    contains no repetitions.
    """
    node_count = adjacency_matrix.shape[0]

    def linked(first, second):
        # an edge in either direction makes the two nodes neighbors
        return adjacency_matrix[first, second] == 1 or adjacency_matrix[second, first] == 1

    return {
        node: [other for other in range(node_count) if linked(node, other)]
        for node in range(node_count)
    }

In [19]:
def create_inactive_node_neighbor_subset_combinations(inactive_neighbors_valid_subsets_dict):
    """Build every distinct union of one or more subsets stored in the dictionary.

    The dictionary values are lists of subsets (lists of nodes). All subsets,
    regardless of the key they belong to, are pooled; every non-empty selection
    from the pool is merged into a single sorted, repetition-free list. Distinct
    merged lists are returned ordered by length.
    """
    # pool the subsets of all keys, dropping repeated nodes inside each subset
    pool = []
    for subsets in inactive_neighbors_valid_subsets_dict.values():
        for subset in subsets:
            pool.append(list(set(subset)))

    # merge every selection of 1..len(pool) pooled subsets
    merged_seen = set()
    for selection_size in range(1, len(pool) + 1):
        for selection in itertools.combinations(pool, selection_size):
            merged_nodes = set()
            for chunk in selection:
                merged_nodes.update(chunk)
            # sorted tuple so equal unions collapse to a single entry
            merged_seen.add(tuple(sorted(merged_nodes)))

    merged_lists = [list(merged) for merged in merged_seen]
    merged_lists.sort(key=len)
    return merged_lists

In [20]:
def get_valid_DAG_subsets(adjacency_matrix):
    """Collect, for every non-Target node, the node subsets grown from that node.

    The last row and column of ``adjacency_matrix`` are dropped (the code treats
    them as the Target node). The remaining nodes are processed in index order:
    the recursive helper is started from node 0 of the current matrix, the
    resulting indices are shifted back to the numbering of the full matrix, and
    the first row/column is then removed before the next node is processed.
    Consequently the subsets stored under key ``k`` only contain nodes >= ``k``.

    Returns a dict mapping each non-Target node index to a list of non-empty
    subsets (lists of node indices).
    """
    # subset adjacency matrix to exclude Target node
    non_target_adjacency_matrix = adjacency_matrix[:-1,:-1].copy()

    # get number of non-Target nodes
    n = non_target_adjacency_matrix.shape[0]



    def valid_subsets_recursive(adjacency_matrix, active_dict, memory_dict, partition):
        """Return the subsets obtained by growing ``partition`` through its inactive neighbors.

        ``active_dict`` maps node -> bool and is updated in place (newly reached
        neighbors are marked active); ``memory_dict`` maps a sorted tuple of nodes
        to a previously computed result and is also updated in place.
        Neighborhood is undirected because it comes from ``find_neighbors``.
        """
        # if partition already in memory return its value
        # NOTE(review): when the key exists but its stored list is empty, neither
        # the `if` nor the `except` branch runs and the function returns None.
        try:
            if len(memory_dict[tuple(sorted(partition))]) > 0:
                return memory_dict[tuple(sorted(partition))]
            
        # if partition not in memory, get valid subsets of partition
        except KeyError:
            # get neighbors of every node in the partition
            partition_neighbors = find_neighbors(adjacency_matrix)
            neighbors_list = []
            for node in partition:
                neighbors_list.append(partition_neighbors[node])
            
            # delete repetitions and keep unique neighbors
            neighbors_list = list(itertools.chain(*neighbors_list))
            neighbors_list = list(set(neighbors_list))

            # keep only the neighbors that have not been activated yet
            inactive_neighbors_list = [neighbor for neighbor in neighbors_list if active_dict[neighbor] == False]

            # if partition has no inactive neighbors return partition and empty list
            # (the empty list is filtered out again by the callers)
            if len(inactive_neighbors_list) == 0:
                memory_dict[tuple(sorted(partition))] = [partition, []]
                return [partition, []]

            # for each inactive neighbor get valid subsets
            inactive_neighbors_valid_subsets = {}



            ############
            # set all neighbors to active before recursing, so that the recursive
            # calls below do not treat them as inactive neighbors again
            for neighbor in inactive_neighbors_list:
                active_dict[neighbor] = True
            # print(f'\n\n>>>>>>>>>active dict: {active_dict}<<<<<<<<<')
            ############


            # create all non-empty subsets of inactive neighbors to loop over
            inactive_neighbor_subsets = []
            for r in range(len(inactive_neighbors_list) + 1):
                inactive_neighbor_subsets.extend(itertools.combinations(inactive_neighbors_list, r))
            inactive_neighbor_subsets = [list(subset) for subset in inactive_neighbor_subsets if len(subset) > 0]


            for neighbor_partition in inactive_neighbor_subsets:
                ############
                # make a copy of active_dict and memory dict for the neighbor contingencies
                # (shallow copies: the lists stored as values are shared with the originals)
                neighbor_active_dict = active_dict.copy()
                neighbor_memory_dict = memory_dict.copy()
                ############

                valid_subsets = valid_subsets_recursive(adjacency_matrix, neighbor_active_dict, neighbor_memory_dict, neighbor_partition)

                # add neighbor partition itself to valid subsets of neighbor
                #valid_subsets = [subset + neighbor_partition for subset in valid_subsets]
                valid_subsets = sorted(valid_subsets, key=len)

                # add valid subsets of neighbor to memory
                memory_dict[tuple(sorted(neighbor_partition))] = valid_subsets

                # append valid subsets of neighbor to valid subsets of node
                inactive_neighbors_valid_subsets[tuple(sorted(neighbor_partition))] = valid_subsets

            # prepend-by-extension: add the current partition's nodes to every subset
            # found for the neighbors. This extends the inner lists IN PLACE, and the
            # same list objects were just stored in memory_dict above.
            for key, lists in inactive_neighbors_valid_subsets.items():
                #lists.append([])
                for inner_list in lists:
                    inner_list.extend(partition)
            
            # flatten the per-neighbor-subset results into one list of subsets
            inactive_neighbors_valid_subsets_combinations = []
            for key, value in inactive_neighbors_valid_subsets.items():
                inactive_neighbors_valid_subsets_combinations.extend(value)
            
            # add partition itself as a valid subset
            inactive_neighbors_valid_subsets_combinations.append(partition)

            # drop repetitions (compared as unsorted tuples) and sort the nodes of each subset
            inactive_neighbors_valid_subsets_combinations = [sorted(list(subset)) for subset in set(tuple(subset) for subset in inactive_neighbors_valid_subsets_combinations)]
            
            # sort combinations by length
            inactive_neighbors_valid_subsets_combinations = sorted(inactive_neighbors_valid_subsets_combinations, key=len)

            # filter out empty subsets -- mainly for compatibility reasons due to last nodes (i.e., nodes w/o inactive neighbors)
            inactive_neighbors_valid_subsets_combinations = [subset for subset in inactive_neighbors_valid_subsets_combinations if subset != []]

            return inactive_neighbors_valid_subsets_combinations
            


    # initialize dictionary for valid subsets originating from each node
    valid_subsets_dict = {}
    
    # run the algorithm on each node and remove that node from the adjacency matrix after each iteration
    my_adjacency_matrix = non_target_adjacency_matrix.copy()
    for node in range(n):
        # create active dictionary
        # (always has n keys even though the matrix shrinks; the extra keys are never reached)
        global_active_dict = {i: False for i in range(n)}

        # set the current node as active -- it is always index 0 of the shrunken matrix
        global_active_dict[0] = True

        # initialize dict for valid subsets of nodes (and also partitions) to act as memory
        global_memory_dict = {}

        # get valid subsets of node 0 in current adjacency matrix
        valid_subsets = valid_subsets_recursive(my_adjacency_matrix, global_active_dict, global_memory_dict, [0])

        # adjust output above for actual node number in main adjacency matrix
        valid_subsets_dict[node] = [[element + node for element in subset] for subset in valid_subsets]

        # ensure empty subsets do not exist in valid subsets
        valid_subsets_dict[node] = [subset for subset in valid_subsets_dict[node] if len(subset) > 0]

        # remove current node from adjacency matrix for next iteration
        my_adjacency_matrix = my_adjacency_matrix[1:,1:]
    
    return valid_subsets_dict

In [21]:
def is_combination_valid(combination, n):
    """Tell whether ``combination`` covers the nodes 0..n-1 exactly once each.

    ``combination`` is a list of subsets (lists of node indices). It is valid
    when its subsets together hold exactly ``n`` entries and those entries are
    precisely the integers ``0, 1, ..., n-1``.
    """
    # gather every node mentioned by any subset
    covered_nodes = []
    for subset in combination:
        covered_nodes.extend(subset)

    distinct_nodes = set(covered_nodes)

    # n entries in total + every node present  =>  no node is repeated or missing
    return len(covered_nodes) == n and distinct_nodes == set(range(n))


def generate_combinations(valid_subsets_dict, current_key=0, current_combination=None, result=None):
    """Recursively assemble the combinations of subsets that cover every node exactly once.

    ``valid_subsets_dict`` maps each key ``0..n-1`` to a list of candidate subsets.
    Starting at ``current_key``, one candidate is appended to
    ``current_combination``; the search then continues with every later key.
    Complete combinations (as judged by ``is_combination_valid``) are appended to
    ``result``, which is also the return value.
    """
    current_combination = [] if current_combination is None else current_combination
    result = [] if result is None else result
    node_count = len(valid_subsets_dict)

    # Base case: if all tasks are already covered, record the combination as is
    if is_combination_valid(current_combination, node_count):
        result.append(current_combination)
        return result

    # Recursive case: try every candidate subset stored under the current key
    for candidate in valid_subsets_dict[current_key]:
        extended = current_combination + [candidate]
        covered = [node for block in extended for node in block]

        # prune dead ends early: too many entries, or some node used twice
        if len(covered) > node_count or len(covered) != len(set(covered)):
            continue

        # print(f'new combination: {extended}')

        if set(range(node_count)) - set(covered):
            # some nodes are still uncovered: continue the search with every later key
            for later_key in range(current_key + 1, node_count):
                generate_combinations(valid_subsets_dict, later_key, extended, result)
        elif is_combination_valid(extended, node_count):
            # everything is covered: record it and stop scanning this key
            result.append(extended)
            return result

    return result

### Compute costs of all "valid" execution plans
#### New validity check: the automated (management) cost of a non-singleton partition must not exceed the total human cost of doing the partition's tasks separately

In [22]:
def get_partition_boundary(adjacency_matrix, partition):
    """Return the nodes of ``partition`` that have an edge to a node outside it.

    ``adjacency_matrix[i, j]`` being non-zero is read as an edge from ``i`` to
    ``j``. The result keeps the order in which the nodes appear in ``partition``.
    """
    # keep only the columns of nodes outside the partition (incoming edges of
    # outsiders), then only the rows of the partition's own nodes (their outgoing edges)
    outside_columns = np.delete(adjacency_matrix, partition, axis=1)
    outgoing_edges = outside_columns[partition, :]

    boundary_nodes = []
    for row_position, node in enumerate(partition):
        # row `row_position` holds the edges of `node` towards non-partition nodes
        if np.any(outgoing_edges[row_position, :]):
            boundary_nodes.append(node)

    return boundary_nodes


def compute_partition_cost(adjacency_matrix, M_dict, A_dict, D_dict, AI_quality, partition):
    """Compute the cost of executing one partition and whether the partition is admissible.

    Parameters
    ----------
    adjacency_matrix : array-like
        DAG adjacency matrix; only used for partitions with more than one node.
    M_dict, A_dict, D_dict : dict
        Per-node human cost, management (AI) cost and difficulty.
    AI_quality : float
        Quality parameter; management cost is
        ``sum(A) * AI_quality ** (-sum(D))``.
    partition : list
        Node indices forming the partition.

    Returns
    -------
    tuple
        ``(partition_cost, partition_boundary_tasks, task_done_by_human, partition_is_valid)``.

        * Single node: always valid. The cheaper of human and management cost is
          used (ties go to management); when done by a human the boundary list is
          empty, otherwise it is ``partition`` itself.
        * Several nodes: management cost sums ``A`` over the boundary nodes only
          and ``D`` over all nodes. The partition is invalid (a large sentinel
          cost is returned) when it has no boundary node or when doing its tasks
          by hand would be strictly cheaper.

    Raises
    ------
    ValueError
        If ``partition`` is empty.
    """
    # placeholder cost reported for invalid partitions (the value itself is never used)
    invalid_partition_cost = 100000000

    if len(partition) == 0:
        raise ValueError('Cannot compute the cost of an empty partition.')

    # human cost of doing every task of the partition separately
    human_cost = sum(M_dict[key] for key in partition)

    # singleton partition: pick the minimum of human and management cost
    if len(partition) == 1:
        AI_cost = sum(A_dict[key] for key in partition)
        difficulty = sum(D_dict[key] for key in partition)
        management_cost = AI_cost * (AI_quality ** (-1 * difficulty))

        if human_cost < management_cost:
            return human_cost, [], True, True
        return management_cost, partition, False, True

    # non-singleton partition: only its boundary tasks have to be managed
    partition_boundary_tasks = get_partition_boundary(adjacency_matrix, partition)

    # sanity check: no partition should have an empty inner boundary
    if len(partition_boundary_tasks) == 0:
        return invalid_partition_cost, [], False, False

    # boundary tasks drive the management cost, all partition tasks drive the difficulty
    AI_cost = sum(A_dict[key] for key in partition_boundary_tasks)
    difficulty = sum(D_dict[key] for key in partition)
    management_cost = AI_cost * (AI_quality ** (-1 * difficulty))

    # if human cost < management cost the partition should not have been formed
    if human_cost < management_cost:
        return invalid_partition_cost, partition_boundary_tasks, False, False
    return management_cost, partition_boundary_tasks, False, True


In [23]:
# Random thought: it may be better to sort valid_partitions in descending order of partition size, to avoid recalculating single-node partitions every time.
# The downside is that the heaviest calculations would then have to be done first.


def execute_plans(adjacency_matrix, valid_partitions, M_dict, A_dict, D_dict, alpha):
    """Cost every partitioning scheme and keep the ones whose partitions are all valid.

    Parameters
    ----------
    adjacency_matrix : array-like
        DAG adjacency matrix, forwarded to ``compute_partition_cost``.
    valid_partitions : list
        Partitioning schemes; each scheme is a list of partitions (lists of nodes).
    M_dict, A_dict, D_dict : dict
        Per-node human cost, management cost and difficulty.
    alpha : float
        AI quality passed to ``compute_partition_cost``.

    Returns
    -------
    tuple of four parallel lists
        ``(execution_plan, execution_plan_augmented_tasks,
        execution_plan_human_tasks, execution_plan_cost)``: the retained schemes,
        for each of them the boundary tasks of its non-human partitions (each
        wrapped in a one-element list), its partitions done by a human, and its
        total cost.
    """
    execution_plan = []
    execution_plan_augmented_tasks = []
    execution_plan_human_tasks = []
    execution_plan_cost = []

    for scheme in valid_partitions:
        # per-scheme accumulators
        scheme_cost = 0
        augmented_tasks = []
        human_tasks = []

        # Tracked per scheme: reading the flag of the last computed partition after
        # the loop would be undefined (or stale from the previous scheme) whenever
        # a scheme contains no partition.
        scheme_is_valid = True

        for partition in scheme:
            # calculate partition cost
            partition_cost, partition_boundary_tasks, task_done_by_human, partition_is_valid = compute_partition_cost(adjacency_matrix, M_dict, A_dict, D_dict, alpha, partition)

            # an invalid (automated) partition disqualifies the whole scheme;
            # no need to cost the remaining partitions
            if not partition_is_valid:
                scheme_is_valid = False
                break

            if task_done_by_human:
                human_tasks.append(partition)
            else:
                augmented_tasks.extend([boundary_task] for boundary_task in partition_boundary_tasks)

            # add this partition's cost to the scheme cost
            scheme_cost += partition_cost

        if not scheme_is_valid:
            continue

        # scheme makes sense: record it together with its cost
        execution_plan.append(scheme)
        execution_plan_augmented_tasks.append(augmented_tasks)
        execution_plan_human_tasks.append(human_tasks)
        execution_plan_cost.append(scheme_cost)

    return execution_plan, execution_plan_augmented_tasks, execution_plan_human_tasks, execution_plan_cost

### Combine steps into a function to run a for loop over

In [24]:
def DAG_costMin(input_path, output_path, n=1000, tasks_stats_path=None):
    """Find the cost-minimizing execution plan of a DAG for a sweep of AI-quality values.

    Parameters
    ----------
    input_path : str
        CSV with the DAG edges (columns ``source`` and ``target``; an optional
        ``comment`` column marks edges to ignore with the suffix
        ``TriangleRemovedFlag``).
    output_path : str
        Where the per-alpha results are written as CSV.
    n : int, default 1000
        Number of alpha values, evenly spaced in ``[1e-8, 1 - 1e-8]``.
    tasks_stats_path : str, optional
        CSV with the columns ``task``, ``human_cost``, ``management_cost`` and
        ``management_difficulty``. When omitted, the path is built from the
        notebook-level variables ``occupation_folder`` and ``occupation``
        (the previous, implicit behavior).

    Raises
    ------
    ValueError
        If no execution plan is valid for some alpha.

    Notes
    -----
    The node order is the order of first appearance of tasks in the task-stats
    file; ``get_valid_DAG_subsets`` drops the last node as the Target node.
    """
    # set alpha as AI quality metric (epsilon keeps it strictly inside (0, 1))
    epsilon = 1e-8
    alpha_list = np.linspace(epsilon, 1-epsilon, n).tolist()

    # read DAG
    dag_df = pd.read_csv(input_path)

    # remove edges if comment column labeled with "TriangleRemovedFlag"
    # (edge is there for plotting purposes and is not part of the actual DAG).
    # Cast to str first so that rows without a comment (NaN) are simply kept
    # instead of breaking the boolean negation.
    if 'comment' in dag_df.columns:
        is_plot_only_edge = dag_df['comment'].astype(str).str.endswith('TriangleRemovedFlag')
        dag_df = dag_df[~is_plot_only_edge]

    # get task stats
    if tasks_stats_path is None:
        # fall back on the variables set by the driver loop of the notebook
        tasks_stats_path = f'{occupation_folder}/{occupation}_taskStats.csv'
    tasks_stats = pd.read_csv(tasks_stats_path)

    # alternative specification kept for reference:
    #   columns ['task', 'human_cost', 'machine_cost', 'completion_difficulty'] with
    #   machine_cost -> management_cost and completion_difficulty -> difficulty
    tasks_stats = (
        tasks_stats[['task', 'human_cost', 'management_cost', 'management_difficulty']]
        .rename(columns={'management_difficulty': 'difficulty'})
    )

    # extract list of tasks and create dictionaries for indexing tasks
    tasks_list = tasks_stats['task'].unique()
    tasks_dict = {i: node for i, node in enumerate(tasks_list, start=0)}
    aux_dict = {value: key for key, value in tasks_dict.items()}

    # create numpy array of adjacency matrix
    adjacency_matrix = np.zeros((len(tasks_list), len(tasks_list)), dtype=int)
    for source, target in zip(dag_df['source'], dag_df['target']):
        adjacency_matrix[aux_dict[source], aux_dict[target]] = 1

    # add task_dict key and use it as (unnamed) index
    tasks_stats = (
        tasks_stats
        .assign(dict_index=tasks_stats['task'].map(aux_dict))
        .sort_values(by='dict_index')
        .set_index('dict_index', drop=False)
    )
    tasks_stats.index.name = None

    # create dictionaries for human cost, management cost, and difficulty
    M_dict = dict(zip(tasks_stats['dict_index'], tasks_stats['human_cost']))
    A_dict = dict(zip(tasks_stats['dict_index'], tasks_stats['management_cost']))
    D_dict = dict(zip(tasks_stats['dict_index'], tasks_stats['difficulty']))

    # get all valid subsets in a dict
    valid_subsets_creation_start = time.time()
    valid_subsets_dict = get_valid_DAG_subsets(adjacency_matrix)
    valid_subsets_creation_time = (time.time() - valid_subsets_creation_start)/60
    print(f"valid subsets dictionary creation: {valid_subsets_creation_time:.2f} minutes")

    # generate valid combinations
    generate_valid_partitions_start = time.time()
    valid_partitions = generate_combinations(valid_subsets_dict, 0)
    generate_valid_partitions_time = (time.time() - generate_valid_partitions_start)/60
    print(f"valid execution plans generation: {generate_valid_partitions_time:.2f} minutes")

    # Print stats
    print(f'Number of valid partitioning schemes given DAG structure: {len(valid_partitions)}')

    # run once to get stat
    execution_plan, execution_plan_augmented_tasks, execution_plan_human_tasks, execution_plan_cost = execute_plans(adjacency_matrix, valid_partitions, M_dict, A_dict, D_dict, 0.5)
    print(f'Number of valid execution plans for alpha = 0.5 as example: {len(execution_plan)}')

    random.seed(1)
    minimum_cost_list = []
    number_of_optimal_schemes_list = []
    optimal_execution_plan_list = []
    optimal_plan_augmentedTasks_list = []
    optimal_plan_augmentedTasks_count_list = []
    optimal_plan_humanTasks_list = []
    optimal_plan_humanTasks_count_list = []
    for alpha in alpha_list:
        # get list of execution plans and costs for this alpha
        execution_plan, execution_plan_augmented_tasks, execution_plan_human_tasks, execution_plan_cost = execute_plans(adjacency_matrix, valid_partitions, M_dict, A_dict, D_dict, alpha)

        if not execution_plan_cost:
            raise ValueError(f'No valid execution plan found for alpha={alpha}.')

        # choose minimum
        minimum_cost = min(execution_plan_cost)
        minimum_cost_index = [index for index, value in enumerate(execution_plan_cost) if value == minimum_cost]

        # in rare cases there are more than one optimal plan
        # NOTE(review): in that case the three values below are lists of plans, so
        # the "count" columns written further down hold the number of tied plans
        # rather than a number of tasks -- confirm this is intended.
        if len(minimum_cost_index) > 1:
            optimal_execution_scheme = [execution_plan[index] for index in minimum_cost_index]
            optimal_execution_human_tasks = [execution_plan_human_tasks[index] for index in minimum_cost_index]
            optimal_execution_augmented_tasks = [execution_plan_augmented_tasks[index] for index in minimum_cost_index]
        else:
            best_index = minimum_cost_index[0]
            optimal_execution_scheme = execution_plan[best_index]
            optimal_execution_human_tasks = execution_plan_human_tasks[best_index]
            optimal_execution_augmented_tasks = execution_plan_augmented_tasks[best_index]

        # append lists
        minimum_cost_list.append(minimum_cost)
        number_of_optimal_schemes_list.append(len(minimum_cost_index))
        optimal_execution_plan_list.append(optimal_execution_scheme)
        optimal_plan_augmentedTasks_list.append(optimal_execution_augmented_tasks)
        optimal_plan_augmentedTasks_count_list.append(len(optimal_execution_augmented_tasks))
        optimal_plan_humanTasks_list.append(optimal_execution_human_tasks)
        optimal_plan_humanTasks_count_list.append(len(optimal_execution_human_tasks))

    # save outputs
    output_df = pd.DataFrame({
        'alpha': alpha_list,
        'optimal_schemes_count': number_of_optimal_schemes_list,
        'cost': minimum_cost_list,
        'optimal_scheme': optimal_execution_plan_list,
        'optimal_scheme_augmented_tasks': optimal_plan_augmentedTasks_list,
        'augmented_tasks_count': optimal_plan_augmentedTasks_count_list,
        'optimal_scheme_human_tasks': optimal_plan_humanTasks_list,
        'human_tasks_count': optimal_plan_humanTasks_count_list
    })
    output_df.to_csv(output_path, index=False)

## Main Code

In [25]:
import time
# wall-clock reference used for the "runtime since start" reports
start_time = time.time()

# number of alphas to sweep over
n = 100
print(f'Number of alphas to sweep over: {n}')

# yearly O*NET occupation/task table
onet_data_path = f'{data_path}/data/onet_occupations_yearly.csv'

# Occupations to process.
# - "personalCareAides" was removed from the original list.
# - For a quick run, restrict the list to a few entries, e.g.
#   ['travelAgents', 'insuranceUnderwriters', 'pileDriverOperators'] or ['travelAgents'].
occupation_list = [
    'pileDriverOperators',
    'dredgeOperators',
    'gradersAndSortersForAgriculturalProducts',
    'insuranceUnderwriters',
    'insuranceAppraisersForAutoDamage',
    'floorSandersAndFinishers',
    'reinforcingIronAndRebarWorkers',
    'travelAgents',
    'dataEntryKeyer',
    'athletesAndSportsCompetitors',
    'audiovisualEquipmentInstallerAndRepairers',
    'hearingAidSpecialists',
    'proofreadersAndCopyMarkers',
    'chiropractors',
    'shippingReceivingAndInventoryClerks',
    'cooksShortOrder',
    'orthodontists',
    'subwayAndStreetcarOperators',
    'packersAndPackagersHand',
    'hoistAndWinchOperators',
    'forgingMachineSettersOperatorsAndTenders',
    'avionicsTechnicians',
    'dishwashers',
    'dispatchersExceptPoliceFireAndAmbulance',
    'familyMedicinePhysicians',
    'MachineFeedersAndOffbearers',
]

Number of alphas to sweep over: 100


In [26]:
# Driver: for every occupation, run the cost minimization on each of its DAG variants
# and report timings.
# NOTE(review): num_tasks_previous is never read or updated in the visible code.
num_tasks_current = 0
num_tasks_previous = 0
for occupation in occupation_list:
    print(f'\n---------------------- Running: {occupation} ----------------------')
    occupation_start_time = time.time()

    # generate occupation-specific strings
    # (`occupation` and `occupation_folder` are also read as globals inside DAG_costMin
    # to locate the task-stats file, so these names must not be changed)
    GPT_input_occupation, plot_title_occupation, occupation_code, occupation_folder = pick_occupation(occupation)


    # Get occupation tasks to create all possible partitions
    tasks = get_tasks(onet_data_path, occupation_code)
    num_tasks_current = len(tasks)
    print(f'Number of non-target tasks: {num_tasks_current}')

    # Manual DAG
    M_input_path = f'{occupation_folder}/{occupation}_M_DAG_df.csv'
    M_output_path = f'{occupation_folder}/{occupation}_costMin_M.csv'

    # Naive DAG
    N_input_path = f'{occupation_folder}/{occupation}_N_GPT_DAG_df.csv'
    N_output_path = f'{occupation_folder}/{occupation}_costMin_N.csv'

    # Conditioned Naive DAG
    CN_input_path = f'{occupation_folder}/{occupation}_CN_GPT_DAG_df.csv'
    CN_output_path = f'{occupation_folder}/{occupation}_costMin_CN.csv'

    # First Last Task DAG
    FLT_input_path = f'{occupation_folder}/{occupation}_FLT_GPT_DAG_df.csv'
    FLT_output_path = f'{occupation_folder}/{occupation}_costMin_FLT.csv'

    # Conditioned First Last Task DAG
    CFLT_input_path = f'{occupation_folder}/{occupation}_CFLT_GPT_DAG_df.csv'
    CFLT_output_path = f'{occupation_folder}/{occupation}_costMin_CFLT.csv'

    # Partitioned DAG
    P_input_path = f'{occupation_folder}/{occupation}_P_GPT_DAG_df.csv'
    P_output_path = f'{occupation_folder}/{occupation}_costMin_P.csv'

    # Conditioned Partitioned DAG
    CP_input_path = f'{occupation_folder}/{occupation}_CP_GPT_DAG_df.csv'
    CP_output_path = f'{occupation_folder}/{occupation}_costMin_CP.csv'
    


    # create list of all DAGs
    # (the Manual DAG is only included for the three occupations listed below;
    # the three lists are parallel and must stay in the same order)
    if occupation in ['travelAgents', 'insuranceUnderwriters', 'pileDriverOperators']:
        DAG_indicator_list = ['Manual DAG', 'Naive DAG', 'Conditioned Naive DAG', 'First-Last Task DAG', 'Conditioned First-Last Task DAG', 'Partitioned DAG', 'Conditioned Partitioned DAG']
        input_paths_list = [M_input_path, N_input_path, CN_input_path, FLT_input_path, CFLT_input_path, P_input_path, CP_input_path]
        output_paths_list = [M_output_path, N_output_path, CN_output_path, FLT_output_path, CFLT_output_path, P_output_path, CP_output_path]
    else:
        DAG_indicator_list = ['Naive DAG', 'Conditioned Naive DAG', 'First-Last Task DAG', 'Conditioned First-Last Task DAG', 'Partitioned DAG', 'Conditioned Partitioned DAG']
        input_paths_list = [N_input_path, CN_input_path, FLT_input_path, CFLT_input_path, P_input_path, CP_input_path]
        output_paths_list = [N_output_path, CN_output_path, FLT_output_path, CFLT_output_path, P_output_path, CP_output_path]


    # run the cost minimization for every DAG variant of this occupation
    for DAG_indicator, input_path, output_path in zip(DAG_indicator_list, input_paths_list, output_paths_list):
        print(f'\n-------Running: {occupation} - {DAG_indicator}-------')
        
        DAG_start_time = time.time()
        DAG_costMin(input_path, output_path, n)
        DAG_end_time = time.time()

        DAG_execution_time = DAG_end_time - DAG_start_time
        print(f"\n{occupation} {DAG_indicator} runtime: {DAG_execution_time:.2f} seconds")

    # per-occupation and cumulative timings (in minutes)
    occupation_end_time = time.time()
    occupation_execution_time = (occupation_end_time - occupation_start_time)/60
    print(f"\n\n************* {occupation} runtime: {occupation_execution_time:.2f} minutes *************")
    runtime_since_start = (time.time() - start_time)/60
    print(f"\nruntime since start: {runtime_since_start:.2f} minutes\n")


# total runtime, measured from start_time set in the configuration cell
end_time = time.time()
execution_time = (end_time - start_time)/60
print(f"\n\nTotal Runtime: {execution_time:.2f} minutes")


---------------------- Running: pileDriverOperators ----------------------
Number of non-target tasks: 5

-------Running: pileDriverOperators - Manual DAG-------
valid subsets dictionary creation: 0.00 minutes
valid execution plans generation: 0.00 minutes
Number of valid partitioning schemes given DAG structure: 26
Number of valid execution plans for alpha = 0.5 as example: 25

pileDriverOperators Manual DAG runtime: 0.05 seconds

-------Running: pileDriverOperators - Naive DAG-------
valid subsets dictionary creation: 0.00 minutes
valid execution plans generation: 0.00 minutes
Number of valid partitioning schemes given DAG structure: 30
Number of valid execution plans for alpha = 0.5 as example: 27

pileDriverOperators Naive DAG runtime: 0.05 seconds

-------Running: pileDriverOperators - Conditioned Naive DAG-------
valid subsets dictionary creation: 0.00 minutes
valid execution plans generation: 0.00 minutes
Number of valid partitioning schemes given DAG structure: 20
Number of va