# Load libraries and functions

In [513]:
# Execute the shared helper scripts in this notebook's namespace so that the
# names they define are available to the cells below.
# NOTE: exec runs the file contents verbatim -- only use with trusted, local files.
for script_path in ('libraries.py', 'functions.py'):
    # Explicit UTF-8 matches Python's default source encoding regardless of OS locale.
    with open(script_path, encoding='utf-8') as f:
        code = f.read()
    # Compiling with the real file name makes tracebacks point at the script
    # and line that failed instead of an anonymous "<string>".
    exec(compile(code, script_path, 'exec'))

In [514]:
# Determine the current login name and set the machine-specific project paths.
user = getpass.getuser()
if user == 'peymansh':
    main_folder_path = '/Users/peymansh/Dropbox (MIT)/Research/AI and Occupations/ai-exposure'
    # data_path is the "output" subfolder of the project folder
    data_path = f'{main_folder_path}/output'
# NOTE(review): this cell sets no paths for any other user name -- confirm they
# are defined elsewhere, or add a branch for each collaborator.

# Main Code

In [515]:
# Pick occupation and initialize variables
# (switch occupations by swapping which of the two assignments is commented out)
occupation = 'travelAgents'
#occupation = 'insuranceUnderwriters'

# pick_occupation is not defined in this notebook (presumably it comes from
# functions.py -- confirm). Its four return values are unpacked here; of these,
# only plot_title_occupation and occupation_folder are used in the cells shown below.
GPT_input_occupation, plot_title_occupation, occupation_code, occupation_folder = pick_occupation(occupation)

In [516]:
# Folder used as the save location for every HTML graph written below.
# NOTE(review): nothing in this notebook creates the folder -- confirm it
# already exists or that plot_graphs creates it.
plot_output_path = f'{occupation_folder}/plots'

In [517]:
# Read DAG datasets: one CSV per DAG variant, all stored in the occupation folder.
dag_csv_paths = (
    f'{occupation_folder}/{occupation}_manual_DAG_df.csv',
    f'{occupation_folder}/{occupation}_oneStepGPT_DAG_df.csv',
    f'{occupation_folder}/{occupation}_twoStepGPT_DAG_df.csv',
    f'{occupation_folder}/{occupation}_firstLastTaskGPT_DAG_df.csv',
    f'{occupation_folder}/{occupation}_conditionedGPT_DAG_df.csv',
    f'{occupation_folder}/{occupation}_partitionedGPT_DAG_df.csv',
)

# Load the files in the order listed above and bind each frame to its name.
(
    manual_DAG_df,
    GPT_DAG_oneStep_df,
    GPT_DAG_twoStep_df,
    GPT_DAG_firstLastTask_df,
    GPT_DAG_conditioned_df,
    GPT_DAG_partitioned_df,
) = [pd.read_csv(csv_path) for csv_path in dag_csv_paths]

# The manual DAG gets an empty 'comment' column for compatibility with the GPT DAGs.
manual_DAG_df['comment'] = ''

# Plot Interactive Graphs

In [518]:
# Comparison: Manual DAG (red) vs. One-Step GPT DAG (blue), saved as HTML.
plot_graphs(
    occupation=occupation,
    df1=manual_DAG_df,
    df2=GPT_DAG_oneStep_df,
    df1_comment='Only in Manual DAG',
    df2_comment='Only in One-Step GPT DAG:\n',
    df1_unique_color='red',
    df2_unique_color='blue',
    graph_title=f'{plot_title_occupation} - Manual (red) vs. One-Step GPT (blue) DAG',
    save_path=f'{plot_output_path}/{occupation}_manual_vs_oneStepGPT_DAG.html',
)

### Takeaway: GPT creates too many edges, yet it always misses some of the edges present in the Manual DAG.

<br>

<br>

In [519]:
# Comparison: One-Step GPT DAG (red) vs. Two-Step GPT DAG (blue), saved as HTML.
plot_graphs(
    occupation=occupation,
    df1=GPT_DAG_oneStep_df,
    df2=GPT_DAG_twoStep_df,
    df1_comment='Only in One-Step GPT DAG:\n',
    df2_comment='Only in Two-Step GPT DAG:\n',
    df1_unique_color='red',
    df2_unique_color='blue',
    graph_title=f'{plot_title_occupation} - One-Step GPT (red) vs. Two-Step GPT (blue) DAG',
    save_path=f'{plot_output_path}/{occupation}_oneStep_vs_twoStep_GPT_DAG.html',
)

### Takeaway: Before the code refactoring, each method had one or two edges unique to it; after the refactoring, both produce exactly the same DAG.

<br>

<br>

In [520]:
# Comparison: Manual DAG (red) vs. Conditioned GPT DAG (blue), saved as HTML.
plot_graphs(
    occupation=occupation,
    df1=manual_DAG_df,
    df2=GPT_DAG_conditioned_df,
    df1_comment='Only in Manual DAG',
    df2_comment='Only in Conditioned GPT DAG:\n',
    df1_unique_color='red',
    df2_unique_color='blue',
    graph_title=f'{plot_title_occupation} - Manual (red) vs. Conditioned GPT (blue) DAG',
    save_path=f'{plot_output_path}/{occupation}_manual_vs_conditionedGPT_DAG.html',
)

### Takeaway: The Triangles method cuts too many edges, probably because the edge-removal algorithm is not perfect. (Refer to the issues mentioned in the df creation code.)

<br>

<br>

In [521]:
# Comparison: Manual DAG (red) vs. First-Last Task GPT DAG (blue), saved as HTML.
plot_graphs(
    occupation=occupation,
    df1=manual_DAG_df,
    df2=GPT_DAG_firstLastTask_df,
    df1_comment='Only in Manual DAG',
    df2_comment='Only in First-Last Task GPT DAG:\n',
    df1_unique_color='red',
    df2_unique_color='blue',
    graph_title=f'{plot_title_occupation} - Manual (red) vs. First-Last Task GPT (blue) DAG',
    save_path=f'{plot_output_path}/{occupation}_manual_vs_firstLastTaskGPT_DAG.html',
)

### Takeaway: Like the other first-stage GPT outputs, the First-Last Task method creates too many edges. For the travel agents occupation, two edges are unique to the manual DAG, though GPT's reasoning in interpreting the sequence of tasks might be different, so this is not necessarily a bad thing.

#### The First-Last Task method seems to work better than One-Step, so the input to Triangles was changed to First-Last Task.

<br>

<br>

In [522]:
# Comparison: One-Step GPT DAG (red) vs. First-Last Task GPT DAG (blue), saved as HTML.
plot_graphs(
    occupation=occupation,
    df1=GPT_DAG_oneStep_df,
    df2=GPT_DAG_firstLastTask_df,
    df1_comment='Only in One-Step GPT DAG:\n',
    df2_comment='Only in First-Last Task GPT DAG:\n',
    df1_unique_color='red',
    df2_unique_color='blue',
    graph_title=f'{plot_title_occupation} - One-Step GPT (red) vs. First-Last Task GPT (blue) DAG',
    save_path=f'{plot_output_path}/{occupation}_oneStepGPT_vs_firstLastTaskGPT_DAG.html',
)

### Takeaway: For travel agents, it *seems* that the unique last task in the One-Step GPT method is "Print". So, there are about a dozen differences between the approaches. As first-stage outputs, both create too many edges compared to the manual DAG, so the set of edges needs to be narrowed down.

<br>

<br>

In [523]:
# Comparison: First-Last Task GPT DAG (red) vs. Conditioned GPT DAG (blue), saved as HTML.
plot_graphs(
    occupation=occupation,
    df1=GPT_DAG_firstLastTask_df,
    df2=GPT_DAG_conditioned_df,
    df1_comment='Only in First-Last Task GPT DAG:\n',
    df2_comment='Only in Conditioned GPT DAG:\n',
    df1_unique_color='red',
    df2_unique_color='blue',
    graph_title=f'{plot_title_occupation} - First-Last Task GPT (red) vs. Conditioned GPT (blue) DAG',
    save_path=f'{plot_output_path}/{occupation}_firstLastTaskGPT_vs_conditionedGPT_DAG.html',
)

In [524]:
# Comparison: Conditioned GPT DAG (red) vs. Partitioned GPT DAG (blue), saved as HTML.
plot_graphs(
    occupation=occupation,
    df1=GPT_DAG_conditioned_df,
    df2=GPT_DAG_partitioned_df,
    df1_comment='Only in Conditioned GPT DAG:\n',
    df2_comment='Only in Partitioned GPT DAG:\n',
    df1_unique_color='red',
    df2_unique_color='blue',
    graph_title=f'{plot_title_occupation} - Conditioned GPT (red) vs. Partitioned GPT (blue) DAG',
    save_path=f'{plot_output_path}/{occupation}_conditionedGPT_vs_partitionedGPT_DAG.html',
)

In [525]:
# Comparison: Manual DAG (red) vs. Partitioned GPT DAG (blue), saved as HTML.
plot_graphs(
    occupation=occupation,
    df1=manual_DAG_df,
    df2=GPT_DAG_partitioned_df,
    df1_comment='Only in Manual DAG',
    df2_comment='Only in Partitioned GPT DAG:\n',
    df1_unique_color='red',
    df2_unique_color='blue',
    graph_title=f'{plot_title_occupation} - Manual (red) vs. Partitioned GPT (blue) DAG',
    save_path=f'{plot_output_path}/{occupation}_manual_vs_partitionedGPT_DAG.html',
)

## Combine the four main graphs into one HTML file

In [526]:
def create_combined_html(graph_files, output_file):
    """Write an HTML page that embeds every graph file in its own <iframe>.

    The iframes are emitted in the order given, inside a two-column CSS grid
    whose two explicit rows each take half of the viewport height.

    Args:
        graph_files: Iterable of paths (or URLs) of the HTML graphs to embed.
            Each value is HTML-escaped and used as an iframe ``src``; it is
            otherwise inserted unchanged, so it must be resolvable from
            wherever ``output_file`` is opened.
        output_file: Path of the combined HTML file. It is opened in write
            mode, so an existing file is overwritten.
    """
    # Local import keeps this block self-contained within the notebook.
    from html import escape

    # HTML content for the combined file (everything up to the grid container)
    html_head = """
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>Combined Graphs</title>
        <style>
            .container {
                display: grid;
                grid-template-columns: 50% 50%;
                grid-template-rows: 50% 50%;
                height: 100vh;
                margin: 0;
            }
            iframe {
                width: 100%;
                height: 100%;
                border: none;
            }
        </style>
    </head>
    <body>
        <div class="container">
    """

    # One iframe per graph file. Escaping keeps a quote, '&' or '<' in a path
    # from terminating the attribute or injecting markup.
    iframes = ''.join(
        f'<iframe src="{escape(str(graph_file), quote=True)}"></iframe>\n'
        for graph_file in graph_files
    )

    # Close the HTML tags
    html_tail = """
        </div>
    </body>
    </html>
    """

    # Write as UTF-8 to match the charset declared in <head>, independent of
    # the platform's default encoding.
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(html_head + iframes + html_tail)

# Combine the four main comparison graphs into a single page.
# NOTE(review): the iframe sources carry the plot_output_path prefix while the
# combined page is itself written into plot_output_path -- confirm the
# embedded graphs resolve when the page is opened.
bigfour_output_file = f'{plot_output_path}/{occupation}_bigFour.html'
graph_paths = [
    f'{plot_output_path}/{occupation}_manual_vs_firstLastTaskGPT_DAG.html',
    f'{plot_output_path}/{occupation}_manual_vs_partitionedGPT_DAG.html',
    f'{plot_output_path}/{occupation}_manual_vs_conditionedGPT_DAG.html',
    f'{plot_output_path}/{occupation}_conditionedGPT_vs_partitionedGPT_DAG.html',
]
create_combined_html(graph_files=graph_paths, output_file=bigfour_output_file)