#### By: Peyman Shahidi
#### Created: Jan 23, 2025
#### Last Edit: Jan 23, 2025

<br>

In [13]:
#Python
import getpass
import numpy as np
import pandas as pd
from collections import defaultdict
import itertools
import random 

## Format floats with thousands separators and two decimals, e.g. 1000 is shown as 1,000.00.
## The fully qualified option name is used on purpose: abbreviated keys such as
## 'float_format' are resolved by pattern matching and may stop working if pandas
## later adds another option with a similar name.
pd.set_option('display.float_format', "{:,.2f}".format)

import matplotlib.pyplot as plt
#%matplotlib inline
#from matplotlib.legend import Legend

import warnings

# Show up to 200 rows when a DataFrame is rendered.
pd.options.display.max_rows = 200

# Suppress all warnings from here on.
# NOTE(review): this is a blanket filter, so deprecation notices are hidden too.
warnings.filterwarnings(action='ignore')

In [14]:
# Project root: one level above the current working directory.
main_folder_path = ".."

# Inputs are read from, and outputs written to, the same computed-objects folder.
input_data_path = output_data_path = f"{main_folder_path}/data/computed_objects"

# Folder for plots under the write-up directory.
output_plot_path = f"{main_folder_path}/writeup/plots"

In [15]:
# Create the output directories if they don't exist
import os

for path in [output_data_path, output_plot_path]:
    # exist_ok=True makes this idempotent and removes the check-then-create race
    # of a separate os.path.exists() test. Unlike the previous version, a path that
    # already exists as a regular file now raises instead of being skipped silently.
    os.makedirs(path, exist_ok=True)

### Main Code

In [16]:
# Load the cleaned O*NET tasks table from the input data folder.
ONET = pd.read_csv(
    filepath_or_buffer=f'{input_data_path}/ONET_cleaned_tasks.csv',
)

In [17]:
# Build the crosswalk between O*NET-SOC Code and Detailed_Occupation_Code:
# keep the four identifying columns, drop repeated rows, then order by
# detailed code and O*NET-SOC code with a fresh 0..n-1 index.
crosswalk = (
    ONET[[
        "O*NET-SOC Code",
        "Occupation Title",
        "Detailed_Occupation_Code",
        "Detailed_Occupation_Title",
    ]]
    .drop_duplicates()
    .sort_values(["Detailed_Occupation_Code", "O*NET-SOC Code"])
    .reset_index(drop=True)
)
crosswalk.head(5)

Unnamed: 0,O*NET-SOC Code,Occupation Title,Detailed_Occupation_Code,Detailed_Occupation_Title
0,11-1011.00,Chief Executives,11-1011,Chief Executives
1,11-1011.03,Chief Sustainability Officers,11-1011,Chief Executives
2,11-1021.00,General and Operations Managers,11-1021,General and Operations Managers
3,11-2011.00,Advertising and Promotions Managers,11-2011,Advertising and Promotions Managers
4,11-2021.00,Marketing Managers,11-2021,Marketing Managers


In [18]:
# Keep only the detailed occupations (code + title) that appear on more than
# one crosswalk row, i.e. those linked to several O*NET-SOC entries, then
# re-sort and renumber the rows.
codes_with_multiple = (
    crosswalk
    .groupby(["Detailed_Occupation_Code", "Detailed_Occupation_Title"])
    .filter(lambda group: len(group) > 1)
    .sort_values(["Detailed_Occupation_Code", "O*NET-SOC Code"])
    .reset_index(drop=True)
)
codes_with_multiple.head(5)

Unnamed: 0,O*NET-SOC Code,Occupation Title,Detailed_Occupation_Code,Detailed_Occupation_Title
0,11-1011.00,Chief Executives,11-1011,Chief Executives
1,11-1011.03,Chief Sustainability Officers,11-1011,Chief Executives
2,11-3031.00,Financial Managers,11-3031,Financial Managers
3,11-3031.01,Treasurers and Controllers,11-3031,Financial Managers
4,11-3031.03,Investment Fund Managers,11-3031,Financial Managers


In [19]:
# Write the multi-code crosswalk to CSV in the output data folder, without the row index.
codes_with_multiple.to_csv(
    path_or_buf=f"{output_data_path}/repetitive_onet_detailedOcc_occ_crosswalk.csv",
    index=False,
)