# LLM-Based Occupational Risk Measurement (CPS + O*NET)

In [None]:
# Core
import pandas as pd
import numpy as np
import json
import time

# OpenAI
from openai import OpenAI
from google.colab import userdata


In [None]:
# Build the API client; the key is fetched from Colab's secret store
# under the name "OpenAI_API" rather than being written into the notebook.
client = OpenAI(api_key=userdata.get("OpenAI_API"))

Load CPS

In [None]:
pip install pyreadr

Collecting pyreadr
  Downloading pyreadr-0.5.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (1.3 kB)
Downloading pyreadr-0.5.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (776 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/776.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m776.2/776.2 kB[0m [31m35.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyreadr
Successfully installed pyreadr-0.5.4


In [None]:
import pyreadr

# read_r returns a mapping of object name -> data frame; the CPS extract is
# the first (here: only expected) object stored in the .Rds file.
result = pyreadr.read_r("/content/individual-level-CPS-data.Rds")
first_key = list(result)[0]
cps = result[first_key]

n_obs, n_vars = cps.shape
print("Observations:", n_obs)
print("Variables:", n_vars)

Observations: 834419
Variables: 34


Load O*NET

In [None]:
# Read the tab-separated O*NET occupation file, keep the three fields used
# downstream, and give them short lowercase names.
onet_occ = (
    pd.read_csv("/content/Occupation Data.txt", sep="\t", encoding="utf-8")
    .loc[:, ["O*NET-SOC Code", "Title", "Description"]]
    .rename(columns={
        "O*NET-SOC Code": "soc",
        "Title": "title",
        "Description": "description",
    })
)

print("Occupations:", len(onet_occ))
onet_occ.head()


Occupations: 1016


Unnamed: 0,soc,title,description
0,11-1011.00,Chief Executives,Determine and formulate policies and provide o...
1,11-1011.03,Chief Sustainability Officers,"Communicate and coordinate with management, sh..."
2,11-1021.00,General and Operations Managers,"Plan, direct, or coordinate the operations of ..."
3,11-1031.00,Legislators,"Develop, introduce, or enact laws and statutes..."
4,11-2011.00,Advertising and Promotions Managers,"Plan, direct, or coordinate advertising polici..."


LLM Input Text

In [None]:
# Text block handed to the model for each occupation: a title line followed
# by a description line.
title_part = "Occupation Title: " + onet_occ["title"]
description_part = "\nOccupation Description: " + onet_occ["description"]
onet_occ["llm_text"] = title_part + description_part


Risk Classification Prompt

In [None]:
# System message sent with every labeling request (see label_occupation).
# It asks for a bare JSON object and forces a choice among the three labels.
SYSTEM_PROMPT = """
You are an economist labeling occupational risk.
You must return ONLY a valid JSON object.
No markdown. No commentary. No explanation.
If unsure, still choose Low, Medium, or High.
"""


# User message template. `{llm_text}` is filled in with str.format by
# label_occupation; the three keys listed here are the ones that function
# requires to be present in the model's reply.
USER_PROMPT_TEMPLATE = """
Given the occupation below, classify risk levels.

Return JSON with exactly these keys:
- physical_risk
- financial_liability_risk
- cyclical_job_security_risk

Each value must be one of: Low, Medium, High.

{llm_text}
"""


Single-Occupation Labeling

In [None]:
# Keys every model reply must contain, in the order they are returned.
_RISK_KEYS = (
    "physical_risk",
    "financial_liability_risk",
    "cyclical_job_security_risk",
)


def _parse_llm_json(text):
    """Parse the model's reply text into a Python object.

    Accepts raw JSON as well as JSON wrapped in a Markdown code fence,
    with or without a language tag on the opening fence (e.g. ```json).

    Raises:
        ValueError: if the reply is None or is not valid JSON
            (json.JSONDecodeError is a ValueError subclass).
    """
    if text is None:
        raise ValueError("LLM returned no content")

    text = text.strip()
    if text.startswith("```"):
        # Keep only what sits between the first pair of fences.
        text = text.split("```")[1]
        # The opening fence may carry a language tag on its own line
        # ("```json"); json.loads cannot parse it, so drop that line.
        tag, newline, rest = text.partition("\n")
        if newline and tag.strip().isalpha():
            text = rest
    return json.loads(text)


def label_occupation(row):
    """Ask the model for the three risk labels of one occupation.

    Args:
        row: mapping-like object (e.g. a DataFrame row) providing the
            "soc", "title" and "llm_text" entries.

    Returns:
        dict with keys "soc", "title", "physical_risk",
        "financial_liability_risk" and "cyclical_job_security_risk".
        Label values are passed through as returned by the model; they are
        not checked against Low/Medium/High here.

    Raises:
        ValueError: if the reply is empty, is not valid JSON, is not a JSON
            object, or lacks one of the required keys.
        Exception: whatever the API client raises for a failed request.
    """
    prompt = USER_PROMPT_TEMPLATE.format(llm_text=row["llm_text"])

    response = client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
        temperature=0,
    )

    parsed = _parse_llm_json(response.choices[0].message.content)

    # A JSON array or scalar parses fine but has no keys to look up.
    if not isinstance(parsed, dict):
        raise ValueError("LLM output is not a JSON object")

    missing = [key for key in _RISK_KEYS if key not in parsed]
    if missing:
        raise ValueError("Missing keys in LLM output: " + ", ".join(missing))

    labeled = {"soc": row["soc"], "title": row["title"]}
    for key in _RISK_KEYS:
        labeled[key] = parsed[key]
    return labeled

Test Batch

In [None]:
# Smoke-test the labeler on a fixed random sample of 25 occupations.
test_df = onet_occ.sample(n=25, random_state=42).copy()

results = []

for _, row in test_df.iterrows():
    try:
        results.append(label_occupation(row))
        # Brief pause between successful requests.
        time.sleep(0.15)
    except Exception as e:
        # Keep the occupation in the table, carrying the error text instead
        # of labels.
        results.append(dict(soc=row["soc"], title=row["title"], error=str(e)))

risk_test = pd.DataFrame(results)
risk_test

Unnamed: 0,soc,title,physical_risk,financial_liability_risk,cyclical_job_security_risk
0,47-4041.00,Hazardous Materials Removal Workers,High,Medium,Medium
1,31-9093.00,Medical Equipment Preparers,Medium,Low,Low
2,19-1022.00,Microbiologists,Low,Medium,Medium
3,53-1043.00,First-Line Supervisors of Material-Moving Mach...,Medium,Medium,Medium
4,51-9061.00,"Inspectors, Testers, Sorters, Samplers, and We...",Medium,Low,Medium
5,33-2022.00,Forest Fire Inspectors and Prevention Specialists,High,Low,Medium
6,49-9061.00,Camera and Photographic Equipment Repairers,Medium,Low,Medium
7,17-3027.00,Mechanical Engineering Technologists and Techn...,Medium,Low,Medium
8,19-1029.03,Geneticists,Low,Medium,Medium
9,39-3012.00,Gambling and Sports Book Writers and Runners,Low,High,Medium


Validate Risk Labels

In [None]:
valid = {"Low", "Medium", "High"}

def is_valid(r):
    """Return True when all three risk fields of `r` hold an allowed label.

    `r` only needs a dict-style `.get`; a missing field counts as invalid.
    """
    fields = (
        "physical_risk",
        "financial_liability_risk",
        "cyclical_job_security_risk",
    )
    return all(r.get(field) in valid for field in fields)

# Row-wise check, then the fraction of rows that passed.
risk_test["is_valid"] = risk_test.apply(is_valid, axis=1)

print("Valid share:", risk_test["is_valid"].mean())

Valid share: 1.0


In [None]:
# Persist the test-batch labels (including the is_valid flag) for inspection.
risk_test.to_csv("risk_test_validation.csv", index=False)

Label the full O*NET occupation list

In [None]:
# Settings for the full labeling run.
OUTPUT_PATH = "onet_risk_labels.csv"  # CSV the labels are checkpointed to and resumed from
CHECKPOINT_EVERY = 50  # write the CSV every this many loop iterations
SLEEP_SEC = 0.15  # pause (seconds) after each occupation in the labeling loop

In [None]:
import os

# Start from an empty table unless a previous run already wrote OUTPUT_PATH,
# in which case load it and remember which SOC codes are done.
if not os.path.exists(OUTPUT_PATH):
    risk_onet = pd.DataFrame(columns=[
        "soc",
        "title",
        "physical_risk",
        "financial_liability_risk",
        "cyclical_job_security_risk",
    ])
    done_socs = set()
    print("Fresh run.")
else:
    risk_onet = pd.read_csv(OUTPUT_PATH)
    done_socs = set(risk_onet["soc"].astype(str))
    print("Resuming. Already labeled:", len(done_socs))

Fresh run.


In [None]:
def _flush_new_rows(labeled, pending):
    """Append the `pending` label dicts to `labeled`, write OUTPUT_PATH, and
    return the combined frame."""
    combined = pd.concat([labeled, pd.DataFrame(pending)], ignore_index=True)
    combined.to_csv(OUTPUT_PATH, index=False)
    return combined


new_rows = []
# Occupations whose labeling raised; kept so failures are visible and can be
# inspected instead of disappearing silently.
failed_rows = []

# Skip everything already labeled: both what the resume cell found on disk and
# what is already in risk_onet, so re-running this cell neither re-queries nor
# duplicates occupations.
already_labeled = set(done_socs) | set(risk_onet["soc"].astype(str))
to_label = onet_occ[~onet_occ["soc"].astype(str).isin(already_labeled)].copy()
print("Remaining to label:", len(to_label))

for i, (_, row) in enumerate(to_label.iterrows(), start=1):
    try:
        new_rows.append(label_occupation(row))
    except Exception as e:
        # Best effort: keep going, but record what failed and why.
        failed_rows.append({
            "soc": row["soc"],
            "title": row["title"],
            "error": str(e)
        })

    # Periodic checkpoint so an interrupted run loses at most
    # CHECKPOINT_EVERY occupations.
    if i % CHECKPOINT_EVERY == 0 and new_rows:
        risk_onet = _flush_new_rows(risk_onet, new_rows)
        new_rows = []
        print(f"Checkpoint saved at {len(risk_onet)} occupations")

    time.sleep(SLEEP_SEC)

# Final save
if new_rows:
    risk_onet = _flush_new_rows(risk_onet, new_rows)
    new_rows = []

print("DONE. Total labeled:", len(risk_onet))
if failed_rows:
    print("Failed to label:", len(failed_rows),
          "(details in failed_rows; re-run this cell to retry them)")

Remaining to label: 1016
Checkpoint saved at 50 occupations
Checkpoint saved at 100 occupations
Checkpoint saved at 150 occupations
Checkpoint saved at 200 occupations
Checkpoint saved at 250 occupations
Checkpoint saved at 300 occupations
Checkpoint saved at 350 occupations
Checkpoint saved at 400 occupations
Checkpoint saved at 450 occupations
Checkpoint saved at 500 occupations
Checkpoint saved at 550 occupations
Checkpoint saved at 600 occupations
Checkpoint saved at 650 occupations
Checkpoint saved at 700 occupations
Checkpoint saved at 750 occupations
Checkpoint saved at 800 occupations
Checkpoint saved at 850 occupations
Checkpoint saved at 900 occupations
Checkpoint saved at 950 occupations
Checkpoint saved at 1000 occupations
DONE. Total labeled: 1016


In [None]:
# Only a cell's last expression is rendered, so the head() preview on the
# next line is evaluated but not shown; the label counts are what appears.
risk_onet.head()
risk_onet["physical_risk"].value_counts()


Unnamed: 0_level_0,count
physical_risk,Unnamed: 1_level_1
Low,447
Medium,347
High,222


In [None]:
# Number of rows in the labeled table.
print(len(risk_onet))

1016


Crosswalk

In [None]:
import pandas as pd

cw_path = "/content/occ_occsoc_crosswalk_2000_onward_without_code_descriptions.csv"

# The file is decoded as latin-1; the original note marks this setting as
# essential (presumably the file is not valid UTF-8 - confirm).
cw_raw = pd.read_csv(cw_path, encoding="latin-1", low_memory=False)

print("crosswalk shape:", cw_raw.shape)
print(list(cw_raw.columns))
cw_raw.head(10)

crosswalk shape: (836, 16)
['Unnamed: 0', '2000 1% Census OCC code', '2000 5% Census OCC code', '2000-2004 ACS OCC code', '2005-2009 ACS/PRCS OCC code', '2010-2012 ACS/PRCS OCC code', '2013-2017 ACS/PRCS OCC code', '2018 ACS/PRCS OCC code', '2000 1% Census OCCSOC code', '2000 5% Census OCCSOC code', '2000-2004 ACS OCCSOC code', '2005-2009 ACS/PRCS OCCSOC code', '2010-2012 ACS/PRCS OCCSOC', '2013-2017 ACS/PRCS OCCSOC code', '2018 Onward ACS/PRCS', 'Occupation title']


Unnamed: 0.1,Unnamed: 0,2000 1% Census OCC code,2000 5% Census OCC code,2000-2004 ACS OCC code,2005-2009 ACS/PRCS OCC code,2010-2012 ACS/PRCS OCC code,2013-2017 ACS/PRCS OCC code,2018 ACS/PRCS OCC code,2000 1% Census OCCSOC code,2000 5% Census OCCSOC code,2000-2004 ACS OCCSOC code,2005-2009 ACS/PRCS OCCSOC code,2010-2012 ACS/PRCS OCCSOC,2013-2017 ACS/PRCS OCCSOC code,2018 Onward ACS/PRCS,Occupation title
0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,Not Applicable (Under 16 years or not in the l...
1,"MANAGEMENT, BUSINESS, SCIENCE, AND ARTS OCCUPA...",,,,,,,,,,,,,,,
2,Management Occupations:,,,,,,,,,,,,,,,
3,,1.0,1.0,10.0,,,,,111011.0,111011.0,111011.0,,,,,Chief Executives
4,,,,,10.0,10.0,10.0,10.0,,,,1110XX,1110XX,1110XX,1110XX,Chief Executives and Legislators
5,,2.0,2.0,20.0,20.0,20.0,20.0,20.0,111021.0,111021.0,111021.0,111021,111021,111021,111021,General and Operations Managers
6,,3.0,3.0,30.0,,,,,111031.0,111031.0,111031.0,,,,,Legislators
7,,4.0,4.0,40.0,40.0,40.0,40.0,40.0,112011.0,112011.0,112011.0,112011,112011,112011,112011,Advertising and Promotions Managers
8,,5.0,5.0,50.0,50.0,50.0,50.0,,112020.0,112020.0,112020.0,112020,112020,112020,,Marketing and Sales Managers
9,,,,,,,,51.0,,,,,,,112021,Marketing Managers


In [None]:
# Exact column names of the raw crosswalk, needed for the selection below.
cw_raw.columns.tolist()

['Unnamed: 0',
 '2000 1% Census OCC code',
 '2000 5% Census OCC code',
 '2000-2004 ACS OCC code',
 '2005-2009 ACS/PRCS OCC code',
 '2010-2012 ACS/PRCS OCC code',
 '2013-2017 ACS/PRCS OCC code',
 '2018 ACS/PRCS OCC code',
 '2000 1% Census OCCSOC code',
 '2000 5% Census OCCSOC code',
 '2000-2004 ACS OCCSOC code',
 '2005-2009 ACS/PRCS OCCSOC code',
 '2010-2012 ACS/PRCS OCCSOC',
 '2013-2017 ACS/PRCS OCCSOC code',
 '2018 Onward ACS/PRCS',
 'Occupation title']

In [None]:
# 1) Select the right columns (matching your actual column names)
cw = cw_raw[[
    "2018 ACS/PRCS OCC code",
    "2018 Onward ACS/PRCS",
    "Occupation title"
]].copy()

# 2) Rename cleanly
cw.columns = ["occ", "soc", "title"]

cw.head(10)

Unnamed: 0,occ,soc,title
0,0.0,0,Not Applicable (Under 16 years or not in the l...
1,,,
2,,,
3,,,Chief Executives
4,10.0,1110XX,Chief Executives and Legislators
5,20.0,111021,General and Operations Managers
6,,,Legislators
7,40.0,112011,Advertising and Promotions Managers
8,,,Marketing and Sales Managers
9,51.0,112021,Marketing Managers


In [None]:
import pandas as pd

# Coerce both code columns to numbers (anything non-numeric, such as the
# wildcard code "1110XX", becomes NaN), drop rows missing either code, and
# store the survivors as integers.
cw = (
    cw.assign(
        occ=pd.to_numeric(cw["occ"], errors="coerce"),
        soc=pd.to_numeric(cw["soc"], errors="coerce"),
    )
    .dropna(subset=["occ", "soc"])
    .astype({"occ": int, "soc": int})
)

print("Clean rows:", len(cw))
cw.head(10)

Clean rows: 472


Unnamed: 0,occ,soc,title
0,0,0,Not Applicable (Under 16 years or not in the l...
5,20,111021,General and Operations Managers
7,40,112011,Advertising and Promotions Managers
9,51,112021,Marketing Managers
10,52,112022,Sales Managers
13,60,112030,Public Relations and Fundraising Managers
15,101,113012,Administrative Services Managers
16,102,113013,Facilities Managers
17,110,113021,Computer and Information Systems Managers
18,120,113031,Financial Managers


In [None]:
# Both counts should equal the row count if the mapping is one-to-one.
n_unique_occ = cw["occ"].nunique()
n_unique_soc = cw["soc"].nunique()
print(f"Unique OCC: {n_unique_occ}")
print(f"Unique SOC: {n_unique_soc}")

cw.to_csv("occ_to_soc_2018_clean.csv", index=False)

Unique OCC: 472
Unique SOC: 472


A) Build a clean OCC→SOC crosswalk for your CPS years (2018–2022)

In [None]:
import pandas as pd
import numpy as np

cw_path = "/content/occ_occsoc_crosswalk_2000_onward_without_code_descriptions.csv"

cw_raw = pd.read_csv(cw_path, encoding="latin-1", low_memory=False)

# Identify the right columns (exact names from the column listing above)
OCC_COL   = "2018 ACS/PRCS OCC code"
SOC_COL   = "2018 Onward ACS/PRCS"
TITLE_COL = "Occupation title"

# Built as a single chain so every step works on a fresh frame; assigning a
# column on a boolean-filtered frame (as the step-by-step version did)
# triggers pandas' chained-assignment warning.
cw = (
    cw_raw[[OCC_COL, SOC_COL, TITLE_COL]]
    .rename(columns={OCC_COL: "occ", SOC_COL: "soc", TITLE_COL: "title"})
    .assign(
        # Non-numeric OCC entries (section header rows) become NaN.
        occ=lambda d: pd.to_numeric(d["occ"], errors="coerce"),
        # SOC stays text so wildcard codes such as '1110XX' can be filtered out.
        soc=lambda d: d["soc"].astype(str).str.strip(),
        title=lambda d: d["title"].astype(str).str.strip(),
    )
    # Keep only rows with a real OCC code ...
    .dropna(subset=["occ"])
    # ... and a purely numeric SOC code.
    .loc[lambda d: d["soc"].str.fullmatch(r"\d+")]
    .astype({"occ": int})
    # One row per OCC code; the first occurrence wins.
    .drop_duplicates(subset=["occ"], keep="first")
    .reset_index(drop=True)
)

print("Clean rows:", len(cw))
print("Unique OCC:", cw["occ"].nunique())
print("Unique SOC:", cw["soc"].nunique())

cw.to_csv("/content/occ_to_soc_2018_clean.csv", index=False)
cw.head(10)


Clean rows: 472
Unique OCC: 472
Unique SOC: 472


Unnamed: 0,occ,soc,title
0,0,0,Not Applicable (Under 16 years or not in the l...
1,20,111021,General and Operations Managers
2,40,112011,Advertising and Promotions Managers
3,51,112021,Marketing Managers
4,52,112022,Sales Managers
5,60,112030,Public Relations and Fundraising Managers
6,101,113012,Administrative Services Managers
7,102,113013,Facilities Managers
8,110,113021,Computer and Information Systems Managers
9,120,113031,Financial Managers


Merge into CPS

In [None]:
# Load clean crosswalk
cw = pd.read_csv("/content/occ_to_soc_2018_clean.csv")

# Make sure CPS occ is numeric int (IPUMS sometimes stores as float)
cps["occ"] = pd.to_numeric(cps["occ"], errors="coerce")
before = len(cps)
cps = cps.dropna(subset=["occ"]).copy()
cps["occ"] = cps["occ"].astype(int)

# Merge
cps = cps.merge(cw[["occ", "soc"]], on="occ", how="left")

print("CPS rows:", len(cps), " (was", before, ")")
print("SOC missing share:", cps["soc"].isna().mean())
cps[["occ", "soc"]].head()

CPS rows: 406632  (was 834419 )
SOC missing share: 0.21567412303999686


Unnamed: 0,occ,soc
0,6200,471011.0
1,4720,412010.0
2,7700,511011.0
3,205,119013.0
4,2145,232011.0


In [None]:
# Ensure same type
risk_onet["soc"] = risk_onet["soc"].astype(str).str.replace("-", "").str.strip()
cps["soc"] = cps["soc"].astype(str).str.strip()

# Merge risks onto CPS
cps = cps.merge(
    risk_onet[["soc", "physical_risk", "financial_liability_risk", "cyclical_job_security_risk"]],
    on="soc",
    how="left"
)

print("Risk missing share (physical):", cps["physical_risk"].isna().mean())
cps[["occ","soc","physical_risk","financial_liability_risk","cyclical_job_security_risk"]].head()

Risk missing share (physical): 1.0


Unnamed: 0,occ,soc,physical_risk,financial_liability_risk,cyclical_job_security_risk
0,6200,471011.0,,,
1,4720,412010.0,,,
2,7700,511011.0,,,
3,205,119013.0,,,
4,2145,232011.0,,,


In [None]:
import pandas as pd

# Re-read the saved label table from disk so risk_onet holds the values
# exactly as they were written to the CSV.
# NOTE(review): this absolute path matches the relative OUTPUT_PATH used by
# the labeling run only if the working directory is /content - confirm.
risk_onet = pd.read_csv("/content/onet_risk_labels.csv")

print("O*NET risk rows:", risk_onet.shape)
print(risk_onet.columns.tolist())
risk_onet.head()

O*NET risk rows: (1016, 5)
['soc', 'title', 'physical_risk', 'financial_liability_risk', 'cyclical_job_security_risk']


Unnamed: 0,soc,title,physical_risk,financial_liability_risk,cyclical_job_security_risk
0,11-1011.00,Chief Executives,Low,High,High
1,11-1011.03,Chief Sustainability Officers,Low,Medium,Medium
2,11-1021.00,General and Operations Managers,Low,High,Medium
3,11-1031.00,Legislators,Low,Medium,High
4,11-2011.00,Advertising and Promotions Managers,Low,Medium,High


In [None]:
# CPS side
cps["soc"] = cps["soc"].astype(str).str.replace(".0", "", regex=False)

# O*NET side
risk_onet["soc"] = risk_onet["soc"].astype(str).str.replace(".", "", regex=False)

In [None]:
import pandas as pd

# --- 1) Build soc6 keys exactly as you already did ---
cps["soc6"] = (
    cps["soc"]
      .astype(str)
      .str.replace(".0", "", regex=False)
      .str.strip()
)

risk_onet["soc6"] = (
    risk_onet["soc"]
      .astype(str)
      .str.replace("-", "", regex=False)
      .str.replace(".", "", regex=False)
      .str.strip()
      .str[:6]
)

# --- 2) Collapse duplicates on the O*NET side ---
risk_cols = ["physical_risk", "financial_liability_risk", "cyclical_job_security_risk"]

# simple mode function (most common label)
def mode_label(s):
    s = s.dropna()
    if len(s) == 0:
        return pd.NA
    return s.value_counts().idxmax()

risk_onet_soc6 = (
    risk_onet
      .groupby("soc6", as_index=False)[risk_cols]
      .agg(mode_label)
)

print("risk_onet rows:", len(risk_onet), "unique soc6:", risk_onet["soc6"].nunique())
print("collapsed rows:", len(risk_onet_soc6), "unique soc6:", risk_onet_soc6["soc6"].nunique())

# --- 3) Drop any old suffixed risk columns in cps (you already saw _x/_y) ---
for col in risk_cols:
    drop_cols = [c for c in cps.columns if c == col or c.startswith(col + "_")]
    if drop_cols:
        cps = cps.drop(columns=drop_cols)

# --- 4) Merge (now valid many-to-one) ---
cps = cps.merge(
    risk_onet_soc6,
    on="soc6",
    how="left",
    validate="m:1"
)

# --- 5) Check coverage ---
print("Risk missing share (physical):", cps["physical_risk"].isna().mean())
cps[["occ","soc","soc6"] + risk_cols].head(10)


risk_onet rows: 1016 unique soc6: 867
collapsed rows: 867 unique soc6: 867
Risk missing share (physical): 0.40878238800684646


Unnamed: 0,occ,soc,soc6,physical_risk,financial_liability_risk,cyclical_job_security_risk
0,6200,471011,471011,High,Medium,High
1,4720,412010,412010,,,
2,7700,511011,511011,Medium,Medium,Medium
3,205,119013,119013,High,Medium,High
4,2145,232011,232011,Low,Medium,Medium
5,8320,516031,516031,Medium,Low,High
6,205,119013,119013,High,Medium,High
7,205,119013,119013,High,Medium,High
8,4510,395012,395012,Medium,Low,Medium
9,205,119013,119013,High,Medium,High


In [None]:
# Number of distinct soc6 codes present on both sides of the merge.
cps_codes = set(cps["soc6"].dropna())
onet_codes = set(risk_onet_soc6["soc6"].dropna())
print("Overlap count:", len(cps_codes & onet_codes))

Overlap count: 377


Reports

In [None]:
# Split CPS by whether a physical-risk label was attached.
has_risk = cps["physical_risk"].notna()
cps_with_risk = cps[has_risk].copy()
cps_no_risk   = cps[~has_risk].copy()

# Ordinal encoding of the labels.
risk_map = {"Low": 0, "Medium": 1, "High": 2}

for col in ("physical_risk", "financial_liability_risk", "cyclical_job_security_risk"):
    cps_with_risk[col + "_num"] = cps_with_risk[col].map(risk_map)


In [None]:
# Summary statistics of the three numeric risk scores.
num_cols = [
    f"{name}_num"
    for name in ("physical_risk", "financial_liability_risk", "cyclical_job_security_risk")
]
cps_with_risk[num_cols].describe()


Unnamed: 0,physical_risk_num,financial_liability_risk_num,cyclical_job_security_risk_num
count,240408.0,240408.0,240408.0
mean,0.788235,0.712618,1.158148
std,0.801959,0.606631,0.577341
min,0.0,0.0,0.0
25%,0.0,0.0,1.0
50%,1.0,1.0,1.0
75%,1.0,1.0,2.0
max,2.0,2.0,2.0


In [None]:
# Work on a copy so the indicator columns added next do not alter cps_with_risk.
analysis_df = cps_with_risk.copy()

In [None]:
# 0/1 indicators.
# NOTE(review): 770 is presumably the industry code for construction - confirm
# against the codebook for the `ind` variable.
analysis_df["construction"] = analysis_df["ind"].eq(770).astype(int)

# "High" was encoded as 2 when the *_num columns were built.
analysis_df["phys_high"] = analysis_df["physical_risk_num"].eq(2).astype(int)
analysis_df["cycle_high"] = analysis_df["cyclical_job_security_risk_num"].eq(2).astype(int)

In [None]:
# Means of 0/1 indicators, i.e. the share of rows with each flag set.
analysis_df[["construction", "phys_high", "cycle_high"]].mean()

Unnamed: 0,0
construction,0.086565
phys_high,0.238108
cycle_high,0.25824


In [None]:
# Current shape of cps, i.e. after the rows without an occ code were dropped
# and the SOC / risk columns were merged in.
print("Observations:", cps.shape[0])
print("Variables:", cps.shape[1])

Observations: 406632
Variables: 39


In [None]:
# Preview of the O*NET table, now including the llm_text column.
onet_occ.head()


Unnamed: 0,soc,title,description,llm_text
0,11-1011.00,Chief Executives,Determine and formulate policies and provide o...,Occupation Title: Chief Executives\nOccupation...
1,11-1011.03,Chief Sustainability Officers,"Communicate and coordinate with management, sh...",Occupation Title: Chief Sustainability Officer...
2,11-1021.00,General and Operations Managers,"Plan, direct, or coordinate the operations of ...",Occupation Title: General and Operations Manag...
3,11-1031.00,Legislators,"Develop, introduce, or enact laws and statutes...",Occupation Title: Legislators\nOccupation Desc...
4,11-2011.00,Advertising and Promotions Managers,"Plan, direct, or coordinate advertising polici...",Occupation Title: Advertising and Promotions M...


In [None]:
# Distribution of physical-risk labels across O*NET occupations.
risk_onet["physical_risk"].value_counts()

Unnamed: 0_level_0,count
physical_risk,Unnamed: 1_level_1
Low,447
Medium,347
High,222


In [None]:
# Share of CPS rows that carry no physical-risk label.
print("Risk missing share (physical):", cps["physical_risk"].isna().mean())

Risk missing share (physical): 0.40878238800684646


In [None]:
# Share of analysis_df rows flagged as high physical / high cyclical risk.
analysis_df[["phys_high", "cycle_high"]].mean()


Unnamed: 0,0
phys_high,0.238108
cycle_high,0.25824


In [None]:
# Same expression as the preceding cell: shares of rows with the high-risk flags set.
analysis_df[["phys_high", "cycle_high"]].mean()


Unnamed: 0,0
phys_high,0.238108
cycle_high,0.25824


In [None]:
# Shares of rows in construction and with each high-risk flag set
# (means of the 0/1 indicator columns).
analysis_df[["construction", "phys_high", "cycle_high"]].mean()


Unnamed: 0,0
construction,0.086565
phys_high,0.238108
cycle_high,0.25824
