# LLM interpretation
- Using GPT, Claude, and Llama models
- Representative model: GPT o4-mini (via API)

In [None]:
# Load the per-vacancy keyword table. pandas is imported here so that this
# first cell also runs on a fresh kernel (no earlier cell imports it).
import pandas as pd

df_q1_clean_idx = pd.read_csv('q1q4_keywords_idx.csv')

# Label the rows with the vacancy ("space") ids used by the rest of the notebook.
# NOTE(review): assumes the CSV holds exactly these seven rows in this order — confirm.
df_q1_clean_idx.index = ['V2', 'V3', 'V6', 'V7', 'V12', 'V13', 'V18']
df_q1_clean_idx

In [None]:
pip install uv langchain langchain_openai python-dotenv ipykernel

In [None]:
# Create the OpenAI client used by generate_response().
#
# The API key is never written into the notebook: it is read from the process
# environment, optionally populated from a local .env file by load_dotenv().
import os

from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Fail early with a clear message instead of at the first API call.
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

client = OpenAI(api_key=OPENAI_API_KEY)

def generate_response(system_message, user_message):
    """Send one system/user message pair to the o4-mini model and return its text.

    Both messages are embedded in a single prompt string, each wrapped in its
    own tag section, and sent through ``client.responses.create``.

    Args:
        system_message: Text placed inside the system section of the prompt.
        user_message: Text placed inside the user section of the prompt.

    Returns:
        The ``output_text`` attribute of the API response.
    """
    # The tag layout (including its indentation) is part of the prompt itself.
    tagged_prompt = f"""
    <system>
    {system_message}
    </system>

    <user>
    {user_message}
    </user>
    """

    return client.responses.create(model="o4-mini", input=tagged_prompt).output_text

In [None]:
def generate_tech_abstract(space_id, filtered_df, top_n=30):
    """Generate the baseline ("tech"-only) abstract for one vacancy space.

    Parses the weighted keywords stored in the ``tech`` column of
    ``filtered_df`` for the given space, keeps the ``top_n`` heaviest ones and
    asks the LLM (through ``generate_response``) to write an abstract.

    Args:
        space_id: Row label in ``filtered_df``; an ``int`` n is mapped to "Vn".
        filtered_df: DataFrame indexed by space id with a ``tech`` column whose
            entries have the form ``keyword (weight)``.
        top_n: Maximum number of keywords passed to the model.

    Returns:
        The text returned by ``generate_response``.

    Raises:
        ValueError: If the space id is not in the index, or if no weighted
            keyword can be parsed from the ``tech`` cell (e.g. the cell is NaN).
    """
    # Imported locally so the function does not depend on another cell having
    # imported ``re`` first.
    import re

    # space_id
    if isinstance(space_id, int):
        space_id_str = f"V{space_id}"
    else:
        space_id_str = str(space_id)

    if space_id_str not in filtered_df.index:
        raise ValueError(
            f"space_id '{space_id_str}' not found in filtered_df. "
            f"Available: {list(filtered_df.index)[:10]}..."
        )

    # tech column parsing; a missing cell (NaN / None) is treated as empty text,
    # as generate_both_abstract does, instead of crashing inside re.findall.
    tech_str = filtered_df.loc[space_id_str, 'tech']
    if not isinstance(tech_str, str):
        tech_str = ""
    matches = re.findall(r'([^\s]+)\s+\(([\d\.]+)\)', tech_str)
    tech_keywords = {('tech', kw): float(w) for kw, w in matches}

    if not tech_keywords:
        # Nothing to ground the abstract on: do not spend an API call.
        raise ValueError(
            f"No weighted keywords could be parsed from the 'tech' column "
            f"for space_id '{space_id_str}'."
        )

    top_keywords = sorted(tech_keywords.items(), key=lambda x: x[1], reverse=True)[:top_n]

    weighted_keywords_with_origin = [
        f"{keyword} ({weight:.3f}, origin: {origin})"
        for (origin, keyword), weight in top_keywords
    ]
    formatted_keywords = ", ".join(weighted_keywords_with_origin)

    # ----------------------------
    # prompt
    # ----------------------------
    system_message = """
      You are an expert in on-device AI and embedded machine learning systems.
      Your job is to generate highly specific, technically coherent abstracts strictly grounded in the provided weighted keywords.
      The abstract must remain within the semantic scope suggested by the keywords.
      """

    user_message = f"""
      [Weighted keywords]
      {formatted_keywords}

      [Purpose]
      The weighted keywords represent a potential unexplored area in the on-device AI technology domain.
      Your objective is to interpret the weighted keywords and propose a novel, coherent technical mechanism aligned with their collective meaning.

      [Generation rules]
      1. Remain strictly within the **single technical domain** inferred from the highest-weight keywords
        across the keyword's origin.
        - You are NOT required to use all keywords.
        - Lower-weight keywords may be ignored unless they naturally reinforce the dominant concept.
      2. Identify **one unifying dominant concept** that both domains can converge on.
        - This concept must be the structural center of the entire abstract.
        - All mechanisms must be derived from, and consistent with, this dominant concept.
      3. Describe a coherent technical mechanism that explains:
        - the problem implied jointly,
        - the on-device AI method or architecture addressing it,
        - and how that origin(tech) contributes functionally to different stages of the mechanism.
      4. The abstract must describe a **plausible and detailed on-device architecture**, including: modules, pipelines, computational steps and data flow,
      5. DO NOT introduce any fields or topics outside the unified domain.
        - No extra modalities, no unrelated algorithms, no foreign technologies.
      6. Length: ~500–600 characters. Style: patent-like, mechanism-focused, concise.
      7. Do NOT call the idea “novel.”
        - Novelty must emerge implicitly from the mechanism logically derived from the keyword distribution.

      [Output format]
      *abstract: [Write the abstract here.]
      *reason: [Explain in 2–3 sentences how the dominant keywords shaped the technical mechanism.]
      """

    print("🤖  generate_response start")
    response = generate_response(system_message, user_message)
    print("👀  generate_response result:", repr(response))
    return response

In [None]:
def generate_both_abstract(space_id, filtered_df, top_n=20):
    """Generate the combined ("tech" + "overlapping") abstract for one vacancy space.

    Parses the weighted keywords stored in the ``overlapping`` and ``tech``
    columns of ``filtered_df`` for the given space, pools them (tagged with
    their origin), keeps the ``top_n`` heaviest ones and asks the LLM (through
    ``generate_response``) to write an abstract.

    Args:
        space_id: Row label in ``filtered_df``; an ``int`` n is mapped to "Vn".
        filtered_df: DataFrame indexed by space id with ``overlapping`` and
            ``tech`` columns whose entries have the form ``keyword (weight)``.
        top_n: Maximum number of keywords passed to the model.

    Returns:
        The text returned by ``generate_response``.

    Raises:
        ValueError: If the space id is not in the index, or if neither column
            yields any weighted keyword.
    """
    # Imported locally so the function does not depend on another cell having
    # imported ``re`` first.
    import re

    # space_id
    if isinstance(space_id, int):
        space_id_str = f"V{space_id}"
    else:
        space_id_str = str(space_id)

    if space_id_str not in filtered_df.index:
        raise ValueError(
            f"space_id '{space_id_str}' not found in filtered_df. "
            f"Available: {list(filtered_df.index)[:10]}..."
        )

    overlap_str = filtered_df.loc[space_id_str, 'overlapping']
    tech_str = filtered_df.loc[space_id_str, 'tech']

    # A missing cell (NaN / None) contributes no keywords.
    overlap_str = overlap_str if isinstance(overlap_str, str) else ""
    tech_str = tech_str if isinstance(tech_str, str) else ""

    keyword_pattern = r'([^\s]+)\s+\(([\d\.]+)\)'
    overlap_matches = re.findall(keyword_pattern, overlap_str)
    tech_matches = re.findall(keyword_pattern, tech_str)

    # Keys carry the origin, so the same keyword may appear once per origin.
    both_keywords = {}
    for kw, w in overlap_matches:
        both_keywords[('overlapping', kw)] = float(w)
    for kw, w in tech_matches:
        both_keywords[('tech', kw)] = float(w)

    if not both_keywords:
        # Nothing to ground the abstract on: do not spend an API call.
        raise ValueError(
            f"No weighted keywords could be parsed from the 'overlapping' or "
            f"'tech' columns for space_id '{space_id_str}'."
        )

    top_keywords = sorted(both_keywords.items(), key=lambda x: x[1], reverse=True)[:top_n]

    weighted_keywords_with_origin = [
        f"{keyword} ({weight:.3f}, origin: {origin})"
        for (origin, keyword), weight in top_keywords
    ]
    formatted_keywords = ", ".join(weighted_keywords_with_origin)

    # ----------------------------
    # prompt
    # ----------------------------
    system_message = """
      You are an expert in on-device AI and embedded machine learning systems.
      Your job is to generate highly specific, technically coherent abstracts strictly grounded in the provided weighted keywords.
      The abstract must remain within the semantic scope suggested by the keywords.
      """

    user_message = f"""
      [Weighted keywords]
      {formatted_keywords}

      [Purpose]
      You are given two domains of keywords: 'tech' and 'overlapping'.
      They jointly suggest a potential unexplored direction in on-device AI.
      Your objective is to integrate both domains into a single, novel, coherent technical mechanism.

      [Generation rules]
      1. Remain strictly within the **single technical domain** inferred from the highest-weight keywords
        across both origins (tech + overlapping).
        - You are NOT required to use all keywords.
        - Lower-weight keywords may be ignored unless they naturally reinforce the dominant concept.
      2. Identify **one unifying dominant concept** that both domains can converge on.
        - This concept must be the structural center of the entire abstract.
        - All mechanisms must be derived from, and consistent with, this dominant concept.
      3. Describe a coherent technical mechanism that explains:
        - the problem implied jointly by the tech and overlapping signals,
        - the on-device AI method or architecture addressing it,
        - and how each origin (tech / overlapping) contributes functionally
          to different stages of the mechanism.
      4. The abstract must describe a **plausible and detailed on-device architecture**, including:
        - modules, pipelines, or hardware–firmware interaction,
        - computational steps and data flow,
        - without diverging from the dominant concept.
      5. DO NOT introduce any fields or topics outside the unified domain.
        - No extra modalities, no unrelated algorithms, no foreign technologies.
      6. Length: ~500–600 characters. Style: patent-like, mechanism-focused, concise.
      7. Do NOT call the idea “novel.”
        - Novelty must emerge implicitly from the mechanism logically derived from the keyword distribution.

      [Output format]
      *abstract: [Write the abstract here.]
      *reason: [Explain in 2–3 sentences how you combined 'tech' and 'overlapping' signals into one coherent mechanism.]
      """

    print("🤖  generate_response start")
    response = generate_response(system_message, user_message)
    print("👀  generate_response result:", repr(response))
    return response

In [None]:
import re
import time

target_space_ids = [f'V{i}' for i in [2, 3, 6, 7, 12, 13, 18]]

# One row per target space, created up front. A failure in one interpretation
# therefore leaves an empty cell instead of dropping the row, and the result of
# the other interpretation always has a row to be stored in.
results_df = pd.DataFrame({
    "space_id": target_space_ids,
    "abstract(tech)": None,
    "abstract(both)": None,
})

# tech (baseline) interpretation
for space_id in target_space_ids:
    print(f"\n===================== {space_id} =====================")
    try:
        abstract = generate_tech_abstract(space_id, df_q1_clean_idx)
    except Exception as e:
        # Best effort: report the failure and continue with the next space.
        print(f"Error for space ID {space_id} → {e}")
        continue
    results_df.loc[results_df['space_id'] == space_id, 'abstract(tech)'] = abstract
    print(f"Completed abstract for space ID {space_id}")
    time.sleep(1)  # short pause between consecutive API calls

# BOTH (ours) interpretation
for space_id in target_space_ids:
    print(f"\n[BOTH] ===================== {space_id} =====================")
    try:
        abstract_both = generate_both_abstract(space_id, df_q1_clean_idx)
    except Exception as e:
        print(f"Error (BOTH) for space ID {space_id} → {e}")
        continue
    results_df.loc[results_df['space_id'] == space_id, 'abstract(both)'] = abstract_both
    print(f"Completed BOTH abstract for space ID {space_id}")
    time.sleep(1)

# NOTE(review): the file name is fixed to run 5, and the validation cells read
# abstract_*.csv with encoding='cp949' — confirm how this output is converted.
results_df.to_csv("abstract_gpt_5.csv", index=False)
print("\n⭐ Clear! abstract_gpt_5.csv")

# Validation

Embedding (generated text)
- GPT 5 sets
- Claude 5 sets
- Llama 5 sets

In [None]:
!pip install sentence-transformers

In [None]:
#embedding

import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer

save_path = "/content/LLMtext"
model = SentenceTransformer('AI-Growth-Lab/PatentSBERTa')

# Load the five saved runs of each LLM and bind them to the module-level names
# abs_<llm>_<run> (abs_gpt_1 ... abs_llama_5), in the order gpt, claude, llama.
for llm_name in ("gpt", "claude", "llama"):
    for run_no in range(1, 6):
        globals()[f"abs_{llm_name}_{run_no}"] = pd.read_csv(
            f"{save_path}/abstract_{llm_name}_{run_no}.csv", encoding='cp949'
        )

In [None]:
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np

save_path = "/content/drive/MyDrive/Colab Notebooks/LLMtext"

# embedding model
model = SentenceTransformer('AI-Growth-Lab/PatentSBERTa')

# text columns name
base_text_cols = [
    'gpt(tech)',
    'gpt(both)',
    'claude(tech)',
    'claude(both)',
    'llama(tech)',
    'llama(both)',
]

# 1~5 repeat
for i in range(1, 6):
    print(f"\n===== {i}ÌöåÏ∞® Ï≤òÎ¶¨ ÏãúÏûë =====")

    dfs = []

    # get gpt_i, claude_i, llama_i
    for model_name in ['gpt', 'claude', 'llama']:
        var_name = f'abs_{model_name}_{i}'
        if var_name in globals():
            print(f" - {var_name} ÏÇ¨Ïö©")
            dfs.append(globals()[var_name])
        else:
            print(f" - {var_name} ÏóÜÏùå (Ïä§ÌÇµ)")

    if not dfs:
        print(f"{i}ÌöåÏ∞®: ÏÇ¨Ïö©Ìï† DataFrameÏù¥ ÏóÜÏñ¥ Ïä§ÌÇµÌï©ÎãàÎã§.")
        continue

    val = pd.concat(dfs, axis=1)
    val = val.loc[:, ~val.columns.duplicated()]

    text_cols = [col for col in base_text_cols if col in val.columns]

    print(f"{i}: emb col -> {text_cols}")

    # embedding by columns
    for col in text_cols:
        emb_col = col.replace('(', '_').replace(')', '').replace(' ', '_') + '_emb'
        print(f"Encoding column: {col} -> {emb_col}")

        texts = val[col].fillna("").astype(str).tolist()
        embeddings = model.encode(texts, show_progress_bar=True)

        val[emb_col] = embeddings.tolist()

    pkl_path = f"{save_path}/abstract_all_{i}_with_embeddings.pkl"
    val.to_pickle(pkl_path)

    print(f"{i}: save -> {pkl_path}")

Embedding (train data)

In [None]:
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np

# Embed every training text with PatentSBERTa and save the vectors next to the
# original rows (each vector is stored as a Python list in one CSV cell).
model = SentenceTransformer('AI-Growth-Lab/PatentSBERTa')

train_csv_in = f"{save_path}/train_data.csv"
train_csv_out = f"{save_path}/train_emb2.csv"

train2 = pd.read_csv(train_csv_in)
train_texts = train2['text'].tolist()

train_embeddings = model.encode(
    train_texts,
    batch_size=16,
    show_progress_bar=True,
)
print("train_embeddings.shape:", train_embeddings.shape)

train2['embedding'] = train_embeddings.tolist()
train2.to_csv(train_csv_out, index=False)

# Novelty (LOF score)

In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import LocalOutlierFactor
import ast
from sentence_transformers import SentenceTransformer

# ======================================================
# 1. Train embedding
# ======================================================
train_emb = pd.read_csv(f"{save_path}/train_emb2.csv")

# The CSV stores each embedding as the text of a Python list; parse it back.
train_emb["embedding"] = train_emb["embedding"].apply(ast.literal_eval)

# (n_train, dim) array
train_embeddings_arr = np.vstack(train_emb["embedding"].values)

# ======================================================
# 2. LOF train
# ======================================================
# novelty=True is what allows scoring unseen samples with decision_function().
lof = LocalOutlierFactor(
    n_neighbors=20,
    metric='cosine',
    novelty=True
)
lof.fit(train_embeddings_arr)

# ======================================================
# 3. LOF score
# ======================================================
# embedding column -> name of the score column derived from it
target_columns = {
    'gpt_tech_emb'    : 'gpt_tech_lof',
    'gpt_both_emb'    : 'gpt_both_lof',
    'claude_tech_emb' : 'claude_tech_lof',
    'claude_both_emb' : 'claude_both_lof',
    'llama_tech_emb'  : 'llama_tech_lof',
    'llama_both_emb'  : 'llama_both_lof'
}

for i in range(1, 6):
    print(f"\n===== {i} LOF start =====")

    pkl_path = f"{save_path}/abstract_all_{i}_with_embeddings.pkl"

    try:
        val = pd.read_pickle(pkl_path)
    except FileNotFoundError:
        print(f" - {pkl_path} none → skip\n")
        continue

    existing_emb_cols = [col for col in target_columns if col in val.columns]
    print(f" - emb col: {existing_emb_cols}")

    for emb_col in existing_emb_cols:
        score_col = target_columns[emb_col]
        print(f"   LOF scoring: {emb_col} -> {score_col}")

        target_embeddings = np.array(val[emb_col].tolist())

        # LOF score. Per the scikit-learn documentation, larger values of
        # decision_function() are more inlier-like and negative values are
        # flagged as outliers relative to the training set.
        lof_scores = lof.decision_function(target_embeddings)
        val[score_col] = np.round(lof_scores, 4)

    novelty_csv = f"{save_path}/val_novelty_{i}.csv"
    score_cols = [target_columns[c] for c in existing_emb_cols]

    val_subset = val[['space_id'] + score_cols].copy()
    val_subset = val_subset.rename(columns={'space_id': 'vacancy_num'})

    val_subset.to_csv(novelty_csv, index=False, encoding='utf-8-sig')
    print(f" - save → {novelty_csv}")


In [None]:
import pandas as pd

# Per-run novelty (LOF) tables, runs 1 to 5.
novelty1, novelty2, novelty3, novelty4, novelty5 = (
    pd.read_csv(f"{save_path}/val_novelty_{run_no}.csv") for run_no in range(1, 6)
)

# Feasibility (ours/baseline)



In [None]:
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np

# Embed the test texts with PatentSBERTa and save the vectors next to the
# original rows (each vector is stored as a Python list in one CSV cell).
model = SentenceTransformer('AI-Growth-Lab/PatentSBERTa')

test = pd.read_csv(f"{save_path}/test_data.csv")
test_texts = test['text'].tolist()

test_embeddings = model.encode(test_texts, show_progress_bar=True)
test['embedding'] = test_embeddings.tolist()

test.to_csv(f"{save_path}/test_emb.csv", index=False)

In [None]:
import ast
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

save_path = "/content/drive/MyDrive/Colab Notebooks/LLMtext"

# ---------------------------
# 1. train / test embedding load
# ---------------------------
test = pd.read_csv(f"{save_path}/test_emb.csv")
train_df = pd.read_csv(f"{save_path}/train_emb2.csv")

# The CSVs store each embedding as the text of a Python list; parse it back.
if isinstance(train_df['embedding'].iloc[0], str):
    train_df['embedding'] = train_df['embedding'].apply(ast.literal_eval)

# train embeddings (list of list → array)
train_embeddings = train_df['embedding'].to_numpy()
train_matrix = np.vstack(train_embeddings)   # (n_train, dim)

if isinstance(test['embedding'].iloc[0], str):
    test['embedding'] = test['embedding'].apply(ast.literal_eval)
test_matrix  = np.vstack(test['embedding'].to_numpy())  # (n_test, dim)

# Computed once; rows are re-used below for each generated text's nearest train document.
train_test_sim = cosine_similarity(train_matrix, test_matrix)  # (n_train, n_test)


# ---------------------------
# model name -> embedding column of its "both" abstract
model_emb_cols = {
    "gpt": "gpt_both_emb",
    "claude": "claude_both_emb",
    "llama": "llama_both_emb",
}

top_k = 10

# ---------------------------
#  1~5 repeat
# ---------------------------
for i in range(1, 6):
    # Log: "run i ..."
    print(f"\n===== {i}회차 Feasibility F-ratio (Top-{top_k}) start =====")

    pkl_path = f"{save_path}/abstract_all_{i}_with_embeddings.pkl"

    try:
        val = pd.read_pickle(pkl_path)
    except FileNotFoundError:
        print(f" - {pkl_path} none → skip")
        continue

    feas_table = pd.DataFrame({'vacancy_num': val['space_id']})

    for model_name, emb_col in model_emb_cols.items():
        if emb_col not in val.columns:
            print(f" - {emb_col} none → {model_name} skip")
            continue

        print(f"Processing model: {model_name} (col: {emb_col})")

        if isinstance(val[emb_col].iloc[0], str):
            val[emb_col] = val[emb_col].apply(ast.literal_eval)

        # (n_val, embed_dim)
        val_matrix = np.vstack(val[emb_col].to_numpy())

        # 1) generated → train similarity : find ancestor train(top-1)
        sim_val_to_train = cosine_similarity(val_matrix, train_matrix)  # (n_val, n_train)
        nearest_train_idx = sim_val_to_train.argmax(axis=1)             # (n_val,)

        # 2) baseline(model@): train → test similarity top-k
        baseline_mat = train_test_sim[nearest_train_idx]                # (n_val, n_test)
        baseline_sorted = np.sort(baseline_mat, axis=1)[:, ::-1]        # descending per row
        baseline_topk = baseline_sorted[:, :top_k]                      # (n_val, top_k)
        baseline_topk_avg = baseline_topk.mean(axis=1)                  # (n_val,)

        # 3) ours(model@): generated → test similarity top-k
        sim_val_to_test = cosine_similarity(val_matrix, test_matrix)    # (n_val, n_test)
        ours_sorted = np.sort(sim_val_to_test, axis=1)[:, ::-1]
        ours_topk = ours_sorted[:, :top_k]
        ours_topk_avg = ours_topk.mean(axis=1)

        # 4) F-ratio = ours / baseline (the epsilon guards against division by zero)
        F_ratio = ours_topk_avg / (baseline_topk_avg + 1e-8)

        baseline_col_name = f'{model_name}_baseline'
        ours_col_name     = f'{model_name}_ours'
        ratio_col_name    = f'{model_name}_F_ratio'

        feas_table[baseline_col_name] = baseline_topk_avg
        feas_table[ours_col_name]     = ours_topk_avg
        feas_table[ratio_col_name]    = F_ratio

    out_csv = f"{save_path}/feasibility_Fratio_top{top_k}_{i}.csv"
    feas_table.to_csv(out_csv, index=False, encoding='utf-8-sig')
    print(f" - save → {out_csv}")

In [None]:
# Per-run feasibility (top-10 F-ratio) tables, runs 1 to 5.
feasibility1, feasibility2, feasibility3, feasibility4, feasibility5 = (
    pd.read_csv(f"{save_path}/feasibility_Fratio_top10_{run_no}.csv")
    for run_no in range(1, 6)
)

# Trend-fit

In [None]:
import ast
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# This cell uses ``save_path``, ``train_matrix`` and ``test_matrix`` without
# defining them, so they must already exist in the session.

# -----------------------------------------
# emb columns
# -----------------------------------------
embedding_cols = [
    "gpt_tech_emb", "gpt_both_emb",
    "claude_tech_emb", "claude_both_emb",
    "llama_tech_emb", "llama_both_emb"
]

# -----------------------------------------
# 1~5 repeat
# -----------------------------------------
for i in range(1, 6):
    print(f"\n===== {i} Trend-Fit start =====")

    pkl_path = f"{save_path}/abstract_all_{i}_with_embeddings.pkl"

    try:
        val = pd.read_pickle(pkl_path)
    except FileNotFoundError:
        print(f" - {pkl_path} none → skip")
        continue

    existing_cols = [col for col in embedding_cols if col in val.columns]
    print(f" - emb col: {existing_cols}")

    trend_table = pd.DataFrame({"vacancy_num": val["space_id"]})

    # -----------------------------------------
    #  trend-fit score
    # -----------------------------------------
    for col in existing_cols:
        print(f"Processing (trend-fit): {col}")

        if isinstance(val[col].iloc[0], str):
            val[col] = val[col].apply(ast.literal_eval)

        # (val_len, dim)
        val_matrix = np.vstack(val[col].to_numpy())

        # cosine similarity
        sim_to_test  = cosine_similarity(val_matrix, test_matrix)
        sim_to_train = cosine_similarity(val_matrix, train_matrix)

        # Average similarity of each generated text to all test / all train documents.
        avg_sim_test  = sim_to_test.mean(axis=1)
        avg_sim_train = sim_to_train.mean(axis=1)

        # Trend-Fit Ratio (the epsilon guards against division by zero)
        trend = avg_sim_test / (avg_sim_train + 1e-8)

        base = col.replace("_emb", "")
        trend_table[f"{base}_trend"] = trend.round(4)

    # -----------------------------------------
    # save
    # -----------------------------------------
    out_csv = f"{save_path}/trendfit_{i}.csv"
    trend_table.to_csv(out_csv, index=False, encoding='utf-8-sig')
    print(f" - save → {out_csv}")


In [None]:
# Per-run trend-fit tables, runs 1 to 5.
trendfit1, trendfit2, trendfit3, trendfit4, trendfit5 = (
    pd.read_csv(f"{save_path}/trendfit_{run_no}.csv") for run_no in range(1, 6)
)

In [None]:
import pandas as pd
import numpy as np

def summarize_runs_by_vacancy(dfs, index_col='vacancy_num', float_decimals=3):
    """Collapse repeated runs into one table of "mean±std" strings.

    Every DataFrame in ``dfs`` is one run. Runs are aligned on ``index_col``
    (sorted), and for each vacancy and each remaining column the mean and the
    sample standard deviation (``ddof=1``) over the runs are formatted as
    ``"mean±std"``.

    Args:
        dfs: Sequence of DataFrames with identical vacancies and columns.
        index_col: Name of the column that identifies a vacancy.
        float_decimals: Number of decimals used for both mean and std.

    Returns:
        DataFrame with ``index_col`` as its first column followed by one
        string column per score column. With a single run the std part is
        ``nan`` because the sample standard deviation is undefined.

    Raises:
        ValueError: If ``dfs`` is empty, or if the runs do not share the same
            vacancies or the same columns.
    """
    if not dfs:
        raise ValueError("dfs must contain at least one DataFrame.")

    # set_index() returns a new frame, so the caller's data is left untouched.
    aligned = [df.set_index(index_col).sort_index() for df in dfs]

    base_index = aligned[0].index
    base_cols = aligned[0].columns
    # Stacking below is positional, so every run must line up exactly.
    for k, df in enumerate(aligned[1:], start=2):
        if not df.index.equals(base_index):
            raise ValueError(
                f"DataFrame #{k} has a different set/order of '{index_col}' values."
            )
        if not df.columns.equals(base_cols):
            raise ValueError(f"DataFrame #{k} has different columns.")

    # (n_vacancy, n_cols, n_runs)
    stack = np.stack([df.values for df in aligned], axis=2)
    means = stack.mean(axis=2)
    stds = stack.std(axis=2, ddof=1)

    #  mean±std
    def fmt(m, s):
        return f"{m:.{float_decimals}f}±{s:.{float_decimals}f}"

    summary = pd.DataFrame(index=base_index)
    for j, col in enumerate(base_cols):
        summary[col] = [fmt(m, s) for m, s in zip(means[:, j], stds[:, j])]

    # vacancy_num -> columns
    return summary.reset_index()


In [None]:
# Per-run novelty tables (runs 1 to 5), summarised as mean±std per vacancy.
nov1, nov2, nov3, nov4, nov5 = (
    pd.read_csv(f"{save_path}/val_novelty_{run_no}.csv") for run_no in range(1, 6)
)
nov_list = [nov1, nov2, nov3, nov4, nov5]

nov_summary = summarize_runs_by_vacancy(nov_list, index_col='vacancy_num', float_decimals=3)

# Order rows by the number inside the vacancy label, via a temporary helper column.
nov_summary = (
    nov_summary
    .assign(vacancy_num_int=nov_summary["vacancy_num"].str.extract(r"(\d+)").astype(int))
    .sort_values(by="vacancy_num_int")
    .drop(columns=["vacancy_num_int"])
)

# Show the summary table
display(nov_summary)

In [None]:
# Per-run feasibility tables (runs 1 to 5), summarised as mean±std per vacancy.
feas1, feas2, feas3, feas4, feas5 = (
    pd.read_csv(f"{save_path}/feasibility_Fratio_top10_{run_no}.csv")
    for run_no in range(1, 6)
)
feas_list = [feas1, feas2, feas3, feas4, feas5]

feas_summary = summarize_runs_by_vacancy(feas_list, index_col='vacancy_num', float_decimals=3)

# Order rows by the number inside the vacancy label, via a temporary helper column.
feas_summary = (
    feas_summary
    .assign(vacancy_num_int=feas_summary["vacancy_num"].str.extract(r"(\d+)").astype(int))
    .sort_values(by="vacancy_num_int")
    .drop(columns=["vacancy_num_int"])
)

# Show the summary table
feas_summary


In [None]:
# Per-run trend-fit tables (runs 1 to 5), summarised as mean±std per vacancy.
tre1, tre2, tre3, tre4, tre5 = (
    pd.read_csv(f"{save_path}/trendfit_{run_no}.csv") for run_no in range(1, 6)
)
tre_list = [tre1, tre2, tre3, tre4, tre5]

tre_summary = summarize_runs_by_vacancy(tre_list, index_col='vacancy_num', float_decimals=3)

# Order rows by the number inside the vacancy label, via a temporary helper column.
tre_summary = (
    tre_summary
    .assign(vacancy_num_int=tre_summary["vacancy_num"].str.extract(r"(\d+)").astype(int))
    .sort_values(by="vacancy_num_int")
    .drop(columns=["vacancy_num_int"])
)

# Show the summary table
tre_summary

# Statistical test

novelty - GPT, Claude, Llama

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import wilcoxon, friedmanchisquare

# nov1~nov5
nov_list = [nov1, nov2, nov3, nov4, nov5]

# Long-format column name -> LOF column it is read from.
lof_source_cols = {
    'gpt_tech': 'gpt_tech_lof',
    'gpt_both': 'gpt_both_lof',
    'claude_tech': 'claude_tech_lof',
    'claude_both': 'claude_both_lof',
    'llama_tech': 'llama_tech_lof',
    'llama_both': 'llama_both_lof',
}

# One record per (run, vacancy) pair.
records = [
    {
        'run': run_id,
        'vacancy': row['vacancy_num'],
        **{short: row[source] for short, source in lof_source_cols.items()},
    }
    for run_id, run_df in enumerate(nov_list, start=1)
    for _, row in run_df.iterrows()
]

df_long = (
    pd.DataFrame(records)
    .sort_values(['run', 'vacancy'])
    .reset_index(drop=True)
)

In [None]:
# Paired Wilcoxon signed-rank test per model: 'tech' (baseline) vs 'both' (ours),
# over every (run, vacancy) row of df_long.
wilcoxon_by_model = {
    llm_name: wilcoxon(
        df_long[f'{llm_name}_tech'], df_long[f'{llm_name}_both'], zero_method='wilcox'
    )
    for llm_name in ('gpt', 'claude', 'llama')
}

# 1) GPT
stat_gpt, p_gpt = wilcoxon_by_model['gpt']
# 2) Claude
stat_claude, p_claude = wilcoxon_by_model['claude']
# 3) Llama
stat_llama, p_llama = wilcoxon_by_model['llama']

print("Wilcoxon (tech vs both)")
print(f"GPT    p={p_gpt:.4f}")
print(f"Claude p={p_claude:.4f}")
print(f"Llama  p={p_llama:.4f}")

#Wilcoxon (tech vs both)
#GPT    p=0.4911
#Claude p=0.0130
#Llama  p=0.0327

In [None]:
# Friedman test across the three models (gpt, claude, llama), 'both' setting only
both_score_cols = ('gpt_both', 'claude_both', 'llama_both')
stat_fr, p_fr = friedmanchisquare(*(df_long[col] for col in both_score_cols))

print("Friedman test (three models, both only)")
print(f"stat={stat_fr:.4f}, p={p_fr:.4f}")

#Friedman test (three models, both only)
#stat=9.3143, p=0.0095

feasibility - GPT, Claude, Llama

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import friedmanchisquare, wilcoxon

# 1) 5Í∞ú run align + F_ratio Ïª¨ÎüºÎßå Ï∂îÏ∂ú
cols_target = ["gpt_F_ratio", "claude_F_ratio", "llama_F_ratio"]

aligned = []
for df in feas_list:
    df2 = df.copy()
    df2 = df2.set_index("vacancy_num").sort_index()
    aligned.append(df2[cols_target])

# 2) index / columns ÎèôÏùºÌïúÏßÄ Ï≤¥ÌÅ¨ (ÏïàÏ†ÑÏö©)
base_index = aligned[0].index
base_cols = aligned[0].columns
for k, df in enumerate(aligned[1:], start=2):
    if not df.index.equals(base_index):
        raise ValueError(f"{k}Î≤àÏß∏ DataFrameÏùò vacancy ÏàúÏÑú/Íµ¨ÏÑ±Ïù¥ Îã§Î¶ÖÎãàÎã§.")
    if not df.columns.equals(base_cols):
        raise ValueError(f"{k}Î≤àÏß∏ DataFrameÏùò columns Íµ¨ÏÑ±Ïù¥ Îã§Î¶ÖÎãàÎã§.")

# 3) (n_vacancy, n_cols, n_runs) ÌòïÌÉúÎ°ú stack
stack = np.stack([df.values for df in aligned], axis=2)   # axis=2: run Ï∞®Ïõê
means = stack.mean(axis=2)  # run 5ÌöåÏùò ÌèâÍ∑†

# 4) ÌÜµÍ≥ÑÏö© DF (vacancyÎ≥Ñ, Î™®Îç∏Î≥Ñ ÌèâÍ∑† F_ratio)
feas_means = pd.DataFrame(means, index=base_index, columns=cols_target).reset_index()

# Friedman test: Í∞ôÏùÄ vacancyÏóêÏÑú gpt / claude / llama ÎπÑÍµê
stat, p = friedmanchisquare(
    feas_means["gpt_F_ratio"],
    feas_means["claude_F_ratio"],
    feas_means["llama_F_ratio"],
)

print("Friedman chi-square =", stat, "p-value =", p)


#Friedman chi-square = 8.857142857142847 p-value = 0.011931522535756207

trend-fit - GPT, Claude, Llama

In [None]:
tre_list = [tre1, tre2, tre3, tre4, tre5]


def compute_means_from_runs(dfs, index_col="vacancy_num"):
    """Average every score column over repeated runs, per vacancy.

    Args:
        dfs: Sequence of DataFrames (one per run) with identical vacancies and
            columns.
        index_col: Name of the column that identifies a vacancy.

    Returns:
        DataFrame with ``index_col`` as its first column followed by the mean
        over the runs of each remaining column.

    Raises:
        ValueError: If ``dfs`` is empty, or if the runs do not share the same
            vacancies or the same columns.
    """
    if not dfs:
        raise ValueError("dfs must contain at least one DataFrame.")

    aligned = [df.set_index(index_col).sort_index() for df in dfs]

    base_index = aligned[0].index
    base_cols = aligned[0].columns
    # Stacking below is positional, so every run must line up exactly.
    for k, df in enumerate(aligned[1:], start=2):
        if not df.index.equals(base_index):
            raise ValueError(
                f"DataFrame #{k} has a different set/order of '{index_col}' values."
            )
        if not df.columns.equals(base_cols):
            raise ValueError(f"DataFrame #{k} has different columns.")

    # (n_vacancy, n_cols, n_runs) -> mean over the run axis
    stack = np.stack([df.values for df in aligned], axis=2)
    means = stack.mean(axis=2)

    return pd.DataFrame(means, index=base_index, columns=base_cols).reset_index()


trend_means = compute_means_from_runs(tre_list, index_col="vacancy_num")

# Sort by the number inside vacancy_num (optional)
trend_means["vacancy_num_int"] = trend_means["vacancy_num"].str.extract(r"(\d+)").astype(int)
trend_means = trend_means.sort_values("vacancy_num_int").drop(columns=["vacancy_num_int"])


In [None]:
from scipy.stats import wilcoxon

# Paired Wilcoxon signed-rank test per model on the run-averaged trend-fit
# scores: 'tech' (baseline) vs 'both' (ours).
# The loop variable is named ``model_name`` so that it does not overwrite the
# global ``model`` that holds the SentenceTransformer instance.
for model_name in ["gpt", "claude", "llama"]:
    baseline_col = f"{model_name}_tech_trend"
    ours_col     = f"{model_name}_both_trend"

    stat, p = wilcoxon(
        trend_means[baseline_col],
        trend_means[ours_col],
        alternative="two-sided"
    )
    print(f"{model_name}: Wilcoxon stat={stat}, p-value={p}")

#gpt: Wilcoxon stat=11.0, p-value=0.6875
#claude: Wilcoxon stat=7.0, p-value=0.296875
#llama: Wilcoxon stat=2.0, p-value=0.046875

In [None]:
from scipy.stats import friedmanchisquare

cols_ours = ["gpt_both_trend", "claude_both_trend", "llama_both_trend"]

# One array per model; within each array the entries are the per-vacancy values,
# so vacancies act as rows and models as columns.
data_for_friedman = []
for col in cols_ours:
    data_for_friedman.append(trend_means[col].values)

friedman_result = friedmanchisquare(*data_for_friedman)
chi_square, p_value = friedman_result.statistic, friedman_result.pvalue
print(f"Friedman chi-square = {chi_square}, p-value = {p_value}")

#Friedman chi-square = 12.285714285714278, p-value = 0.002148775480909733