In [1]:
import os

# Step the working directory up one level from where the notebook was launched
# (presumably so relative paths such as 'data/keywords' resolve -- confirm).
# The launch directory is captured only the first time this cell runs, so
# re-running the cell no longer climbs one more level on every execution.
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, '..'))

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Root folder of the keyword workbooks, relative to the working directory.
# NOTE(review): `dir` shadows the Python builtin of the same name; it is kept
# because other cells read it under this name.
dir = 'data/keywords'

# Models whose generated keywords are evaluated.
models = [
    'ChatGPT-3.5',
    'ChatGPT-4',
    'Claude 3 Sonnet',
]

# Country codes; used as sub-folder names and as column names.
countries = [
    'US',
    'TW',
    'CN',
    'JP',
    'KR',
]

# Keyword categories; used as workbook / sheet names.
categories = [
    'economic',
    'policy',
    'uncertainty',
]

# Roles; each role name is the prefix of a sheet name.
roles = [
    'newspaper editor',
    'economist',
    'Minister of Economic Affairs',
    'Central Bank Governor',
]

### keywords defined by economists

In [4]:
# Read every category sheet of the reference workbook in a single pass.
# Passing a list as `sheet_name` makes pandas return {sheet name: DataFrame},
# so the file is opened once instead of once per (country, category) pair.
keyword_sheets = pd.read_excel(f'{dir}/paper_keywords.xlsx', sheet_name=categories)

# paper_keywords[country][category] -> list of the non-null entries found in
# the `country` column of the `category` sheet.
paper_keywords = {
    country: {
        cate: keyword_sheets[cate][country].dropna().to_list()
        for cate in categories
    }
    for country in countries
}

In [5]:
def F1(prec, recall):
    """Return the harmonic mean of precision and recall.

    Args:
        prec: precision value.
        recall: recall value.

    Returns:
        0 when both inputs are zero (avoids a 0/0 division), otherwise
        2 * prec * recall / (prec + recall).
    """
    both_zero = prec == 0 and recall == 0
    return 0 if both_zero else (2*prec*recall)/(prec+recall)

In [37]:
# Columns 0..N_RUNS-1 of every sheet are scored (presumably one generation
# run per column -- confirm against the workbooks).
N_RUNS = 10

# (sheet-name suffix, task label): sheets named '<role> nd' are reported as
# 'Simple', sheets named '<role>' as 'Definition'.
TASKS = [(' nd', 'Simple'), ('', 'Definition')]


def score_run(generated, reference):
    """Score one run's keywords against the reference keyword list.

    Args:
        generated: pandas Series of generated keywords with nulls removed.
        reference: list of reference keywords.

    Returns:
        Tuple (precision, recall, F1) for this run.
    """
    # NOTE(review): `isin` counts a repeated generated keyword once per
    # occurrence, so a run containing duplicates can push recall above 1 --
    # confirm that runs are de-duplicated upstream.
    hits = generated.isin(reference).sum()
    # NOTE(review): an all-empty column gives 0/0 here (NaN), which then
    # propagates through the np.mean calls below -- confirm every column
    # holds at least one keyword.
    prec = hits / len(generated)
    recall = hits / len(reference)
    return prec, recall, F1(prec, recall)


data = []
for model in models:
    for suffix, task_label in TASKS:
        for country in countries:
            for cate in categories:
                reference = paper_keywords[country][cate]
                prec_role = []
                recall_role = []
                f1_role = []
                for role in roles:
                    sheet = pd.read_excel(
                        f'{dir}/{country}/{model}/{cate}.xlsx',
                        sheet_name=role + suffix,
                        header=None,
                    )
                    # Score each run once and reuse the result for all three metrics.
                    run_scores = [
                        score_run(sheet[i].dropna(), reference)
                        for i in range(N_RUNS)
                    ]
                    run_prec, run_recall, run_f1 = zip(*run_scores)
                    # Average over the runs of this role.
                    prec_role.append(np.mean(run_prec))
                    recall_role.append(np.mean(run_recall))
                    f1_role.append(np.mean(run_f1))
                # Average over roles: one row per (model, task, country, category).
                data.append(
                    [model, task_label, country, cate,
                     np.mean(prec_role), np.mean(recall_role), np.mean(f1_role)]
                )

df = pd.DataFrame(
    data,
    columns=['Model', 'Task Description', 'Country', 'Category', 'Precision', 'Recall', 'F1'],
)
# Mean over countries and categories, shown as percentages.
df.groupby(['Model', 'Task Description'])[['Precision', 'Recall', 'F1']].mean() * 100

Unnamed: 0_level_0,Unnamed: 1_level_0,Precision,Recall,F1
Model,Task Description,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ChatGPT-3.5,Definition,11.812037,38.665961,13.719987
ChatGPT-3.5,Simple,10.426316,30.746797,11.774077
ChatGPT-4,Definition,10.375877,29.545551,11.435388
ChatGPT-4,Simple,9.007037,28.811178,10.306633
Claude 3 Sonnet,Definition,12.138698,41.421876,14.253992
Claude 3 Sonnet,Simple,9.706746,30.008549,11.086483
