In [1]:
import os
# Move the working directory up one level so that the relative data paths used by
# later cells (e.g. 'data/keywords') resolve from there.
# NOTE(review): this is a relative chdir, so it is not idempotent — re-running this
# cell without restarting the kernel moves up one more level each time.
os.chdir('../')

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Base directory (relative to the working directory) that holds the keyword workbooks.
# NOTE(review): this name shadows the builtin `dir()`; it is kept because other
# cells build their file paths from it.
dir = 'data/keywords'

# Model names; each one is used as a sub-directory name in the workbook paths.
models = [
    'ChatGPT-3.5',
    'ChatGPT-4',
    'Claude 3 Sonnet',
]

# Country codes; used as sub-directory names and as column names in
# `paper_keywords.xlsx`.
countries = [
    'US',
    'TW',
    'CN',
    'JP',
    'KR',
]

# Keyword categories; used as sheet names in `paper_keywords.xlsx` and as the
# file names (`<category>.xlsx`) of the per-model workbooks.
categories = [
    'economic',
    'policy',
    'uncertainty',
]

# Prompt roles; each one is (the prefix of) a sheet name in the per-model workbooks.
roles = [
    'newspaper editor',
    'economist',
    'Minister of Economic Affairs',
    'Central Bank Governor',
]

### Keywords defined by economists

In [4]:
# Reference keyword lists read from `paper_keywords.xlsx`, stored as
#   paper_keywords[country][category] -> list of keywords.
#
# The workbook is read with one sheet per category and one column per country.
# Passing a list to `sheet_name` loads all requested sheets in a single call and
# returns a dict keyed by sheet name, so the file is opened once instead of once
# per (country, category) pair.
_keyword_sheets = pd.read_excel(f'{dir}/paper_keywords.xlsx', sheet_name=categories)

# Built from the shared `countries` / `categories` lists so the structure cannot
# drift away from the configuration cell.
paper_keywords = {
    country: {
        # dropna() removes empty cells (presumably padding where countries have
        # keyword lists of different lengths — confirm against the workbook).
        cate: _keyword_sheets[cate][country].dropna().to_list()
        for cate in categories
    }
    for country in countries
}

In [5]:
def F1(prec, recall):
    """Return the F1 score, i.e. the harmonic mean of precision and recall.

    Args:
        prec: Precision value.
        recall: Recall value.

    Returns:
        ``2 * prec * recall / (prec + recall)``, or ``0`` when both inputs are
        zero (the formula would otherwise divide by zero).
    """
    both_zero = (prec == 0) and (recall == 0)
    if not both_zero:
        numerator = 2 * prec * recall
        denominator = prec + recall
        return numerator / denominator
    return 0

In [19]:
# Score every (model, country, category) combination: for each role, compare the
# generated keyword lists with the economists' reference list and average the F1
# over the scored sheet columns. Results are then summarised per model.

# Short column labels for the per-role scores; same order as `roles`.
role_columns = ['Editor', 'Economist', 'Minister', 'Governor']
# Sheet columns 0..n_runs-1 are scored.
# NOTE(review): presumably one column per generation run — confirm against the workbooks.
n_runs = 10

data = []
for model in models:
    for task in ['']:  # Task Description = Definition
        # An empty suffix selects the "Definition" sheets; any other suffix is
        # labelled "Simple".
        t = 'Definition' if task == '' else 'Simple'
        for country in countries:
            for cate in categories:
                reference = paper_keywords[country][cate]
                f1_role = []
                for role in roles:
                    sheet_name = role + task
                    # Named `sheet` (not `df`) so the raw sheet is not confused
                    # with the results frame built below.
                    sheet = pd.read_excel(
                        f'{dir}/{country}/{model}/{cate}.xlsx',
                        sheet_name=sheet_name,
                        header=None,
                    )

                    run_scores = []
                    for i in range(n_runs):
                        # Compute the cleaned column and the hit count once and
                        # reuse them for both precision and recall.
                        generated = sheet[i].dropna()
                        hits = generated.isin(reference).sum()
                        # An empty column would give 0/0 = NaN, which then
                        # propagates through np.mean; score it as precision 0.
                        precision = hits / len(generated) if len(generated) else 0.0
                        recall = hits / len(reference)
                        run_scores.append(F1(precision, recall))
                    f1_role.append(np.mean(run_scores))
                data.append([model, t, country, cate] + f1_role)

scores = pd.DataFrame(
    data,
    columns=['Model', 'Task Description', 'Country', 'Category'] + role_columns,
)

# Mean F1 per model, in percent, over all countries and for Taiwan only.
# Each part is labelled before concatenation, so the labels do not depend on the
# number of models or on the row order of the concatenated frame.
df = pd.concat([
    (scores.groupby(['Model'])[role_columns].mean() * 100).assign(Country='All'),
    (
        scores[scores['Country'] == 'TW']
        .groupby(['Model'])[role_columns]
        .mean() * 100
    ).assign(Country='Taiwan'),
]).reset_index()

df.set_index(['Country', 'Model'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Editor,Economist,Minister,Governor
Country,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All,ChatGPT-3.5,13.629802,13.383827,13.880169,13.986149
All,ChatGPT-4,11.228571,11.988137,11.527728,10.997117
All,Claude 3 Sonnet,13.710435,13.967077,14.430514,14.907944
Taiwan,ChatGPT-3.5,14.720539,15.18759,15.542088,14.740741
Taiwan,ChatGPT-4,14.161616,14.552189,14.127946,12.026936
Taiwan,Claude 3 Sonnet,11.245791,12.390572,11.441077,13.959596
