## AIのデータ生成

In [1]:
!@cd

c:\Users\Owner\source\repos\ds_bias\neosim2\preprocessing


In [2]:
import numpy as np
import polars as pl

In [3]:
import json

In [4]:
import sys

# Directory added to the module search path so the `ai_simulator` import in
# the next cell can be resolved (presumably that is where the module lives;
# the path is relative to the kernel's working directory).
NEOSIM_EXP_DIR = "../../neosim/exp"

# Guarded so that re-running this cell does not pile up duplicate entries.
if NEOSIM_EXP_DIR not in sys.path:
    sys.path.append(NEOSIM_EXP_DIR)

In [5]:
from ai_simulator import generate_confusion_matrix, get_dataset_human_acc, generate_ai_dataset

In [6]:
# Seed NumPy's legacy global RNG so that sampling drawn from `np.random` is
# repeatable on a fresh Restart & Run All.
# NOTE(review): this only makes the generated AI responses reproducible if the
# ai_simulator helpers draw from the global `np.random` state -- confirm.
SEED = 777
np.random.seed(SEED)

## datasets

In [None]:
# Alternative run configuration (dog / face / tiny datasets).
# Caution: the cell directly below rebinds `datasets`, `ai_accs` and `num_AI`,
# so on a top-to-bottom run these values are overwritten before they are used.
datasets = [
    "dog",
    "face",
    "tiny",
]
# Accuracy levels to simulate; each label is mapped to a number in the
# generation loop.
ai_accs = [
    "-sigma",
    "mean",
    "+sigma",
    "max",
]
# Number of simulated AI response files written per configuration.
num_AI = 30

In [7]:
# Datasets to process; each name selects the matching files under
# ../human_responses/ in the generation loop.
datasets = ["adult"]

# Accuracy levels to simulate. The generation loop maps them to
# human_acc - human_std, human_acc, human_acc + human_std and
# (n_classes - 1) / n_classes respectively.
ai_accs = [
    "-sigma",
    "mean",
    "+sigma",
    "max",
]

# Number of simulated AI response files written per (r, accuracy level,
# fixed target) combination.
num_AI = 30

In [8]:
# Generate simulated AI response files for every dataset, every r in the
# dataset's `r_range`, every accuracy level in `ai_accs`, every fixed target
# class and every run index, and write each one as a CSV under ../ai_responses/.
for dataset_name in datasets:
    with open(f"../human_responses/{dataset_name}_dataset_profile.json", "r") as f:
        dataset_profile = json.load(f)

    r_range = dataset_profile["r_range"]
    n_classes = dataset_profile["n_classes"]
    labels = dataset_profile["labels"]

    # The formula for `h` below divides by n_classes * (n_classes - 2), which
    # is zero for a 2-class dataset. Fail up front with a clear message
    # instead of a ZeroDivisionError in the middle of the run.
    if n_classes < 3:
        raise ValueError(
            f"Dataset {dataset_name!r} has n_classes={n_classes}; "
            "the confusion-matrix parameter h is only defined for n_classes >= 3."
        )

    # Highest accuracy level that is simulated; anything above it is skipped.
    # NOTE(review): (n_classes - 1) / n_classes looks like the ceiling implied
    # by one class always being answered wrongly (see `fixed_index` below) --
    # confirm against ai_simulator.
    max_acc = (n_classes - 1) / n_classes

    # The ground-truth file does not depend on r, so it is read once per dataset.
    gt = pl.read_csv(f"../human_responses/{dataset_name}_gt.csv").to_pandas().set_index("task")

    for r in r_range:
        human_df = pl.read_csv(f"../human_responses/{dataset_name}_r={r}.csv").to_pandas()
        human_acc, human_std = get_dataset_human_acc(human_df, gt)
        print(f"Dataset: {dataset_name}, r: {r}, human_acc: {human_acc:.3f}, human_std: {human_std:.3f}")

        # Numeric target accuracy for each supported level label.
        ai_acc_targets = {
            "-sigma": human_acc - human_std,
            "mean": human_acc,
            "+sigma": human_acc + human_std,
            "max": max_acc,
        }

        for ai_acc in ai_accs:
            # An unrecognised label must not silently reuse the value left
            # over from the previous level.
            if ai_acc not in ai_acc_targets:
                raise ValueError(
                    f"Unknown ai_acc level {ai_acc!r}; expected one of {sorted(ai_acc_targets)}."
                )
            ai_acc_value = ai_acc_targets[ai_acc]

            if ai_acc_value > max_acc:
                print(f"Skipping {dataset_name} r={r} {ai_acc} as it exceeds max accuracy.")
                continue

            # Convert the target accuracy into the parameter handed to
            # generate_confusion_matrix. It equals exactly 1 when
            # ai_acc_value == max_acc.
            # NOTE(review): the keyword is called `r` there but is unrelated
            # to the loop variable `r` taken from `r_range`.
            h = (n_classes * ai_acc_value * (n_classes - 1) - 1) / (n_classes * (n_classes - 2))

            print(f"Dataset: {dataset_name}, r: {r}, ai_acc: {ai_acc}, h: {h:.3f}")

            # One confusion matrix per target class 1..n_classes-1, each
            # reused for `num_AI` generated response files.
            # NOTE(review): fixed_index=[[0, fixed_target]] presumably pins
            # class 0 to be answered as `fixed_target` -- confirm.
            for fixed_target in range(1, n_classes):
                cm = generate_confusion_matrix(n_classes, r=h, fixed_index=[[0, fixed_target]])
                for i in range(num_AI):
                    ai_df = generate_ai_dataset(cm, gt, f"AI_{ai_acc}_{i}", labels)
                    ai_df.to_csv(f"../ai_responses/{dataset_name}_r={r}_ai={ai_acc}_target={fixed_target}_run={i}.csv", index=False)


Dataset: adult, r: 3, human_acc: 0.782, human_std: 0.265
Dataset: adult, r: 3, ai_acc: -sigma, h: 0.622
Dataset: adult, r: 3, ai_acc: mean, h: 0.976
Skipping adult r=3 +sigma as it exceeds max accuracy.
Dataset: adult, r: 3, ai_acc: max, h: 1.000
Dataset: adult, r: 5, human_acc: 0.775, human_std: 0.217
Dataset: adult, r: 5, ai_acc: -sigma, h: 0.677
Dataset: adult, r: 5, ai_acc: mean, h: 0.967
Skipping adult r=5 +sigma as it exceeds max accuracy.
Dataset: adult, r: 5, ai_acc: max, h: 1.000


```
Dataset: dog, r: 3, human_acc: 0.712, human_std: 0.202
Dataset: dog, r: 3, ai_acc: -sigma, h: 0.641
Dataset: dog, r: 3, ai_acc: mean, h: 0.944
Skipping dog r=3 +sigma as it exceeds max accuracy.
Dataset: dog, r: 3, ai_acc: max, h: 1.000
Dataset: dog, r: 5, human_acc: 0.704, human_std: 0.182
Dataset: dog, r: 5, ai_acc: -sigma, h: 0.657
Dataset: dog, r: 5, ai_acc: mean, h: 0.930
Skipping dog r=5 +sigma as it exceeds max accuracy.
Dataset: dog, r: 5, ai_acc: max, h: 1.000
Dataset: dog, r: 10, human_acc: 0.700, human_std: 0.199
Dataset: dog, r: 10, ai_acc: -sigma, h: 0.627
Dataset: dog, r: 10, ai_acc: mean, h: 0.926
Skipping dog r=10 +sigma as it exceeds max accuracy.
Dataset: dog, r: 10, ai_acc: max, h: 1.000
Dataset: face, r: 3, human_acc: 0.565, human_std: 0.101
Dataset: face, r: 3, ai_acc: -sigma, h: 0.570
Dataset: face, r: 3, ai_acc: mean, h: 0.722
Dataset: face, r: 3, ai_acc: +sigma, h: 0.874
Dataset: face, r: 3, ai_acc: max, h: 1.000
Dataset: face, r: 5, human_acc: 0.592, human_std: 0.085
Dataset: face, r: 5, ai_acc: -sigma, h: 0.636
Dataset: face, r: 5, ai_acc: mean, h: 0.763
Dataset: face, r: 5, ai_acc: +sigma, h: 0.891
Dataset: face, r: 5, ai_acc: max, h: 1.000
Dataset: tiny, r: 2, human_acc: 0.710, human_std: 0.223
Dataset: tiny, r: 2, ai_acc: -sigma, h: 0.584
Dataset: tiny, r: 2, ai_acc: mean, h: 0.880
Skipping tiny r=2 +sigma as it exceeds max accuracy.
Dataset: tiny, r: 2, ai_acc: max, h: 1.000
```

```
Dataset: adult, r: 3, human_acc: 0.782, human_std: 0.265
Dataset: adult, r: 3, ai_acc: -sigma, h: 0.622
Dataset: adult, r: 3, ai_acc: mean, h: 0.976
Skipping adult r=3 +sigma as it exceeds max accuracy.
Dataset: adult, r: 3, ai_acc: max, h: 1.000
Dataset: adult, r: 5, human_acc: 0.775, human_std: 0.217
Dataset: adult, r: 5, ai_acc: -sigma, h: 0.677
Dataset: adult, r: 5, ai_acc: mean, h: 0.967
Skipping adult r=5 +sigma as it exceeds max accuracy.
Dataset: adult, r: 5, ai_acc: max, h: 1.000
```