In [27]:
import pandas as pd
import numpy as np

In [28]:
data = pd.read_csv("./data/000_MDT_TEST_2020_Dec_07_1447_V1_TEST.csv")

In [29]:
def get_responses(data):
    return data[['img', 'earlyResp', 'resp', 'lateResp']]

In [30]:
def fill_row(row):
    if np.isnan(row['resp']):
        if not np.isnan(row['lateResp']):
            return row['lateResp']
        elif not np.isnan(row['earlyResp']):
            return row['earlyResp']
        else:
            return -1
    return row['resp']

In [31]:
def clear_blanks(tbl):
    tbl['resp'] = tbl.apply(fill_row, axis=1)
    return tbl[tbl['resp'] != -1][['img', 'resp']]

In [32]:
valid_resp = clear_blanks(get_responses(data))
valid_resp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tbl['resp'] = tbl.apply(fill_row, axis=1)


Unnamed: 0,img,resp
0,079a.jpg,1
1,082a.jpg,1
2,083a.jpg,1
3,090a.jpg,1
4,098a.jpg,1
5,099a.jpg,1
6,102a.jpg,3
7,103a.jpg,1
8,105a.jpg,1
9,113a.jpg,1


In [33]:
def score_correctness(row):
    if 'a' in row['img']:
        if row['resp'] == 4 or row['resp'] == 3:
            return 1
        elif row['resp'] == 1 or row['resp'] == 2:
            return 0
    elif 'b' in row['img'] or 'foil' in row['img']:
        if row['resp'] == 4 or row['resp'] == 3:
            return 0
        elif row['resp'] == 1 or row['resp'] == 2:
            return 1

In [34]:
def score_confidence(row):
    if row['resp'] == 1 or row['resp'] == 4:
        return 1
    else:
        return 0

In [35]:
valid_resp['correctness'] = valid_resp.apply(score_correctness, axis=1)

In [36]:
valid_resp

Unnamed: 0,img,resp,correctness
0,079a.jpg,1,0
1,082a.jpg,1,0
2,083a.jpg,1,0
3,090a.jpg,1,0
4,098a.jpg,1,0
5,099a.jpg,1,0
6,102a.jpg,3,1
7,103a.jpg,1,0
8,105a.jpg,1,0
9,113a.jpg,1,0


In [37]:
valid_resp['confidence'] = valid_resp.apply(score_confidence, axis = 1)
valid_resp

Unnamed: 0,img,resp,correctness,confidence
0,079a.jpg,1,0,1
1,082a.jpg,1,0,1
2,083a.jpg,1,0,1
3,090a.jpg,1,0,1
4,098a.jpg,1,0,1
5,099a.jpg,1,0,1
6,102a.jpg,3,1,0
7,103a.jpg,1,0,1
8,105a.jpg,1,0,1
9,113a.jpg,1,0,1


In [38]:
lures = valid_resp[valid_resp['img'].str.contains('b')]
targets = valid_resp[valid_resp['img'].str.contains('a')]
novels = valid_resp[valid_resp['img'].str.contains('foil')]

In [39]:
lures

Unnamed: 0,img,resp,correctness,confidence
23,059b.jpg,3,0,0
24,068b.jpg,4,0,1
25,070b.jpg,4,0,1
26,087b.jpg,4,0,1
27,091b.jpg,4,0,1
28,092b.jpg,4,0,1
29,095b.jpg,4,0,1
30,097b.jpg,3,0,0
31,100b.jpg,4,0,1
32,104b.jpg,1,1,1


In [40]:
targets

Unnamed: 0,img,resp,correctness,confidence
0,079a.jpg,1,0,1
1,082a.jpg,1,0,1
2,083a.jpg,1,0,1
3,090a.jpg,1,0,1
4,098a.jpg,1,0,1
5,099a.jpg,1,0,1
6,102a.jpg,3,1,0
7,103a.jpg,1,0,1
8,105a.jpg,1,0,1
9,113a.jpg,1,0,1


In [41]:
novels

Unnamed: 0,img,resp,correctness,confidence
44,foil_1201.jpg,4,0,1
45,foil_1202.jpg,4,0,1
46,foil_1203.jpg,4,0,1
47,foil_1204.jpg,4,0,1


In [42]:
output = {'total': np.zeros(6), 
         'percent': np.zeros(6),
         'high_conf': np.zeros(6),
         'low_conf': np.zeros(6),
          'high_conf_pct': np.zeros(6),
          'low_conf_pct': np.zeros(6)}
output_tbl = pd.DataFrame(output, index=["lure_hit", "lure_miss",
                                        "target_hit", "target_miss",
                                        "novel_hit", "novel_miss"])
output_tbl

Unnamed: 0,total,percent,high_conf,low_conf,high_conf_pct,low_conf_pct
lure_hit,0.0,0.0,0.0,0.0,0.0,0.0
lure_miss,0.0,0.0,0.0,0.0,0.0,0.0
target_hit,0.0,0.0,0.0,0.0,0.0,0.0
target_miss,0.0,0.0,0.0,0.0,0.0,0.0
novel_hit,0.0,0.0,0.0,0.0,0.0,0.0
novel_miss,0.0,0.0,0.0,0.0,0.0,0.0


In [49]:
def compute_rows(tbl):
    """Returns a 2-item list RES of hit/miss data for a lure, target, or novel table.
    
    RES[0] is the hits. RES[1] is the misses.
    """
    hit_res = [0] * 6
    miss_res = [0] * 6
    hits = tbl[tbl['correctness'] == 1]
    misses = tbl[tbl['correctness'] == 0]
    # counts
    num_hits = len(hits.index)
    num_misses = len(misses.index)
    print(num_hits, num_misses)
    # percentages
    pct_hits = num_hits/(num_hits + num_misses)
    pct_misses = num_misses/(num_hits + num_misses)
    # confidence counts
    num_hc_hit = sum(hits['confidence'])
    num_lc_hit = num_hits - num_hc_hit
    num_hc_miss = sum(misses['confidence'])
    num_lc_miss = num_misses - num_hc_miss
    # confidence percents
    pct_hc_hit = num_hc_hit/num_hits if num_hits else None
    pct_lc_hit = num_lc_hit/num_hits if num_hits else None
    pct_hc_miss = num_hc_miss/num_misses
    pct_lc_miss = num_lc_miss/num_misses
    return [[num_hits, pct_hits, num_hc_hit, num_lc_hit, pct_hc_hit, pct_lc_hit], 
           [num_misses, pct_misses, num_hc_miss, num_lc_miss, pct_hc_miss, pct_lc_miss]]

In [50]:
lure_res = compute_rows(lures)
target_res = compute_rows(targets)
novel_res = compute_rows(novels)

3 18
1 22
0 4


In [51]:
output_tbl.loc['lure_hit'] = lure_res[0]
output_tbl.loc['lure_miss'] = lure_res[1]
output_tbl.loc['target_hit'] = target_res[0]
output_tbl.loc['target_miss'] = target_res[1]
output_tbl.loc['novel_hit'] = novel_res[0]
output_tbl.loc['novel_miss'] = novel_res[1]

In [52]:
output_tbl

Unnamed: 0,total,percent,high_conf,low_conf,high_conf_pct,low_conf_pct
lure_hit,3.0,0.142857,3.0,0.0,1.0,0.0
lure_miss,18.0,0.857143,14.0,4.0,0.777778,0.222222
target_hit,1.0,0.043478,0.0,1.0,0.0,1.0
target_miss,22.0,0.956522,22.0,0.0,1.0,0.0
novel_hit,0.0,0.0,0.0,0.0,,
novel_miss,4.0,1.0,4.0,0.0,1.0,0.0


In [53]:
output_tbl.to_csv('RESULTS')