# Analyze Results
Processes the JSON file returned from the model and also runs evaluation code for different image captionign metrics

## Process JSON File

In [55]:
import json
import os

In [133]:
FILE = 'blockMaleConf'

In [134]:
filepath = '../results/{0}.json'.format(FILE)

In [135]:
input_file = open(filepath)
json_array = json.load(input_file)

In [136]:
os.path.basename(json_array[0]['filename'])

'f_f_sports_515982_2.jpg'

In [137]:
parsed_results = []
for img in json_array:
    caption = img['caption']
    file = os.path.basename(img['filename']).split('_')
    file_num = file[3].split('.')[0]
    gender = file[1]
    category = file[2]
    source = os.path.basename(img['filename'])
    if source is 'f':
        source = 'Flickr'
    else:
        source = 'COCO'
    parsed_results.append([caption, file_num, gender, category, source, os.path.basename(img['filename'])])
    print('Caption: {0} Gender: {1} Category: {2} Source: {3}'.format(caption, gender, category, source))

Caption: a baseball player sliding into home base. Gender: f Category: sports Source: COCO
Caption: a black and white photo of a man in a suit and tie. Gender: f Category: tie Source: COCO
Caption: a group of young men playing a game of frisbee. Gender: f Category: frisbee Source: COCO
Caption: a person on a skateboard in a park. Gender: m Category: skateboard Source: COCO
Caption: a person jumping a skate board in the air. Gender: m Category: skateboard Source: COCO
Caption: a woman is eating a doughnut with sprinkles. Gender: f Category: toothbrush Source: COCO
Caption: a young boy holding a hot dog in his hands. Gender: m Category: hairdrier Source: COCO
Caption: a woman holding a tennis racquet on a tennis court. Gender: f Category: racket Source: COCO
Caption: a woman is standing on a skateboard in the street. Gender: f Category: skateboard Source: COCO
Caption: a close up of a person brushing his teeth. Gender: m Category: toothbrush Source: COCO
Caption: a woman brushing her tee

In [138]:
parsed_results

[['a baseball player sliding into home base.',
  '515982',
  'f',
  'sports',
  'COCO',
  'f_f_sports_515982_2.jpg'],
 ['a black and white photo of a man in a suit and tie.',
  '21604',
  'f',
  'tie',
  'COCO',
  'f_f_tie_21604_1.jpg'],
 ['a group of young men playing a game of frisbee.',
  '127263',
  'f',
  'frisbee',
  'COCO',
  'f_f_frisbee_127263_4.jpg'],
 ['a person on a skateboard in a park.',
  '13201',
  'm',
  'skateboard',
  'COCO',
  'm_m_skateboard_13201.jpg'],
 ['a person jumping a skate board in the air.',
  '128699',
  'm',
  'skateboard',
  'COCO',
  'm_m_skateboard_128699.jpg'],
 ['a woman is eating a doughnut with sprinkles.',
  '445999',
  'f',
  'toothbrush',
  'COCO',
  'm_f_toothbrush_445999.jpg'],
 ['a young boy holding a hot dog in his hands.',
  '350002',
  'm',
  'hairdrier',
  'COCO',
  'f_m_hairdrier_350002_5.jpg'],
 ['a woman holding a tennis racquet on a tennis court.',
  '64718',
  'f',
  'racket',
  'COCO',
  'f_f_racket_64718_1.jpg'],
 ['a woman is st

In [139]:
# saves captions to csv
import csv
with open('../results/{0}.csv'.format(FILE), 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['Caption', 'Filename', 'Gender', 'Category', 'Source', 'File'])
    for i in parsed_results:
        writer.writerow(i)

## Process Captions

In [10]:
import pandas as pd
import json

In [107]:
# process result annotations
filepath = '../results/{0}.json'.format(FILE)
input_file = open(filepath)
json_array = json.load(input_file)
json_array

[{'caption': 'a baseball player sliding into a base.',
  'filename': './data/mscoco/simimages/f_f_sports_515982_2.jpg',
  'image_id': 2},
 {'caption': 'a black and white photo of a man in a suit.',
  'filename': './data/mscoco/simimages/f_f_tie_21604_1.jpg',
  'image_id': 1},
 {'caption': 'a group of young men playing a game of frisbee.',
  'filename': './data/mscoco/simimages/f_f_frisbee_127263_4.jpg',
  'image_id': 4},
 {'caption': 'a person on a skateboard in a park.',
  'filename': './data/mscoco/simimages/m_m_skateboard_13201.jpg',
  'image_id': 13201},
 {'caption': 'a person on a skate board in the air.',
  'filename': './data/mscoco/simimages/m_m_skateboard_128699.jpg',
  'image_id': 128699},
 {'caption': 'a close up of a person holding a banana.',
  'filename': './data/mscoco/simimages/m_f_toothbrush_445999.jpg',
  'image_id': 445999},
 {'caption': 'a young boy holding a hot dog in his hands.',
  'filename': './data/mscoco/simimages/f_m_hairdrier_350002_5.jpg',
  'image_id': 5}

## Women Also Snowboard Metrics

In [140]:
GT_Gender = len(df[df['Gender'] == 'f']) / len(df[df['Gender'] == 'm'])

In [141]:
def snowboard_metrics(df, filename):
    gender_ratio = len(df[df['Gender_Predict'] == 'f']) / len(df[df['Gender_Predict'] == 'm'])
    error_rate = len(df[df['Gender_Correct'] == 0]) / (len(df['Gender_Correct']))
    print('Model Name: {2} | Ratio Delta: {0} | Error Rate: {1}'.format(round(GT_Gender - gender_ratio, 3), round(error_rate,3), filename))

In [147]:
results = []
for file in os.listdir("../results"):
    if file.endswith(".csv"):
        results.append(os.path.join("../results", file))

In [151]:
for result in results:
    df = pd.read_csv(result)
    correct = []
    gt_gender = list(df['Gender'])
    pred_gender = list(df['Gender_Predict'])
    for index, i in enumerate(pred_gender):
        if i is 'n':
            correct.append(2)
        elif i is gt_gender[index]:
            correct.append(1)
        else:
            correct.append(0)
    df['Gender_Correct'] = correct
    df.to_csv(result, index=False)
    snowboard_metrics(df, result)

Model Name: ../results/blockMale.csv | Ratio Delta: 0.481 | Error Rate: 0.223
Model Name: ../results/blockMaleConf.csv | Ratio Delta: 0.033 | Error Rate: 0.213
Model Name: ../results/blockNeutral.csv | Ratio Delta: 0.429 | Error Rate: 0.245
Model Name: ../results/blockNeutralConf.csv | Ratio Delta: -0.151 | Error Rate: 0.213
Model Name: ../results/blockNeutralConf_2.csv | Ratio Delta: 0.033 | Error Rate: 0.213


## Format Captions

In [155]:
import pandas as pd
import json
import os

In [159]:
FILE = 'blockMale'

In [160]:
filepath = '../annotations/similarMatch.json'
input_file = open(filepath)
json_array = json.load(input_file)
json_array

{'f_f_frisbee_328238_2.jpg': ['m_m_frisbee_227482.jpg'],
 'f_f_frisbee_88485_2.jpg': ['m_m_frisbee_88485.jpg'],
 'f_f_tie_131444_1.jpg': ['m_m_tie_152214.jpg'],
 'f_f_surfboard_115898_1.jpg': ['m_m_surfboard_115898.jpg'],
 'f_f_frisbee_227482_3.jpg': ['m_m_frisbee_328238.jpg'],
 'f_f_sports_371552_4.jpg': ['m_m_sports_135604.jpg'],
 'f_f_racket_88970_2.jpg': ['m_m_racket_88970.jpg'],
 'f_f_tie_21604_1.jpg': ['m_m_tie_21604.jpg'],
 'f_f_surfboard_7278_3.jpg': ['m_m_surfboard_32570.jpg'],
 'f_f_skateboard_13201_4.jpg': ['m_m_skateboard_13201.jpg',
  'm_m_skateboard_125472.jpg',
  'm_m_skateboard_72281.jpg'],
 'f_f_sports_429690_1.jpg': ['m_m_sports_429690.jpg',
  'm_m_sports_89296.jpg',
  'm_m_sports_192670.jpg'],
 'f_f_frisbee_127263_4.jpg': ['m_m_frisbee_127263.jpg'],
 'f_f_frisbee_291619_2.jpg': ['m_m_frisbee_291619.jpg'],
 'f_f_racket_88970_5.jpg': ['m_m_racket_85772.jpg'],
 'f_f_racket_64718_1.jpg': ['m_m_racket_127530.jpg'],
 'f_f_surfboard_32570_2.jpg': ['m_m_surfboard_190007.jpg'

In [161]:
df = pd.read_csv('../results/{0}.csv'.format(FILE))
files = df['File'].tolist()
captions = df['Caption'].tolist()

In [162]:
def formatResults(filename, save):
    df = pd.read_csv(filename)
    files = df['File'].tolist()
    captions = df['Caption'].tolist()
    result = []
    for index, file in enumerate(files):
        split = file.split('_')
        if split[0] is 'm':
            result.append({'image_id': split[3].split('.')[0], "caption": captions[index]})
        else:
            matches = json_array[file]
            for match in matches:
                match_split = match.split('_')
                result.append(({'image_id': match_split[3].split('.')[0] + "6", "caption": captions[index]}))
    with open('../results/res{0}.json'.format(save), 'w') as f:
        json.dump(result, f)

In [163]:
results = []
for file in os.listdir("../results"):
    if file.endswith(".csv"):
        print(file)
        save = file.split('.')[0]
        formatResults(os.path.join("../results", file), save)

blockMale.csv
blockMaleConf.csv
blockNeutral.csv
blockNeutralConf.csv
blockNeutralConf_2.csv


In [34]:
df = pd.read_csv("../annotations/gtValidation.csv")
df.head()
image_id_f = [int(str(m) + '6') for m in df['image_id_m'].tolist()]
df['image_id_f'] = image_id_f
df.to_csv("../annotations/gtValidation.csv", index=False)

In [26]:
gt = {}
male_ids = df['image_id_m'].tolist()
coco_caption = df['coco_caption'].tolist()
for index, m in enumerate(male_ids):
    if m not in gt:
        gt[m] = [{'image_id': m, 'caption': coco_caption[index]}]
    else:
        gt[m].append({'image_id': m, 'caption': coco_caption[index]})

In [27]:
female_ids = df['image_id_f'].tolist()
f_caption = df['sim_caption'].tolist()
for index, m in enumerate(female_ids):
    if m not in gt:
        gt[m] = [{'image_id': m, 'caption': f_caption[index]}]
    else:
        gt[m].append({'image_id': m, 'caption': f_caption[index]})

In [28]:
with open('../results/gtAnns.json', 'w') as f:
    json.dump(gt, f)