# Analyze Results
Processes the JSON file returned from the model and also runs evaluation code for different image captionign metrics

## Process JSON File

In [55]:
import json
import os

In [96]:
FILE = 'equalizer'

In [97]:
filepath = '../results/{0}.json'.format(FILE)

In [98]:
input_file = open(filepath)
json_array = json.load(input_file)

In [100]:
os.path.basename(json_array[0]['filename'])

'f_f_sports_515982_2.jpg'

In [102]:
parsed_results = []
for img in json_array:
    caption = img['caption']
    file = os.path.basename(img['filename']).split('_')
    file_num = file[3].split('.')[0]
    gender = file[1]
    category = file[2]
    source = os.path.basename(img['filename'])
    if source is 'f':
        source = 'Flickr'
    else:
        source = 'COCO'
    parsed_results.append([caption, file_num, gender, category, source, os.path.basename(img['filename'])])
    print('Caption: {0} Gender: {1} Category: {2} Source: {3}'.format(caption, gender, category, source))

Caption: a baseball player swinging a bat at a ball. Gender: f Category: sports Source: COCO
Caption: a black and white photo of a person wearing a suit and tie. Gender: f Category: tie Source: COCO
Caption: a group of young people playing a game of frisbee. Gender: f Category: frisbee Source: COCO
Caption: a man riding a skateboard up the side of a ramp. Gender: m Category: skateboard Source: COCO
Caption: a person jumping a skate board in the air. Gender: m Category: skateboard Source: COCO
Caption: a woman holding a baby in her arms. Gender: f Category: toothbrush Source: COCO
Caption: a man holding a hot dog in his hands. Gender: m Category: hairdrier Source: COCO
Caption: a female tennis player in action on the court. Gender: f Category: racket Source: COCO
Caption: a woman walking down a street while talking on a cell phone. Gender: f Category: skateboard Source: COCO
Caption: a man brushing his teeth with a tooth brush. Gender: m Category: toothbrush Source: COCO
Caption: a woma

In [103]:
parsed_results

[['a baseball player swinging a bat at a ball.',
  '515982',
  'f',
  'sports',
  'COCO',
  'f_f_sports_515982_2.jpg'],
 ['a black and white photo of a person wearing a suit and tie.',
  '21604',
  'f',
  'tie',
  'COCO',
  'f_f_tie_21604_1.jpg'],
 ['a group of young people playing a game of frisbee.',
  '127263',
  'f',
  'frisbee',
  'COCO',
  'f_f_frisbee_127263_4.jpg'],
 ['a man riding a skateboard up the side of a ramp.',
  '13201',
  'm',
  'skateboard',
  'COCO',
  'm_m_skateboard_13201.jpg'],
 ['a person jumping a skate board in the air.',
  '128699',
  'm',
  'skateboard',
  'COCO',
  'm_m_skateboard_128699.jpg'],
 ['a woman holding a baby in her arms.',
  '445999',
  'f',
  'toothbrush',
  'COCO',
  'm_f_toothbrush_445999.jpg'],
 ['a man holding a hot dog in his hands.',
  '350002',
  'm',
  'hairdrier',
  'COCO',
  'f_m_hairdrier_350002_5.jpg'],
 ['a female tennis player in action on the court.',
  '64718',
  'f',
  'racket',
  'COCO',
  'f_f_racket_64718_1.jpg'],
 ['a woman

In [104]:
# saves captions to csv
import csv
with open('../results/{0}_2.csv'.format(FILE), 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['Caption', 'Filename', 'Gender', 'Category', 'Source', 'File'])
    for i in parsed_results:
        writer.writerow(i)

## Process Captions

In [10]:
import pandas as pd
import json

In [12]:
# process result annotations
filepath = '../results/{0}.json'.format(FILE)
input_file = open(filepath)
json_array = json.load(input_file)
json_array

[{'caption': 'a baseball player swinging a bat at a ball.',
  'filename': './data/mscoco/simimages/f_f_sports_515982_2.jpg',
  'image_id': 2},
 {'caption': 'a black and white photo of a person wearing a suit and tie.',
  'filename': './data/mscoco/simimages/f_f_tie_21604_1.jpg',
  'image_id': 1},
 {'caption': 'a group of young people playing a game of frisbee.',
  'filename': './data/mscoco/simimages/f_f_frisbee_127263_4.jpg',
  'image_id': 4},
 {'caption': 'a man riding a skateboard up the side of a ramp.',
  'filename': './data/mscoco/simimages/m_m_skateboard_13201.jpg',
  'image_id': 13201},
 {'caption': 'a person jumping a skate board in the air.',
  'filename': './data/mscoco/simimages/m_m_skateboard_128699.jpg',
  'image_id': 128699},
 {'caption': 'a woman holding a baby in her arms.',
  'filename': './data/mscoco/simimages/m_f_toothbrush_445999.jpg',
  'image_id': 445999},
 {'caption': 'a man holding a hot dog in his hands.',
  'filename': './data/mscoco/simimages/f_m_hairdrier_

## Women Also Snowboard Metrics

In [71]:
def snowboard_metrics(df, filename):
    gender_ratio = len(df[df['Gender_Predict'] == 'f']) / len(df[df['Gender_Predict'] == 'm'])
    error_rate = len(df[df['Gender_Correct'] == 0]) / len(df['Gender_Correct'])
    print('Model Name: {2} | Gender Ratio: {0} | Error Rate: {1}'.format(gender_ratio, error_rate, filename))

In [72]:
results = []
for file in os.listdir("../results"):
    if file.endswith(".csv"):
        results.append(os.path.join("../results", file))

In [73]:
for result in results:
    df = pd.read_csv(result)
    snowboard_metrics(df, result)

Model Name: ../results/upweight.csv | Gender Ratio: 0.453125 | Error Rate: 0.22340425531914893
Model Name: ../results/equalizer.csv | Gender Ratio: 0.574468085106383 | Error Rate: 0.11702127659574468
Model Name: ../results/confidence.csv | Gender Ratio: 0.4482758620689655 | Error Rate: 0.2127659574468085
Model Name: ../results/baseline_ft.csv | Gender Ratio: 0.4032258064516129 | Error Rate: 0.2978723404255319
Model Name: ../results/balanced.csv | Gender Ratio: 0.390625 | Error Rate: 0.2553191489361702


In [63]:
gender_ratio = len(df[df['Gender'] == 'f']) / len(df[df['Gender'] == 'm'])
print(gender_ratio)

0.9183673469387755


## Format Captions

In [10]:
import pandas as pd
import json
import os

In [15]:
FILE = 'equalizer'

In [16]:
filepath = '../annotations/similarMatch.json'
input_file = open(filepath)
json_array = json.load(input_file)
json_array

{'f_f_frisbee_328238_2.jpg': ['m_m_frisbee_227482.jpg'],
 'f_f_frisbee_88485_2.jpg': ['m_m_frisbee_88485.jpg'],
 'f_f_tie_131444_1.jpg': ['m_m_tie_152214.jpg'],
 'f_f_surfboard_115898_1.jpg': ['m_m_surfboard_115898.jpg'],
 'f_f_frisbee_227482_3.jpg': ['m_m_frisbee_328238.jpg'],
 'f_f_sports_371552_4.jpg': ['m_m_sports_135604.jpg'],
 'f_f_racket_88970_2.jpg': ['m_m_racket_88970.jpg'],
 'f_f_tie_21604_1.jpg': ['m_m_tie_21604.jpg'],
 'f_f_surfboard_7278_3.jpg': ['m_m_surfboard_32570.jpg'],
 'f_f_skateboard_13201_4.jpg': ['m_m_skateboard_13201.jpg',
  'm_m_skateboard_125472.jpg',
  'm_m_skateboard_72281.jpg'],
 'f_f_sports_429690_1.jpg': ['m_m_sports_429690.jpg',
  'm_m_sports_89296.jpg',
  'm_m_sports_192670.jpg'],
 'f_f_frisbee_127263_4.jpg': ['m_m_frisbee_127263.jpg'],
 'f_f_frisbee_291619_2.jpg': ['m_m_frisbee_291619.jpg'],
 'f_f_racket_88970_5.jpg': ['m_m_racket_85772.jpg'],
 'f_f_racket_64718_1.jpg': ['m_m_racket_127530.jpg'],
 'f_f_surfboard_32570_2.jpg': ['m_m_surfboard_190007.jpg'

In [17]:
df = pd.read_csv('../results/{0}.csv'.format(FILE))
files = df['File'].tolist()
captions = df['Caption'].tolist()

In [32]:
def formatResults(filename, save):
    df = pd.read_csv(filename)
    files = df['File'].tolist()
    captions = df['Caption'].tolist()
    result = []
    for index, file in enumerate(files):
        split = file.split('_')
        if split[0] is 'm':
            result.append({'image_id': split[3].split('.')[0], "caption": captions[index]})
        else:
            matches = json_array[file]
            for match in matches:
                match_split = match.split('_')
                result.append(({'image_id': match_split[3].split('.')[0] + "6", "caption": captions[index]}))
    with open('../results/res{0}.json'.format(save), 'w') as f:
        json.dump(result, f)

In [33]:
results = []
for file in os.listdir("../results"):
    if file.endswith(".csv"):
        save = file.split('.')[0]
        formatResults(os.path.join("../results", file), save)

In [34]:
df = pd.read_csv("../annotations/gtValidation.csv")
df.head()
image_id_f = [int(str(m) + '6') for m in df['image_id_m'].tolist()]
df['image_id_f'] = image_id_f
df.to_csv("../annotations/gtValidation.csv", index=False)

In [26]:
gt = {}
male_ids = df['image_id_m'].tolist()
coco_caption = df['coco_caption'].tolist()
for index, m in enumerate(male_ids):
    if m not in gt:
        gt[m] = [{'image_id': m, 'caption': coco_caption[index]}]
    else:
        gt[m].append({'image_id': m, 'caption': coco_caption[index]})

In [27]:
female_ids = df['image_id_f'].tolist()
f_caption = df['sim_caption'].tolist()
for index, m in enumerate(female_ids):
    if m not in gt:
        gt[m] = [{'image_id': m, 'caption': f_caption[index]}]
    else:
        gt[m].append({'image_id': m, 'caption': f_caption[index]})

In [28]:
with open('../results/gtAnns.json', 'w') as f:
    json.dump(gt, f)