In [1]:
import MySQLdb as mdb
import datetime
import sys
import os
from peewee import *
import glob
from collections import defaultdict
import json
import shortuuid
import random
from datetime import datetime

import PIL
from PIL import Image
from PIL import ImageFilter
from os import listdir
from os.path import isfile, join
from shutil import copyfile
import numpy

# Database settings are read from config.json in the working directory; the
# keys used below are db_user, db_name and db_pass.  The file is opened in a
# ``with`` block so the handle is closed as soon as parsing finishes.
with open('config.json', 'r') as config_file:
    config = json.load(config_file)

# Raw MySQLdb connection, used by the functions that run hand-written SQL.
con = mdb.connect(host="localhost", user=config['db_user'], db=config['db_name'], passwd=config['db_pass'])
# peewee database handle that the model classes are bound to.
database = MySQLDatabase(config['db_name'], user=config['db_user'], password=config['db_pass'])
class BaseModel(Model):
    """Common parent of every model in this file; binds them to ``database``."""
    class Meta:
        # Module-level peewee database handle shared by all subclasses.
        database=database

class Image(BaseModel):
    """An image that can be queued for annotation.

    NOTE: this class rebinds the name ``Image`` that was imported from PIL
    earlier in the file; after this point ``Image`` refers to the model.
    """
    # Primary key, stored as text.
    imageId = CharField(primary_key=True, index=True)
    # File name of the image.
    imageName = CharField()
    # Source dataset label (fillPilotData writes 'mscoco').
    imageType = CharField()
    # Dataset split label (fillPilotData writes the split name, e.g. 'train2014').
    imageSubType = CharField()
    # Stored as text; fillPilotData initialises it to '0'.
    numHitsFinished = CharField()

class Caption(BaseModel):
    """A caption attached to an :class:`Image` row."""
    # Primary key, stored as text.
    captionId = CharField(primary_key=True, index=True)
    # Caption text.
    caption = CharField()
    # Image the caption belongs to.
    image = ForeignKeyField(Image)

class AMTHits(BaseModel):
    """One Mechanical Turk assignment record tied to an image and its caption."""
    id = CharField(primary_key=True)
    socketId = CharField()
    assignmentId = CharField()
    workerId = CharField()
    approve = CharField(default='notApprove')
    hitId = CharField()
    status = CharField()
    # Real boolean default: the previous default was the string '0', which is
    # truthy and therefore coerces to True when the field converts it to bool.
    isPaid = BooleanField(default=False)
    bonus = IntegerField(default=0)
    hitIden = CharField()
    comment = CharField()
    image = ForeignKeyField(Image)
    caption = ForeignKeyField(Caption)
    # The default is a callable so the timestamp is taken when each row is
    # created; a plain value here would be computed once, when this class body
    # runs, and then reused for every row.
    # NOTE(review): strftime('%s') is a platform extension, not portable.
    created_at = IntegerField(default=lambda: int(datetime.now().strftime('%s')))
    completed_at = IntegerField(default=0)

class Feedback(BaseModel):
    """Free-text feedback, identified by worker, HIT, assignment and sequence ids."""
    workerId = CharField()
    hitId = CharField()
    assignmentId = CharField()
    sequenceId = CharField()
    # The feedback body; TextField rather than CharField, unlike the id columns.
    feedback = TextField()

class Question(BaseModel):
    """A question asked about an image within one AMTHits record."""
    id = CharField(primary_key=True, index=True)
    question = CharField()
    image = ForeignKeyField(Image)
    annotationId = ForeignKeyField(AMTHits)
    sequenceId = CharField()
    socketId = CharField()
    sourceId = CharField()
    destId = CharField()
    # The default is a callable so the timestamp is taken when each row is
    # created; a plain value here would be computed once, when this class body
    # runs, and then reused for every row.
    # NOTE(review): strftime('%s') is a platform extension, not portable.
    created_at = IntegerField(default=lambda: int(datetime.now().strftime('%s')))

class Answer(BaseModel):
    """An answer to a :class:`Question`, linked to the same image and AMTHits record."""
    id = CharField(primary_key=True, index=True)
    answer = CharField()
    question = ForeignKeyField(Question)
    image = ForeignKeyField(Image)
    annotationId = ForeignKeyField(AMTHits)
    sequenceId = CharField()
    socketId = CharField()
    sourceId = CharField()
    destId = CharField()
    # The default is a callable so the timestamp is taken when each row is
    # created; a plain value here would be computed once, when this class body
    # runs, and then reused for every row.
    # NOTE(review): strftime('%s') is a platform extension, not portable.
    created_at = IntegerField(default=lambda: int(datetime.now().strftime('%s')))

def createDatabaseTables():
    """Connect to the database and create any model table that is missing.

    Tables are checked and created in the order Image, Caption, AMTHits,
    Feedback, Question, Answer; tables that already exist are left alone.
    """
    database.connect()

    for model in (Image, Caption, AMTHits, Feedback, Question, Answer):
        if model.table_exists():
            continue
        database.create_table(model)

    print("All database tables created.")

def _insert_in_batches(model, rows, batch_size=200):
    """Bulk-insert ``rows`` into ``model`` in slices of ``batch_size``, in one transaction."""
    with database.atomic():
        for start in range(0, len(rows), batch_size):
            print(start)
            model.insert_many(rows[start:start + batch_size]).execute()


def fillPilotData(split='train2014',
                  cocoPath='/home/hudaalamri/visdial-amt-chat/nodejs/static/train2014/',
                  captionsPath='/home/hudaalamri/visdial-amt-chat/nodejs/static/annotations/'):
    """Insert Image and Caption rows for images on disk that are not yet in the DB.

    Images are looked up as ``<cocoPath><split>/*.jpg``.  Any file whose name
    already appears in the ``image`` table for this split is skipped.  For
    every remaining file one Image row is inserted, followed by one Caption
    row holding a randomly chosen caption for that image.

    Args:
        split: Dataset split name; used in the caption file name, the image
            directory, the file-name pattern and the ``imageSubType`` column.
        cocoPath: Directory that contains the ``<split>/`` image folder.
        captionsPath: Directory that contains ``captions_<split>.json``.

    Image files that have no entry in the caption annotations are skipped and
    reported, instead of aborting the whole run with an IndexError.
    """
    print('Loading caption ' + split + ' data...')

    with open(os.path.join(captionsPath, 'captions_' + split + '.json')) as caption_file:
        annotations = json.load(caption_file)['annotations']

    name_pattern = 'COCO_%s_%012d.jpg'
    image_path = cocoPath + split + '/'
    on_disk = glob.glob(image_path + '*.jpg')

    # The split is bound as a query parameter; the printed line is only a
    # human-readable rendering of the statement for the log.
    query = "SELECT * FROM image WHERE imageSubType = %s"
    print(query % ("'" + split + "'"))

    with con:
        cur = con.cursor()
        cur.execute(query, (split,))
        # row[1] is the second column of the image table
        # (presumably imageName, per the Image model's field order -- confirm).
        # A set keeps the membership test below O(1) per file.
        already_stored = set(image_path + row[1] for row in cur.fetchall())

    new_names = [path[len(image_path):] for path in on_disk
                 if path not in already_stored]
    print(len(on_disk) - len(new_names))  # files that were already in the DB
    print(len(new_names))                 # files still to be inserted

    # Group the annotations by the image file name they belong to.
    captions_by_name = defaultdict(list)
    for cap in annotations:
        captions_by_name[name_pattern % (split, cap['image_id'])].append(cap)

    uncaptioned = [name for name in new_names if name not in captions_by_name]
    if uncaptioned:
        print('Skipping %d image(s) with no caption annotation' % len(uncaptioned))
        new_names = [name for name in new_names if name in captions_by_name]

    image_rows = [{'imageId': str(captions_by_name[name][0]['image_id']),
                   'imageName': name,
                   'imageType': 'mscoco',
                   'imageSubType': split,
                   'numHitsFinished': '0'}
                  for name in new_names]
    _insert_in_batches(Image, image_rows)

    caption_rows = []
    for name in new_names:
        # Pick one of the image's captions at random.
        cap = random.choice(captions_by_name[name])
        image = Image.get(Image.imageId == str(cap['image_id']))
        caption_rows.append({'captionId': cap['id'],
                             'caption': cap['caption'],
                             'image': image})
    _insert_in_batches(Caption, caption_rows)

    print("Captions table for " + split + " created.")

def createRedisQueue(queue_size=5000):
    """Rebuild the ``visdial_queue`` Redis list from unfinished train2014 images.

    Selects rows of the ``image`` table with ``imageSubType = 'train2014'`` and
    ``numHitsFinished = 0``, deletes the existing ``visdial_queue`` key and
    pushes the second column of at most ``queue_size`` rows onto it.

    Args:
        queue_size: Maximum number of images to push (default 5000).
    """
    print("createRedisQueue called.")

    import redis
    r = redis.StrictRedis(host='localhost', port=6379, db=0) # TODO

    with con:
        cur = con.cursor()
        cur.execute("SELECT * FROM image WHERE imageSubType = 'train2014' AND numHitsFinished = 0")
        r.delete('visdial_queue')

        # Only the first queue_size rows are used, so stop there instead of
        # walking every remaining row of the result.
        rows = cur.fetchall()[:queue_size]
        for row in rows:
            # row[1] / row[4] are the 2nd and 5th columns of the image table
            # (presumably imageName and numHitsFinished -- confirm).
            print("%s %s" % (row[1], row[4]))
            r.rpush('visdial_queue', row[1])

        print(len(rows))

## ------------------------------------------------------------------------------------------------------


In [2]:
createDatabaseTables()

All database tables created.


In [4]:
fillPilotData()

Loading caption train2014 data...
SELECT * FROM image WHERE imageSubType = 'train2014'
82783
0
Captions table for train2014 created.


In [5]:
createRedisQueue()

createRedisQueue called.
COCO_train2014_000000100012.jpg 0
COCO_train2014_000000100014.jpg 0
COCO_train2014_000000100020.jpg 0
COCO_train2014_000000100022.jpg 0
COCO_train2014_000000100034.jpg 0
COCO_train2014_000000100037.jpg 0
COCO_train2014_000000010005.jpg 0
COCO_train2014_000000100050.jpg 0
COCO_train2014_000000100063.jpg 0
COCO_train2014_000000100064.jpg 0
COCO_train2014_000000100078.jpg 0
COCO_train2014_000000100084.jpg 0
COCO_train2014_000000100095.jpg 0
COCO_train2014_000000100124.jpg 0
COCO_train2014_000000100128.jpg 0
COCO_train2014_000000100140.jpg 0
COCO_train2014_000000100142.jpg 0
COCO_train2014_000000010015.jpg 0
COCO_train2014_000000100157.jpg 0
COCO_train2014_000000100159.jpg 0
COCO_train2014_000000100169.jpg 0
COCO_train2014_000000100177.jpg 0
COCO_train2014_000000100182.jpg 0
COCO_train2014_000000100202.jpg 0
COCO_train2014_000000100207.jpg 0
COCO_train2014_000000100209.jpg 0
COCO_train2014_000000100222.jpg 0
COCO_train2014_000000100223.jpg 0
COCO_train2014_00000010

COCO_train2014_000000010546.jpg 0
COCO_train2014_000000105468.jpg 0
COCO_train2014_000000105469.jpg 0
COCO_train2014_000000010547.jpg 0
COCO_train2014_000000105470.jpg 0
COCO_train2014_000000105472.jpg 0
COCO_train2014_000000105473.jpg 0
COCO_train2014_000000105478.jpg 0
COCO_train2014_000000105504.jpg 0
COCO_train2014_000000105511.jpg 0
COCO_train2014_000000105515.jpg 0
COCO_train2014_000000105516.jpg 0
COCO_train2014_000000105520.jpg 0
COCO_train2014_000000105522.jpg 0
COCO_train2014_000000105529.jpg 0
COCO_train2014_000000105531.jpg 0
COCO_train2014_000000105532.jpg 0
COCO_train2014_000000105545.jpg 0
COCO_train2014_000000105546.jpg 0
COCO_train2014_000000105561.jpg 0
COCO_train2014_000000105564.jpg 0
COCO_train2014_000000105576.jpg 0
COCO_train2014_000000105580.jpg 0
COCO_train2014_000000105582.jpg 0
COCO_train2014_000000105589.jpg 0
COCO_train2014_000000105590.jpg 0
COCO_train2014_000000105592.jpg 0
COCO_train2014_000000105594.jpg 0
COCO_train2014_000000010560.jpg 0
COCO_train2014

COCO_train2014_000000113857.jpg 0
COCO_train2014_000000113861.jpg 0
COCO_train2014_000000113879.jpg 0
COCO_train2014_000000113880.jpg 0
COCO_train2014_000000113893.jpg 0
COCO_train2014_000000113898.jpg 0
COCO_train2014_000000001139.jpg 0
COCO_train2014_000000113929.jpg 0
COCO_train2014_000000113944.jpg 0
COCO_train2014_000000113949.jpg 0
COCO_train2014_000000113952.jpg 0
COCO_train2014_000000113956.jpg 0
COCO_train2014_000000113962.jpg 0
COCO_train2014_000000113967.jpg 0
COCO_train2014_000000113970.jpg 0
COCO_train2014_000000113979.jpg 0
COCO_train2014_000000011398.jpg 0
COCO_train2014_000000113982.jpg 0
COCO_train2014_000000113985.jpg 0
COCO_train2014_000000113991.jpg 0
COCO_train2014_000000113998.jpg 0
COCO_train2014_000000011401.jpg 0
COCO_train2014_000000011402.jpg 0
COCO_train2014_000000114024.jpg 0
COCO_train2014_000000114027.jpg 0
COCO_train2014_000000114028.jpg 0
COCO_train2014_000000011403.jpg 0
COCO_train2014_000000114034.jpg 0
COCO_train2014_000000114035.jpg 0
COCO_train2014

COCO_train2014_000000122105.jpg 0
COCO_train2014_000000122108.jpg 0
COCO_train2014_000000122109.jpg 0
COCO_train2014_000000122116.jpg 0
COCO_train2014_000000122117.jpg 0
COCO_train2014_000000122118.jpg 0
COCO_train2014_000000122123.jpg 0
COCO_train2014_000000012213.jpg 0
COCO_train2014_000000122137.jpg 0
COCO_train2014_000000122142.jpg 0
COCO_train2014_000000122144.jpg 0
COCO_train2014_000000122147.jpg 0
COCO_train2014_000000122157.jpg 0
COCO_train2014_000000122159.jpg 0
COCO_train2014_000000122163.jpg 0
COCO_train2014_000000122164.jpg 0
COCO_train2014_000000122182.jpg 0
COCO_train2014_000000122188.jpg 0
COCO_train2014_000000122189.jpg 0
COCO_train2014_000000122194.jpg 0
COCO_train2014_000000122196.jpg 0
COCO_train2014_000000122207.jpg 0
COCO_train2014_000000122216.jpg 0
COCO_train2014_000000122231.jpg 0
COCO_train2014_000000122235.jpg 0
COCO_train2014_000000122238.jpg 0
COCO_train2014_000000012224.jpg 0
COCO_train2014_000000122250.jpg 0
COCO_train2014_000000122255.jpg 0
COCO_train2014

COCO_train2014_000000130163.jpg 0
COCO_train2014_000000130164.jpg 0
COCO_train2014_000000130175.jpg 0
COCO_train2014_000000130180.jpg 0
COCO_train2014_000000130181.jpg 0
COCO_train2014_000000130183.jpg 0
COCO_train2014_000000130184.jpg 0
COCO_train2014_000000130188.jpg 0
COCO_train2014_000000130192.jpg 0
COCO_train2014_000000013020.jpg 0
COCO_train2014_000000130206.jpg 0
COCO_train2014_000000130215.jpg 0
COCO_train2014_000000130221.jpg 0
COCO_train2014_000000130222.jpg 0
COCO_train2014_000000130226.jpg 0
COCO_train2014_000000130228.jpg 0
COCO_train2014_000000130239.jpg 0
COCO_train2014_000000130245.jpg 0
COCO_train2014_000000130252.jpg 0
COCO_train2014_000000130269.jpg 0
COCO_train2014_000000130270.jpg 0
COCO_train2014_000000130271.jpg 0
COCO_train2014_000000130275.jpg 0
COCO_train2014_000000130280.jpg 0
COCO_train2014_000000130286.jpg 0
COCO_train2014_000000130287.jpg 0
COCO_train2014_000000130292.jpg 0
COCO_train2014_000000130295.jpg 0
COCO_train2014_000000130313.jpg 0
COCO_train2014

In [6]:
from boto.mturk.connection import MTurkConnection
from boto.mturk.question import ExternalQuestion
from boto.mturk.price import Price
from boto.mturk.qualification import *
from boto.mturk.connection import MTurkRequestError

In [7]:
# Mechanical Turk endpoints: sandbox and production.
SANDBOX_HOST = 'mechanicalturk.sandbox.amazonaws.com'
HOST = 'mechanicalturk.amazonaws.com'

# Default number of HITs used by create_hits and as the cap in cancel_hits.
NUM_HITS = 6000
# Flag passed to getConnection and create_new_hit_type by the driver code.
is_prod = True # TODO
# Placeholder for the MTurk connection; assigned further down in the file.
mtc = None

In [8]:
region_name = 'us-east-1'

# AWS credentials are read from the environment rather than being stored in
# source.  Any key pair that was ever hard-coded in this file should be treated
# as compromised and rotated.  Both values are None when the variables are
# unset.
ACCESS_ID = os.environ.get('AWS_ACCESS_KEY_ID')
SECRET_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
endpoint_url = 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'


In [9]:
def getConnection(is_prod = False):
    """Open a Mechanical Turk connection using ACCESS_ID / SECRET_KEY.

    Args:
        is_prod: When true, connect to the production endpoint (``HOST``);
            otherwise connect to the sandbox (``SANDBOX_HOST``).

    Returns:
        The ``MTurkConnection`` instance.

    Previously both branches used ``SANDBOX_HOST``, so ``is_prod`` had no
    effect and ``HOST`` was never used.
    """
    host = HOST if is_prod else SANDBOX_HOST
    return MTurkConnection(aws_access_key_id=ACCESS_ID,
                           aws_secret_access_key=SECRET_KEY,
                           host=host)

In [10]:
# Address of the external task page handed to ExternalQuestion in create_hits.
# The previous value, "https:127.0.0.1:5000", lacked the "//" after the scheme
# and so was not a well-formed URL.
url = "https://127.0.0.1:5000" # TODO
# HIT listing text; cancel_hits also matches HITs by this exact title.
title = "Live Q/A about an Image (With Captions)"
description = "Ask or Answer questions about an image with a fellow Turker."
keywords = ["image", "chat", "question", "answer"]
# Passed to ExternalQuestion as the frame height.
frame_height = "1200"
# Reward amount used when registering the HIT type and creating HITs.
amountToPay = 0.15

In [11]:
def create_new_hit_type(is_prod = False):
    """Register a HIT type through the module-level ``mtc`` and return its id.

    Args:
        is_prod: When true, three worker qualifications are attached: at least
            5000 approved HITs, an approval rate of at least 95, and locale
            'US' (none of them required to preview).  Otherwise no
            qualifications are attached.

    Returns:
        The ``HITTypeId`` of the first element of the registration result;
        the same id is also printed.
    """
    quals = None
    if is_prod:
        quals = Qualifications()
        requirements = (
            NumberHitsApprovedRequirement('GreaterThanOrEqualTo', 5000,
                                          required_to_preview=False),
            PercentAssignmentsApprovedRequirement('GreaterThanOrEqualTo', 95,
                                                  required_to_preview=False),
            LocaleRequirement('EqualTo', 'US', required_to_preview=False),
        )
        for requirement in requirements:
            quals.add(requirement)

    registration = mtc.register_hit_type(
        title=title,
        reward=Price(amount=amountToPay),
        description=description,
        keywords=keywords,
        duration=1200,
        qual_req=quals,
    )
    hit_type_id = registration[0].HITTypeId
    print(hit_type_id)
    return hit_type_id


In [12]:
def create_hits(hit_type_id, no_of_hits = NUM_HITS, no_of_assignments = 1, amount = amountToPay, duration = 1200):
    """Create ``no_of_hits`` HITs of the given type through the module-level ``mtc``.

    Every HIT points at the same external question (``url`` rendered with
    ``frame_height``).  The zero-based index of each HIT is printed before it
    is created.

    Args:
        hit_type_id: HIT type id passed to ``mtc.create_hit`` as ``hit_type``.
        no_of_hits: Number of HITs to create.
        no_of_assignments: Passed as ``max_assignments`` for each HIT.
        amount: Reward amount passed as ``Price(amount=amount)``.
        duration: Accepted for interface compatibility; not used by the body.
    """
    external_question = ExternalQuestion(url, frame_height)
    for hit_number in xrange(no_of_hits):
        print(hit_number)
        mtc.create_hit(
            hit_type=hit_type_id,
            max_assignments=no_of_assignments,
            question=external_question,
            reward=Price(amount=amount),
        )

In [13]:
def cancel_hits():
    """Expire HITs whose title equals ``title``, up to ``NUM_HITS`` of them.

    HITs are read through the module-level ``mtc`` in pages of 100, newest
    first.  Paging stops when a page comes back empty, when ``NUM_HITS``
    matching HITs have been handled, or after 1000 pages.  A HIT whose expire
    call raises ``MTurkRequestError`` has its id printed a second time and is
    still counted.  The final count is printed.

    Previously the ``NUM_HITS`` check only left the inner loop, so later pages
    kept being processed, and all 1000 pages were requested even after the
    results ran out.
    """
    count = 0
    max_pages = 1000 # 100 per page

    for page in range(1, max_pages + 1):
        hits = mtc.search_hits(sort_direction="Descending", page_size=100, page_number=page)
        if not hits:
            # Past the last page of results: nothing more to look at.
            break

        for hit in hits:
            if hit.Title == title:
                print(hit.HITId)
                try:
                    mtc.expire_hit(hit.HITId)
                except MTurkRequestError:
                    # Report the HIT that could not be expired and carry on.
                    print(hit.HITId)
                count += 1
            if count >= NUM_HITS:
                break

        if count >= NUM_HITS:
            # Propagate the inner break so no further pages are fetched.
            break

    print(count)

In [None]:
def get_results():
    """Collect the assignments of every HIT known to the module-level ``mtc``.

    Returns:
        A list with one entry per HIT yielded by ``mtc.get_all_hits()``, each
        entry being the result of ``mtc.get_assignments`` for that HIT's id.
    """
    return [mtc.get_assignments(hit.HITId) for hit in mtc.get_all_hits()]

# Open the MTurk connection and rebind the module-level ``mtc`` that
# create_new_hit_type, create_hits, cancel_hits and get_results all read.
mtc = getConnection(is_prod)
# Register the HIT type, then create HITs of that type
# (create_hits defaults to NUM_HITS HITs).
create_hits(create_new_hit_type(is_prod))
# Uncomment to expire the HITs with the matching title instead:
# cancel_hits()