# This notebook contains methods for turning a database of images into stimuli
This is for generating stimuli from a collection of BPG and human-compressed images. The notebook assumes:
* All images (BPG-compressed, human-compressed, and original files) are PNGs that are hosted in a publicly-viewable Google Drive folder
* Basic info about the images database (Google Drive folder) are recorded in a .csv file which is read into the variable `data_file`
* BPG-compressed images have filenames containing the image height in pixels
* Human-compressed images have file suffix '_final.png'

Import python libraries

In [1]:
import json
import numpy as np
import csv
import pymongo as pm
import copy

Define data file and other necessary variables (e.g. Google image file prefix)

In [2]:
data_file = 'img_ids.csv'
google_image_str = 'http://drive.google.com/uc?export=view&id='  # Prefix for accessing Google Image files

Read in data file and print header

In [3]:
data_dict = {}
line_counter = 0
with open(data_file) as csv_file:
    for line in csv_file:
        if line_counter == 0:
            header_list = line.strip().split(',')
        else:
            line_data = line.strip().split(',')
            data_dict[line_data[0]] = {i: j for (i, j) in zip(header_list[1:], line_data[1:])}
        line_counter += 1
print(header_list)

['Name', 'Date', 'Size', 'URL', 'ID']


## Create stimuli list based on data

In [4]:
image_types = [i for i in set([i.split('_')[0] for i in list(data_dict.keys())]) if (len(i) > 0) & ('.' not in i)]
stimuli_list = []
for image_type in image_types:
    if len(image_type) > 1:
        png_files = [i for i in data_dict.keys() if (image_type in i) & ('.png' in i)]
        compressed_files = [i for i in png_files if ('bpg' in i) or ('final' in i)]
        original_files = [i for i in png_files if ('original') in i or (('pxh' in i) & ('bpg' not in i))]
        if len(original_files) > 1:
            original_file = [i for i in original_files if 'px' in i][0]
        else:
            original_file = original_files[0]
        for compressed_file in compressed_files:
            if 'final' in compressed_file:
                image_height = 'NA'
                compression_level ='human'
                compression_mode = 'human'
            else:
                image_height = compressed_file.split('_')[1].split('pxh')[0]
                compression_level = compressed_file.split('_bpg')[1].split('.')[0]
                compression_mode = 'bpg'
            stimuli_list.append(
                {
                    'image_filename' : compressed_file,
                    'image_name': compressed_file.split('_')[0],
                    'image_height': image_height,
                    'compression_level': compression_level,
                    'compression_mode': compression_mode,
                    'original_url': google_image_str + data_dict[original_file]['ID'],
                    'compressed_url' : google_image_str + data_dict[compressed_file]['ID']
                }
            )

Check stimuli list

In [5]:
stimuli_list[0]

{'compressed_url': 'http://drive.google.com/uc?export=view&id=1zFbvwgb4uLHLzilqgvsIdrn2TQXm6SQ_',
 'compression_level': '43',
 'compression_mode': 'bpg',
 'image_filename': 'rockwall_1029pxh_bpg43.png',
 'image_height': '1029',
 'image_name': 'rockwall',
 'original_url': 'http://drive.google.com/uc?export=view&id=1svOMbWnGZo8SsWb4_2wgaKuDbPovLEF-'}

## Now that the list of stimuli is created, we can export them in two ways

### Option 1: Connect to a mongodb server and insert stimuli

In [6]:
pswd = 'popeye2mongo'
user = 'ihwang'
host = 'popeye2'

conn = pm.MongoClient('mongodb://' + user + ':' + pswd + '@127.0.0.1')
conn.database_names()  # Check databases inside the connected server

  


[u'admin',
 u'config',
 u'highschoolers',
 u'kiddraw',
 u'local',
 u'stimuli',
 u'testDatabase']

Create a database called `stimuli` (or access it if it already exists), create a collection called `bpg_hc_eval` (or access it if it already exists) and insert stimuli from list

In [7]:
mongo_stimuli_list = copy.deepcopy(stimuli_list)
db = conn['stimuli']
coll = db['bpg_hc_eval'] 
for i in mongo_stimuli_list:
    print(i)
    coll.insert_one(i)

{'compressed_url': 'http://drive.google.com/uc?export=view&id=1zFbvwgb4uLHLzilqgvsIdrn2TQXm6SQ_', 'original_url': 'http://drive.google.com/uc?export=view&id=1svOMbWnGZo8SsWb4_2wgaKuDbPovLEF-', 'image_name': 'rockwall', 'image_height': '1029', 'compression_level': '43', 'compression_mode': 'bpg', 'image_filename': 'rockwall_1029pxh_bpg43.png'}
{'compressed_url': 'http://drive.google.com/uc?export=view&id=1SsYZIe0gEU2xbYuqRcDf7oJ0LGrJELpS', 'original_url': 'http://drive.google.com/uc?export=view&id=1svOMbWnGZo8SsWb4_2wgaKuDbPovLEF-', 'image_name': 'rockwall', 'image_height': '1029', 'compression_level': '39', 'compression_mode': 'bpg', 'image_filename': 'rockwall_1029pxh_bpg39.png'}
{'compressed_url': 'http://drive.google.com/uc?export=view&id=1NdtOkpvSJWskPgr6C8mnHOG5ZdIx0HGX', 'original_url': 'http://drive.google.com/uc?export=view&id=1svOMbWnGZo8SsWb4_2wgaKuDbPovLEF-', 'image_name': 'rockwall', 'image_height': '1029', 'compression_level': '34', 'compression_mode': 'bpg', 'image_filena

Check that `mongo_stimuli_list` now has `ObjectId`s inserted

In [8]:
mongo_stimuli_list[0]

{'_id': ObjectId('5ca419f8b344ba60f9541c24'),
 'compressed_url': 'http://drive.google.com/uc?export=view&id=1zFbvwgb4uLHLzilqgvsIdrn2TQXm6SQ_',
 'compression_level': '43',
 'compression_mode': 'bpg',
 'image_filename': 'rockwall_1029pxh_bpg43.png',
 'image_height': '1029',
 'image_name': 'rockwall',
 'original_url': 'http://drive.google.com/uc?export=view&id=1svOMbWnGZo8SsWb4_2wgaKuDbPovLEF-'}

Check how many entries are inside the collection

In [9]:
coll.count()

  """Entry point for launching an IPython kernel.


468

Check distinct types of a given attribute

In [10]:
attribute = 'compression_mode'
coll.distinct(attribute)

[u'bpg', u'human']

Get an instance of an attribute

attribute = 'compressed_url'
index = 0
stimuli_list[index][attribute]

Examine a random document in the collection

In [11]:
coll.find_one()

{u'_id': ObjectId('5ca3fc64b344ba55de40525c'),
 u'compressed_url': u'http://drive.google.com/uc?export=view&id=1zFbvwgb4uLHLzilqgvsIdrn2TQXm6SQ_',
 u'compression_level': u'43',
 u'compression_mode': u'bpg',
 u'image_filename': u'rockwall_1029pxh_bpg43.png',
 u'image_height': u'1029',
 u'image_name': u'rockwall',
 u'orig_url': u'http://drive.google.com/uc?export=view&id=1svOMbWnGZo8SsWb4_2wgaKuDbPovLEF-'}

### Option 2: Write stimuli to a json file

Create `json_file` variable

In [12]:
json_file = 'bpg_hc_stimuli.json'

In [13]:
with open(json_file, 'w') as file_write:
    json.dump(stimuli_list, file_write)