In [None]:
import hashlib
import os

import dataset
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import peewee
import scipy.io as sio
import xarray as xr

import mkgu

In [None]:
# Fetch the assembly registered under the name "HvM" via mkgu and display it.
hvm = mkgu.get_assembly("HvM")
hvm

In [None]:
# Level names of the assembly's "presentation" index.
hvm.indexes["presentation"].names

In [None]:
# Copy the assembly so the level-stripping steps below operate on
# `hvm_stripped` rather than on `hvm`.
hvm_stripped = hvm.copy()
hvm_stripped

In [None]:
# Take the "object" level out of the index, leaving it as a plain coordinate.
# The result is rebound instead of passing inplace=True: that keyword is
# deprecated in xarray and rejected by newer releases.
hvm_stripped = hvm_stripped.reset_index("object")
hvm_stripped

In [None]:
# Discard the "object" coordinate altogether (drop=True).
# The result is rebound instead of passing inplace=True: that keyword is
# deprecated in xarray and rejected by newer releases.
hvm_stripped = hvm_stripped.reset_coords("object", drop=True)
hvm_stripped

In [None]:
# Level names of `hvm_stripped`'s "presentation" index at this point.
hvm_stripped.indexes["presentation"].names

In [None]:
# Index levels to strip from the "presentation" index of `hvm_stripped`.
to_remove = [
    'ryz_semantic', 'variation', 'image_file_name', 'rxy_semantic', 'size',
    'category', 'object', 'tz', 'ty', 'rxz_semantic', 'rxy', 'background_id',
    's', 'rxz', 'ryz',
]
for level in to_remove:
    # Levels that are already gone are skipped, so the cell can be re-run.
    if level in hvm_stripped.indexes["presentation"].names:
        # Rebind the result rather than using inplace=True, which is
        # deprecated in xarray and rejected by newer releases.
        hvm_stripped = hvm_stripped.reset_index(level).reset_coords(level, drop=True)
hvm_stripped

In [None]:
# "presentation" index of the original assembly `hvm` (not of `hvm_stripped`).
presentation_index = hvm.indexes["presentation"]

In [None]:
# Display the index.
presentation_index

In [None]:
# Concrete type of the index object.
type(presentation_index)

In [None]:
# Names of the index levels that are not listed in `to_remove`.
to_keep = {
    level_name
    for level_name in presentation_index.names
    if level_name not in to_remove
}
to_keep

In [None]:
# Instantiate mkgu.fetch.SQLiteLookup with its defaults and display it.
lookup = mkgu.fetch.SQLiteLookup()
lookup

In [None]:
# Show the lookup's `db_file` attribute (presumably the path of the SQLite
# file it reads -- confirm against mkgu).
lookup.db_file

In [None]:
def resolve(table, record_dict, keys):
    """Upsert ``record_dict`` into ``table`` and return the id of the affected row.

    Parameters
    ----------
    table
        Object exposing ``upsert(row, keys=...)`` and ``find_one(**filters)``
        (presumably a ``dataset`` table -- confirm against callers).
    record_dict : dict
        Column values to write; must contain every name listed in ``keys``.
    keys : sequence of str
        Column names used to match an existing row.

    Returns
    -------
    The value returned by ``upsert`` when it is not a bool; otherwise the
    ``"id"`` of the row found by looking up the key columns.

    Raises
    ------
    LookupError
        If ``upsert`` returned a bool but no row matches the key columns.
    """
    record_id = table.upsert(record_dict, keys=keys)
    # A bool result carries no id, so fetch the row by its key columns.
    if isinstance(record_id, bool):
        filters = {key: record_dict[key] for key in keys}
        match = table.find_one(**filters)
        if match is None:
            raise LookupError(
                "upsert reported success but no row matches {!r}".format(filters)
            )
        record_id = match["id"]
    return record_id

In [None]:
# Show the kernel's working directory: the relative database file name given
# to peewee.SqliteDatabase below is resolved against it.
# os.getcwd() is used because a bare `pwd` only works through IPython
# automagic, which is applied to single-line cells only.
os.getcwd()

In [None]:
# peewee handle for the SQLite file "image_meta_scratch.db" (a relative path);
# every model class in this notebook binds to it through its Meta.database.
pwdb = peewee.SqliteDatabase("image_meta_scratch.db")

In [None]:
class Foo(peewee.Model):
    """Minimal model (one text column, one integer column) used to try out
    peewee's save/select round trip against `pwdb`."""
    bar = peewee.CharField()
    baz = peewee.IntegerField()
    class Meta:
        # Bind the model to the notebook's SQLite database.
        database = pwdb

In [None]:
# Open the connection to the SQLite database.
pwdb.connect()

In [None]:
# Create the table backing the Foo model.
pwdb.create_tables([Foo])

In [None]:
# Show the `database` attribute of the handle (expected to be the file name
# passed to SqliteDatabase -- confirm against the peewee version in use).
pwdb.database

In [None]:
# Build one Foo instance in memory, then write it to the database.
foo_first = Foo(bar="Wow!", baz=4)
foo_first.save()

In [None]:
# Insert three more Foo rows; each tuple is (bar, baz).
for fields in (
    ("Zoom!", 27),
    ("Bam!", 777),
    ("Whoosh!", 1000000002),
):
    Foo(bar=fields[0], baz=fields[1]).save()

In [None]:
# Build a SELECT over the Foo model and display the query object.
got = Foo.select()
got

In [None]:
# Iterate the query and show (id, bar, baz) for every Foo row it yields.
[
    (foo_row.id, foo_row.bar, foo_row.baz)
    for foo_row in got
]

In [None]:
# First row yielded by the Foo query.
wow = got[0]

In [None]:
# Its `id` value.
wow.id

In [None]:
class Image(peewee.Model):
    """Per-image metadata row.

    The insert loops in this notebook fill every field from the columns of
    `df_images`, which is derived from the assembly's "presentation" index.
    """
    # Filled from the `image_id` column.
    hash_id = peewee.CharField()
    # Filled from the `object` and `category` columns respectively.
    object_name = peewee.CharField()
    category_name = peewee.CharField()
    background_id = peewee.CharField()
    image_file_name = peewee.CharField()
    # Integer taken from the last character of the variation label.
    variation = peewee.IntegerField()
    # Numeric per-image parameters copied verbatim from the same-named columns.
    # NOTE(review): presumably translation / rotation / size settings of the
    # rendered image -- confirm meaning and units against the data source.
    ty = peewee.FloatField()
    tz = peewee.FloatField()
    rxy = peewee.FloatField()
    rxz = peewee.FloatField()
    ryz = peewee.FloatField()
    rxy_semantic = peewee.FloatField()
    rxz_semantic = peewee.FloatField()
    ryz_semantic = peewee.FloatField()
    size = peewee.FloatField()
    s = peewee.FloatField()

    class Meta:
        # Bind the model to the notebook's SQLite database.
        database = pwdb

In [None]:
class StimulusSet(peewee.Model):
    """A named collection of images; linked to Image rows through
    StimulusSetImageMap."""
    name = peewee.CharField()
    
    class Meta:
        # Bind the model to the notebook's SQLite database.
        database = pwdb

In [None]:
class ImageStore(peewee.Model):
    """A place image files can be obtained from.

    The one instance created in this notebook uses a URL as `location`,
    "S3" as `location_type` and "zip" as `store_type`.
    """
    location = peewee.CharField()
    location_type = peewee.CharField()
    store_type = peewee.CharField()
    
    class Meta:
        # Bind the model to the notebook's SQLite database.
        database = pwdb

In [None]:
class StimulusSetImageMap(peewee.Model):
    """Association row linking one StimulusSet to one Image.

    Both sides expose the rows that reference them through the
    `stimulus_set_image_maps` backref.
    """
    stimulus_set = peewee.ForeignKeyField(StimulusSet, backref="stimulus_set_image_maps")
    image = peewee.ForeignKeyField(Image, backref="stimulus_set_image_maps")
   
    class Meta:
        # Bind the model to the notebook's SQLite database.
        database = pwdb

In [None]:
class ImageImageStoreMap(peewee.Model):
    """Association row linking one Image to one ImageStore, plus a `path` string.

    Both sides expose the rows that reference them through the
    `image_image_store_maps` backref.
    """
    image_store = peewee.ForeignKeyField(ImageStore, backref="image_image_store_maps")
    image = peewee.ForeignKeyField(Image, backref="image_image_store_maps")
    # The insert loop in this notebook stores a `path_map` directory name here.
    path = peewee.CharField()
   
    class Meta:
        # Bind the model to the notebook's SQLite database.
        database = pwdb    

In [None]:
# The five image-metadata models, passed as a group to drop_tables/create_tables.
pw_classes = [Image, StimulusSet, ImageStore, StimulusSetImageMap, ImageImageStoreMap]

In [None]:
# Drop the tables of all models in `pw_classes`; they are recreated next.
pwdb.drop_tables(pw_classes)

In [None]:
# Create the tables for all models in `pw_classes`.
pwdb.create_tables(pw_classes)

In [None]:
# Create the StimulusSet row named "HvM" and write it to the database.
hvm_images = StimulusSet(name="HvM")
hvm_images.save()

In [None]:
# Convert the assembly's "presentation" index to a DataFrame via to_frame().
df_stimulus = hvm.indexes["presentation"].to_frame()
df_stimulus

In [None]:
# SHA-1 of one candidate spelling of an image file name, displayed for
# comparison by eye with the reference digest `fhash`.
# NOTE(review): presumably `fhash` is that image's id in the assembly -- confirm.
# (`hashlib` is imported in the notebook's import cell.)
fn = b"TURTLE_L_rx+69.658_ry+61.236_rz+07.161_tx+00.318_ty-01.053_s+00.921_5068064aee7a1831c3a5fe0f023b05cffca511e3_256x256"
# Other spellings kept for reference (without the size suffix / with the extension):
# fn = b"TURTLE_L_rx+69.658_ry+61.236_rz+07.161_tx+00.318_ty-01.053_s+00.921_5068064aee7a1831c3a5fe0f023b05cffca511e3"
# fn = b"TURTLE_L_rx+69.658_ry+61.236_rz+07.161_tx+00.318_ty-01.053_s+00.921_5068064aee7a1831c3a5fe0f023b05cffca511e3_256x256.png"
fhash = "d15aa059b0180b200fb82fb8039247c3b53f9d66"
hashlib.sha1(fn).hexdigest()

In [None]:
# SHA-1 of the bytes of one image file on disk.
fpath = "/braintree/home/jjpr/.skdata/HvMWithDiscfade_6eef6648406c333a4035cd5e60d0bf2ecf2606d7/Variation06_20110131/TURTLE_L_rx+69.658_ry+61.236_rz+07.161_tx+00.318_ty-01.053_s+00.921_5068064aee7a1831c3a5fe0f023b05cffca511e3_256x256.png"
# Read inside a `with` block so the file handle is closed deterministically.
with open(fpath, "rb") as image_file:
    file_digest = hashlib.sha1(image_file.read()).hexdigest()
file_digest

In [None]:
# List the entries of the local image directory.
# os.listdir replaces the bare `ls`, which only works through IPython's
# shell alias handling in a single-line cell.
sorted(os.listdir("/braintree/home/jjpr/.skdata/HvMWithDiscfade_6eef6648406c333a4035cd5e60d0bf2ecf2606d7"))

In [None]:
# Variation label -> directory name; used as the `path` stored on
# ImageImageStoreMap rows ("Variation06_20110131" also appears in `fpath`).
path_map = {"V0": "Variation00_20110203", "V3": "Variation03_20110128", "V6": "Variation06_20110131"}

In [None]:
# Record the zip archive at the given URL as an image store and persist it.
hvm_image_store = ImageStore(
    location="http://dicarlocox-datasets.s3.amazonaws.com/HvM_with_discfade.zip",
    location_type="S3",
    store_type="zip",
)
hvm_image_store.save()

In [None]:
# Manual reset, left disabled: uncomment to drop only the ImageStore table.
# pwdb.drop_tables([ImageStore])

In [None]:
# Reduce the presentation table to image-level rows: keep rows whose variation
# is not the string "nan", drop the "stimulus" and "repetition" columns, then
# remove duplicate rows.
df_images = (
    df_stimulus.loc[df_stimulus["variation"] != "nan"]
    .drop(columns=["stimulus", "repetition"])
    .drop_duplicates()
)
df_images

In [None]:
# Number of distinct image_id values in df_images.
len(df_images["image_id"].unique())

In [None]:
# Insert one Image row per row of `df_images`.
# The inserts share a single transaction: one commit instead of one per row,
# and a failure part-way through rolls back the rows already written.
pw_images = []
with pwdb.atomic():
    for image in df_images.itertuples():
        pw_image = Image(
            hash_id=image.image_id,
            object_name=image.object,
            category_name=image.category,
            background_id=image.background_id,
            image_file_name=image.image_file_name,
            # Keep only the trailing digit of the variation label
            # (labels look like "V0"/"V3"/"V6", cf. the keys of `path_map`).
            variation=int(image.variation[-1]),
            ty=image.ty,
            tz=image.tz,
            rxy=image.rxy,
            rxz=image.rxz,
            ryz=image.ryz,
            rxy_semantic=image.rxy_semantic,
            rxz_semantic=image.rxz_semantic,
            ryz_semantic=image.ryz_semantic,
            size=image.size,
            s=image.s
        )
        pw_images.append(pw_image)
        pw_image.save()
        print(image.image_file_name, image.variation)

In [None]:
# Build a SELECT over the Image model and display the query object.
images_query = Image.select()
images_query

In [None]:
# First row yielded by the Image query.
image_query_0 = images_query[0]

In [None]:
# Number of rows the Image query yields.
len(images_query)

In [None]:
# object_name stored on that first Image row.
image_query_0.object_name

In [None]:
# Column names of df_images.
df_images.columns

In [None]:
# Distinct variation labels present in df_images.
df_images["variation"].unique()

In [None]:
# Presentation rows whose variation is not the string "nan".
df_stimulus[df_stimulus["variation"]!="nan"]

In [None]:
# Select the part of the assembly whose variation equals the string "nan".
hvm.sel(variation="nan")

In [None]:
# Distinct variation values across the whole assembly.
np.unique(hvm["variation"].values)

In [None]:
# Select the entries of the assembly whose `region` is "V4" or "IT".
# The boolean masks are combined with `|` instead of xr.ufuncs.logical_or:
# the xarray.ufuncs wrappers are deprecated in favour of operators / NumPy
# ufuncs applied directly to xarray objects.
in_v4_or_it = (hvm["region"] == "V4") | (hvm["region"] == "IT")
hvm.loc[in_v4_or_it]

In [None]:
# Drop the Image table so it can be rebuilt and repopulated below together
# with the mapping rows.
# Selecting from a table that was just dropped raises a database error and
# would stop a Run All, so the drop is confirmed with table_exists() instead
# (expected to display False).
Image.drop_table()
Image.table_exists()

In [None]:
# Recreate the Image table and list its rows (none expected right after creation).
Image.create_table()
list(Image.select())

In [None]:
# Insert, for every row of `df_images`, an Image row plus the two association
# rows tying it to the "HvM" stimulus set and to the S3 image store.
# Everything runs in one transaction: one commit instead of three per image,
# and a failure part-way through rolls back the rows already written, so the
# three tables cannot end up out of step with each other.
pw_images = []
pw_stimulus_set_image_maps = []
pw_image_image_store_maps = []
with pwdb.atomic():
    for image in df_images.itertuples():
        pw_image = Image(
            hash_id=image.image_id,
            object_name=image.object,
            category_name=image.category,
            background_id=image.background_id,
            image_file_name=image.image_file_name,
            # Keep only the trailing digit of the variation label
            # (labels are the keys of `path_map`: "V0", "V3", "V6").
            variation=int(image.variation[-1]),
            ty=image.ty,
            tz=image.tz,
            rxy=image.rxy,
            rxz=image.rxz,
            ryz=image.ryz,
            rxy_semantic=image.rxy_semantic,
            rxz_semantic=image.rxz_semantic,
            ryz_semantic=image.ryz_semantic,
            size=image.size,
            s=image.s
        )
        # Save the image first so it has its primary key before rows that
        # reference it are built.
        pw_images.append(pw_image)
        pw_image.save()

        pw_stimulus_set_image_map = StimulusSetImageMap(stimulus_set=hvm_images, image=pw_image)
        pw_stimulus_set_image_maps.append(pw_stimulus_set_image_map)
        pw_stimulus_set_image_map.save()

        # NOTE(review): `path` receives only the variation directory name, not
        # the image's file name -- confirm that this is intended.
        pw_image_image_store_map = ImageImageStoreMap(image=pw_image, image_store=hvm_image_store,
                                                      path=path_map[image.variation])
        pw_image_image_store_maps.append(pw_image_image_store_map)
        pw_image_image_store_map.save()

        print(image.image_file_name, image.variation)

In [None]:
# Select StimulusSet rows joined through the whole chain of models:
# StimulusSet -> StimulusSetImageMap -> Image -> ImageImageStoreMap -> ImageStore.
# Only StimulusSet columns are selected; the joins restrict and multiply rows.
pw_query = (StimulusSet
           .select()
           .join(StimulusSetImageMap)
           .join(Image)
           .join(ImageImageStoreMap)
           .join(ImageStore))

In [None]:
# Run the joined query and materialise its rows as a list.
list(pw_query)

In [None]:
# Number of rows the joined query yields.
len(pw_query)

In [None]:
# Fetch the StimulusSet row named "HvM" back from the database.
pw_hvm = StimulusSet.get(StimulusSet.name == "HvM")
pw_hvm

In [None]:
# For every image mapped to the HvM stimulus set, pair the image's hash_id
# with the location of the first image store it is mapped to.
[
    (
        set_map.image.hash_id,
        set_map.image.image_image_store_maps[0].image_store.location,
    )
    for set_map in pw_hvm.stimulus_set_image_maps
]

In [None]:
# Number of StimulusSetImageMap rows that reference the HvM stimulus set.
len(pw_hvm.stimulus_set_image_maps)

In [None]:
# Row count per model, as (class name, count) pairs.
[
    (model_class.__name__, model_class.select().count())
    for model_class in pw_classes
]

In [None]:
# Show the SQL that peewee generates for the joined query.
pw_query.sql()

In [None]:
# List the names currently defined in the notebook namespace (debugging aid).
dir()

In [None]:
# Write the per-image metadata frame to "image_meta_dataframe.pkl" (relative
# path, so it lands in the kernel's working directory).
df_images.to_pickle("image_meta_dataframe.pkl")