In [1]:
import pandas as pd
import sys
sys.path.append("/Users/damoncrockett/ivpy/src")
from ivpy import *
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the source DataFrame from a pickle file on the local machine.
# NOTE(review): unpickling can execute arbitrary code — only load this file if its origin is trusted.
# NOTE(review): the absolute, user-specific path ties the notebook to one machine.
db = pd.read_pickle("/Users/damoncrockett/lmlproc/dbdfgepneural.pkl")

In [3]:
# Give three terse source columns the names used in the rest of the notebook.
# The renamed frame is rebound to `db` instead of mutating it with inplace=True,
# so the cell reads as a plain transformation.
db = db.rename(columns={"g60":"gloss","ebins":"expressivenessGroup","tbins":"isoGroup"})

In [4]:
# For every row whose `sb` value is falsy, insert an underscore into the catalog
# string immediately before its last alphabetic character (e.g. "123a" -> "123_a").
# Catalog strings that contain no letters are left unchanged.
# NOTE(review): when a catalog holds several letters the split lands before the LAST
# one ("12ab" -> "12a_b"), which is what the previous un-broken inner loop ended up
# producing — confirm this is intended rather than splitting before the first letter.
# NOTE(review): not idempotent — re-running this cell inserts a second underscore.
for i in db.index:
    if db.loc[i, 'sb']:
        continue

    catalog = db.loc[i, 'catalog']
    letter_positions = [j for j, c in enumerate(catalog) if c.isalpha()]
    if letter_positions:
        j = letter_positions[-1]
        # Write through db.loc[row, column]: the chained form `db.catalog.loc[i] = ...`
        # may assign to a temporary copy and silently leave `db` unchanged.
        db.loc[i, 'catalog'] = f'{catalog[:j]}_{catalog[j:]}'

In [5]:
# Columns to keep, in this order, when `db` is subset with `db[pbcols]`.
# 'gloss', 'expressivenessGroup' and 'isoGroup' are the names produced by the
# earlier rename of 'g60', 'ebins' and 'tbins'.
pbcols = [
    'catalog','man','bran','year','circa',
    'surf','xword', 'gword', 'cword', 'tword',
    'backp','postcard','toner','resin','sb','sbid','suffs',
    'dmin', 'dmax', 'dminHex', 'dmaxHex', 'auc', 'roughness','gloss','thickness',
    'expressiveness','radarGroup','expressivenessGroup','isoGroup',
    'gep100','gep150','gep200','gep250','gep300','tmap'
]

In [6]:
def whiten(s):
    """Blank out placeholder values, leaving every other value untouched.

    Parameters
    ----------
    s : object
        A single cell value of any type.

    Returns
    -------
    object
        ``''`` when ``"specified]" in s`` or ``"[Indeterminate]" in s`` is true;
        otherwise ``s`` itself. Values that do not support the ``in`` operator
        (numbers, NaN, None, ...) are returned unchanged.
    """
    try:
        is_placeholder = "specified]" in s or "[Indeterminate]" in s
    except TypeError:
        # Only "this value cannot be searched" is expected here; any other
        # exception is a real error and is allowed to propagate.
        return s
    return '' if is_placeholder else s

In [7]:
# Keep only the columns listed in `pbcols` (in that order) and run `whiten` over every cell.
# NOTE(review): DataFrame.applymap is deprecated as of pandas 2.1 in favour of
# DataFrame.map — switch once the environment's pandas version allows it.
db = db[pbcols].applymap(whiten)

In [8]:
# Expand the abbreviated "*word" column names into descriptive ones.
# The renamed frame is rebound to `db` rather than mutated with inplace=True.
db = db.rename(columns={
    "xword":"textureWord",
    "gword":"glossWord",
    "cword":"colorWord",
    "tword":"thicknessWord"
})

In [9]:
import numpy as np

In [10]:
# Round each measurement column to three decimal places, one cell at a time.
for measure in ('roughness', 'auc', 'gloss', 'thickness', 'expressiveness'):
    db[measure] = db[measure].apply(lambda value: round(value, 3))

# Center the GEP coordinates

In [11]:
# Index labels of the rows that have a non-null `radarGroup`.
has_radar_group = db.radarGroup.notnull()
fullrows = db[has_radar_group].index

In [12]:
def makeshift(l,xshift,yshift):
    """Return a new three-item list: ``l[0] - xshift``, ``l[1] - yshift`` and 0.

    Only the first two items of ``l`` are read; ``l`` itself is not modified
    and the third item of the result is always 0.
    """
    shifted_x = l[0] - xshift
    shifted_y = l[1] - yshift
    return [shifted_x, shifted_y, 0]

In [13]:
# For each GEP column, shift the coordinates of the rows in `fullrows` by half of
# their x extent and half of their y extent (both truncated to int).
# NOTE(review): subtracting (max - min)/2 centres the points on the origin only when
# the minimum coordinate is 0; otherwise the centring shift would be the midpoint
# (max + min)/2. The existing arithmetic is kept — confirm which is intended.
for spread_factor in [100,150,200,250,300]:
    gepcol = f'gep{spread_factor}'
    gepdf = db.loc[fullrows, gepcol]

    # Extract each coordinate once instead of re-applying the lambda for max and min.
    xs = gepdf.apply(lambda p: p[0])
    ys = gepdf.apply(lambda p: p[1])
    xshift = int((xs.max() - xs.min())/2)
    yshift = int((ys.max() - ys.min())/2)

    gepdf = gepdf.apply(lambda l: makeshift(l,xshift,yshift))
    # Assign through db.loc[rows, column]: the chained form
    # `db[gepcol].loc[fullrows] = ...` may write to a temporary copy and
    # silently leave `db` unchanged.
    db.loc[fullrows, gepcol] = gepdf

# Export to JSON

In [14]:
def fillrand(cell):
    """Return ``cell`` if it is a list, otherwise a random placeholder position.

    The placeholder is ``[x, y, 0]`` where ``x`` and ``y`` are each drawn with
    ``np.random.randint(200, 500)`` (x first, then y).
    NOTE(review): the draws use NumPy's global random state and no seed is set
    in the visible notebook, so placeholders differ between runs.
    """
    if not isinstance(cell,list):
        rand_x = np.random.randint(200,500)
        rand_y = np.random.randint(200,500)
        return [rand_x, rand_y, 0]
    return cell

In [15]:
# Run `fillrand` over every cell of the columns whose name contains 'gep'.
gepcols = [name for name in db.columns if 'gep' in name]
db[gepcols] = db[gepcols].applymap(fillrand)

In [16]:
# Run `fillrand` over every cell of the `tmap` column.
db['tmap'] = db['tmap'].apply(fillrand)

In [17]:
# Replace every remaining missing value with an empty string.
db = db.fillna('')

#### Flag records that have processing instructions

In [23]:
import glob,os

In [25]:
# Collect the paths of all '*.pdf' files directly inside the processing image folder.
procpattern = os.path.join("/Users/damoncrockett/paperbase/src/assets/img/processing/","*.pdf")
allproc = glob.glob(procpattern)

In [28]:
# File name of each matched PDF with its four-character '.pdf' extension removed.
procats = []
for pdfpath in allproc:
    stem, _ext = os.path.splitext(os.path.basename(pdfpath))
    procats.append(stem)

In [31]:
# Add a `processing` flag column, initialised to 0 for every row.
db['processing'] = 0

In [32]:
# Set the flag to 1 for rows whose catalog value appears in `procats`.
# A single db.loc[rows, column] assignment is used because the chained form
# `db.processing.loc[...] = 1` may write to a temporary copy and leave `db` untouched.
db.loc[db.catalog.isin(procats), 'processing'] = 1

In [33]:
# Build a column-oriented mapping: column name -> list of that column's cell values.
dbjson = {}
for col in db.columns:
    dbjson[col] = db[col].tolist()

In [34]:
import json

# Serialise `dbjson` to data.json under the paperbase assets folder, overwriting any existing file.
# NOTE(review): json.dump raises TypeError on values it cannot serialise (for example
# NumPy scalars nested inside list cells) — confirm every cell holds plain Python types.
with open('/Users/damoncrockett/paperbase/src/assets/data/data.json', 'w') as json_file:
    json.dump(dbjson, json_file)