In [1]:
import pandas as pd
import sys
sys.path.append("/Users/damoncrockett/ivpy/src")
from ivpy import *
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the source table from a local pickle.
# NOTE: unpickling can execute arbitrary code, so only read pickle files that
# come from a trusted source.
db = pd.read_pickle("/Users/damoncrockett/lmlproc/dbdfgepneural.pkl")

In [3]:
# Rename the short source column names to the names used in `pbcols` and in
# the exported files.
db.rename(
    columns=dict(
        g60="gloss",
        ebins="expressivenessGroup",
        tbins="isoGroup",
    ),
    inplace=True,
)

In [4]:
# For rows whose `sb` value is falsy, insert an underscore in front of the
# alphabetic character found in the catalog string (e.g. "123a" -> "123_a").
#
# NOTE(review): there is no `break`, so when a catalog string contains more
# than one letter every letter triggers a write built from the ORIGINAL string
# and the write for the last letter is the one that remains. Kept as-is;
# confirm whether "first letter only" was intended.
for i in db.index:
    catalog = db.loc[i, 'catalog']
    sb = db.loc[i, 'sb']

    if not sb:
        for j, c in enumerate(catalog):
            if c.isalpha():
                newcatalog = f'{catalog[:j]}_{catalog[j:]}'
                # Write with a single .loc call on the frame itself. The
                # chained form `db.catalog.loc[i] = ...` assigns into an
                # intermediate Series and is not guaranteed to reach `db`.
                db.loc[i, 'catalog'] = newcatalog

In [10]:
# Exploratory check: number of rows per `pid` label.
# `pid` is not listed in `pbcols`, so this cell only works while `db` still
# holds the full source table (before it is narrowed to `pbcols`).
db.pid.value_counts()

Kodak Velox 1948 F Smooth Glossy White Single Weight                                                               23
Kodak Velox 1944 F Smooth Glossy White Single Weight                                                               21
Agfa Gevaert Brovira 1970 1 [texture unspecified] Glossy Extra White Single Weight                                 20
Kodak Velox 1947 F Smooth Glossy White Single Weight                                                               18
Kodak Velox 1915 Velvet (Velox) [texture unspecified] Semi Glossy [base color unspecified] [weight unspecified]    18
                                                                                                                   ..
Granville Deluxe Bromide 1930 A [texture unspecified] Glossy [base color unspecified] [weight unspecified]          1
Agfa Portriga 1960 132 Smooth Semi Matte Ivory Double Weight                                                        1
Agfa Gevaert Portriga Rapid 1971 112 Smooth Semi Matte E

In [5]:
# Columns kept for the export, in output order. The four "*word" columns are
# renamed to textureWord / glossWord / colorWord / thicknessWord further down.
pbcols = (
    ['catalog', 'man', 'bran', 'year', 'circa']
    + ['surf', 'xword', 'gword', 'cword', 'tword', 'dims']
    + ['backp', 'postcard', 'toner', 'resin', 'sb', 'sbid', 'suffs']
    + ['dmin', 'dmax', 'dminHex', 'dmaxHex']
    + ['auc', 'roughness', 'gloss', 'thickness']
    + ['expressiveness', 'radarGroup', 'expressivenessGroup', 'isoGroup']
    + ['gep100', 'gep150', 'gep200', 'gep250', 'gep300', 'tmap']
)

In [6]:
def whiten(s):
    """Blank out placeholder labels, leaving every other value untouched.

    Returns ``''`` when ``s`` contains ``"specified]"`` (e.g.
    ``"[texture unspecified]"``) or ``"[Indeterminate]"``; otherwise returns
    ``s`` unchanged. Values that do not support a string membership test
    (floats such as NaN, ``None``, ...) are passed through as-is.
    """
    try:
        is_placeholder = "specified]" in s or "[Indeterminate]" in s
    except TypeError:
        # Only the "not a container" case is expected here; anything else
        # (including KeyboardInterrupt) should surface instead of being hidden
        # by a bare `except`.
        return s
    return '' if is_placeholder else s

In [7]:
# Keep only the export columns (everything not in `pbcols` is dropped here) and
# run `whiten` on every cell to blank out placeholder labels.
db = db[pbcols].applymap(whiten)

In [8]:
# Spell out the abbreviated "*word" column names for the export.
db.rename(
    columns=dict(
        xword="textureWord",
        gword="glossWord",
        cword="colorWord",
        tword="thicknessWord",
    ),
    inplace=True,
)

In [9]:
import numpy as np

In [10]:
# Round each numeric measurement column to three decimal places.
for col in ['roughness', 'auc', 'gloss', 'thickness', 'expressiveness']:
    db[col] = db[col].apply(lambda x: round(x,3))

# center GEP

In [11]:
# Index labels of the rows that have a radarGroup value; only these rows are
# used when the GEP coordinates are shifted below.
has_radar_group = db.radarGroup.notnull()
fullrows = db.index[has_radar_group]

In [12]:
def makeshift(l,xshift,yshift):
    """Translate a point by ``(-xshift, -yshift)``.

    Only the first two entries of ``l`` are read; the returned list always has
    three entries and its third entry is always ``0``.
    """
    x, y = l[0], l[1]
    shifted_x = x - xshift
    shifted_y = y - yshift
    return [shifted_x, shifted_y, 0]

In [13]:
# Shift the GEP coordinates of the `fullrows` rows, one spread factor at a time.
#
# The shift applied on each axis is half of that axis' coordinate range,
# int((max - min) / 2), truncated to an integer.
# NOTE(review): subtracting half the range puts the layout's midpoint on the
# origin only when the minimum coordinate is 0 — confirm that holds for the
# data.
for spread_factor in [100,150,200,250,300]:
    gepcol = f'gep{spread_factor}'
    gepdf = db.loc[fullrows, gepcol]

    # Pull the x / y coordinates out once instead of once per min and max.
    xs = gepdf.apply(lambda p: p[0])
    ys = gepdf.apply(lambda p: p[1])

    xmin = xs.min()
    xrange = xs.max() - xmin
    xshift = int(xrange/2)

    ymin = ys.min()
    yrange = ys.max() - ymin
    yshift = int(yrange/2)

    gepdf = gepdf.apply(lambda l: makeshift(l,xshift,yshift))
#     gepdf = gepdf.apply(lambda l: makeshift(l,0,0))
#     gepdf = gepdf.apply(lambda l: makeshift(l,int(xmin),int(ymin)))

    # Write back with a single .loc call on the frame. The chained form
    # `db[gepcol].loc[fullrows] = ...` assigns into an intermediate Series and
    # is not guaranteed to update `db`.
    db.loc[fullrows, gepcol] = gepdf

# JSON

In [14]:
def fillrand(cell):
    """Return ``cell`` if it is a list, otherwise a random placeholder point.

    The placeholder is ``[x, y, 0]`` with ``x`` and ``y`` drawn independently
    from ``np.random.randint(500, 800)``. The draw uses NumPy's global random
    state, so results differ between runs unless that state is seeded.
    """
    if not isinstance(cell,list):
        x = np.random.randint(500,800)
        y = np.random.randint(500,800)
        return [x, y, 0]
    return cell

In [15]:
# Give every cell of the gep* columns that is not a list a random placeholder
# point.
gepcols = [c for c in db.columns if 'gep' in c]
db[gepcols] = db[gepcols].applymap(fillrand)

In [16]:
# Same placeholder treatment for the tmap column.
db['tmap'] = db['tmap'].apply(fillrand)

In [17]:
# Replace every remaining missing value with an empty string.
db = db.fillna('')

#### Has processing instructions

In [18]:
import glob,os

In [19]:
# Paths of all PDF files in the processing-instructions asset folder.
procdir = "/Users/damoncrockett/paperbase/src/assets/img/processing/"
allproc = glob.glob(os.path.join(procdir, "*.pdf"))

In [20]:
# File names without the ".pdf" extension; these are compared against
# `db.catalog` below.
procats = [
    os.path.splitext(os.path.basename(item))[0]
    for item in allproc
]

In [21]:
# New flag column, 0 for every row to start with.
db['processing'] = 0

In [22]:
# Set the flag to 1 for rows whose catalog value matches a processing PDF name.
# Uses one .loc call on the frame: the chained form
# `db.processing.loc[mask] = 1` assigns into an intermediate Series and is not
# guaranteed to update `db`.
db.loc[db.catalog.isin(procats), 'processing'] = 1

In [23]:
# Convert the flag-like columns to their string form, cell by cell.
strcols = ['circa','postcard','resin','sb','processing']
db[strcols] = db[strcols].applymap(str)

#### dims

In [24]:
def int2str(i):
    """Format a number as a string, dropping a redundant ``.0``.

    Returns ``str(int(i))`` when ``i`` equals its integer value (``2.0`` ->
    ``"2"``) and ``str(i)`` otherwise (``2.5`` -> ``"2.5"``). Values that
    cannot be converted with ``int()`` — non-numeric strings, ``None``, NaN,
    infinities — are returned unchanged (and therefore not necessarily as a
    string).
    """
    try:
        whole = int(i)
        return str(whole) if i == whole else str(i)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported types; ValueError: non-numeric strings
        # and NaN; OverflowError: infinities. Anything else is unexpected and
        # should not be silently swallowed.
        return i

In [25]:
def _cm_to_inches(parts):
    """Convert centimetre strings to inches rounded to 3 decimals.

    Tries stripping ``' cm'`` first and ``'cm'`` second, as the original code
    did. If neither form parses for every item (e.g. one item is ``"roll"``),
    ``parts`` is returned unchanged instead of raising.
    """
    for unit in (' cm', 'cm'):
        try:
            return [round(float(item.replace(unit, '')) * 0.393701, 3) for item in parts]
        except ValueError:
            continue
    return parts


def fraction2decimal(s):
    """Normalize a free-text dimensions string.

    Steps, in order:

    1. Trim and lowercase; turn mixed fractions into decimals
       (``"3 1/2"`` -> ``"3.5"``).
    2. Split on ``'x'``. If any part contains a ``'`` mark, keep the first part
       and replace the rest with ``"roll"``.
    3. If any part mentions ``cm``, convert all parts to inches.
    4. If every part parses as a number, sort ascending and format with
       ``int2str``; otherwise leave the parts as text.

    Returns ``''`` for ``"[no size]"`` and for an empty string, the single
    part when there is only one, and ``"<first> x <second>"`` otherwise (any
    part after the second is dropped).

    ``s`` must be a string.
    """
    s = s.strip().lower()

    # The leading space in each pattern is what joins the fraction onto the
    # whole number in front of it.
    for fraction, decimal in (
        (' 1/2', '.5'),
        (' 1/4', '.25'),
        (' 3/4', '.75'),
        (' 1/8', '.125'),
        (' 3/8', '.375'),
        (' 5/8', '.625'),
        (' 7/8', '.875'),
    ):
        s = s.replace(fraction, decimal)

    l = [item.strip() for item in s.split('x')]

    if any("'" in item for item in l):
        l = [l[0], "roll"]

    if any('cm' in item for item in l):
        l = _cm_to_inches(l)

    try:
        numbers = sorted(float(item) for item in l)
    except (TypeError, ValueError):
        # At least one part is not numeric: keep the parts as text, unsorted.
        pass
    else:
        l = [int2str(number) for number in numbers]

    # Special case for one malformed source value whose second part reads
    # '18)/100'; it is mapped to 13 x 18.
    if any(item == '18)/100' for item in l):
        l = ['13','18']

    if len(l) == 1:
        only = l[0]
        return '' if only == "[no size]" else only
    return f'{l[0]} x {l[1]}'

In [26]:
# Normalize every dimensions string in place.
db['dims'] = db['dims'].apply(fraction2decimal)

#### sorting order

In [27]:
import re

def alpha_to_numeric(alpha):
    """Map a run of letters to an integer that sorts like the letters do.

    Each letter is a base-26 digit with 'a' = 1 ... 'z' = 26, so
    ``'a'`` -> 1, ``'z'`` -> 26, ``'aa'`` -> 27. An empty string gives 0.

    Case is ignored: ``catalog_key`` matches ``[a-zA-Z]+``, and without
    lowercasing an uppercase letter would get a negative position
    (``ord('A') - ord('a') + 1 == -31``).
    """
    value = 0
    for char in alpha.lower():
        # Position in alphabet, where 'a' is 1, 'z' is 26
        position = ord(char) - ord('a') + 1
        # Scale previous value by 26 (like base-26 numeral system)
        value = value * 26 + position
    return value

def catalog_key(s):
    """Build a ``(number, letters)`` sort key from a catalog string.

    ``number`` is the first run of digits in ``s`` as an int (0 if there is
    none); ``letters`` is ``alpha_to_numeric`` of the first run of ASCII
    letters (0 if there is none).
    """
    first_digits = re.search(r'\d+', s)
    first_letters = re.search(r'[a-zA-Z]+', s)

    if first_digits is None:
        num = 0
    else:
        num = int(first_digits.group())

    if first_letters is None:
        alpha_value = 0
    else:
        alpha_value = alpha_to_numeric(first_letters.group())

    return (num, alpha_value)

In [28]:
# Temporary helper column holding the (number, letters) sort key of each
# catalog value.
db['sortcol'] = db.catalog.apply(catalog_key)

In [29]:
# Order the rows by the catalog sort key (ascending).
db = db.sort_values(by='sortcol')

In [30]:
# The helper column is only needed for sorting; remove it before exporting.
del db['sortcol']

In [31]:
# Save the prepared table as a pickle in the current working directory.
db.to_pickle('db_json.pkl')

In [32]:
# Also write the table as CSV (without the index) in the current working
# directory.
db.to_csv('august2024_dimensions.csv',index=False)

In [33]:
# Display the final column names and their order.
db.columns

Index(['catalog', 'man', 'bran', 'year', 'circa', 'surf', 'textureWord',
       'glossWord', 'colorWord', 'thicknessWord', 'dims', 'backp', 'postcard',
       'toner', 'resin', 'sb', 'sbid', 'suffs', 'dmin', 'dmax', 'dminHex',
       'dmaxHex', 'auc', 'roughness', 'gloss', 'thickness', 'expressiveness',
       'radarGroup', 'expressivenessGroup', 'isoGroup', 'gep100', 'gep150',
       'gep200', 'gep250', 'gep300', 'tmap', 'processing'],
      dtype='object')

In [34]:
# Column-oriented payload: one key per column, each holding that column's
# values as a plain list in the sorted row order.
dbjson = {col: db[col].tolist() for col in db.columns}

In [35]:
import json

# Write the column-oriented payload to the app's data file, overwriting any
# existing file at that path.
with open('/Users/damoncrockett/paperbase/src/assets/data/data.json', 'w') as json_file:
    json.dump(dbjson, json_file)

# 4787 Image Overwrite

In [36]:
from PIL import Image
import shutil

In [37]:
# OLDDIR = "/Users/damoncrockett/Desktop/paperbase_OLD/samplebooks_expos/"
# allold = glob.glob(os.path.join(OLDDIR,"*.jpg"))
# alloldsorted = sorted([item for item in allold if '4787' in item])
# sourcez = alloldsorted[3:-3]
# PRE = "/Users/damoncrockett/paperbase/src/assets/img/samplebooks"

# for i,suff in enumerate('abcdefghijklmnopqrstuvw'):
#     source_file = sourcez[i]
#     shutil.copy(source_file,f'{PRE}/4787{suff}.jpg')
#     print(source_file,f'{PRE}/4787{suff}.jpg')
    
#     for resiz in [2048,1024,512,256]:
#         im = Image.open(source_file)
#         im.thumbnail((resiz,resiz),Image.LANCZOS)
#         im.save(f'{PRE}_{resiz}/4787{suff}.jpg')
#         print(f'{PRE}_{resiz}/4787{suff}.jpg')