In [1]:
import pandas as pd

In [2]:
# Load the raw export into a DataFrame. The file is decoded as Latin-1
# instead of pandas' default UTF-8.
df = pd.read_csv("export_06_05_24.csv", encoding='latin-1')

In [3]:
from cromulent import model, vocab
import json

In [4]:
# Use the first row of the export as the sample record that the cells below
# convert into a cromulent object.
smp = df.iloc[0]

In [5]:
def photo_id_stringify(photoid):
    """Turn a photo id into an identifier string that starts with a letter.

    The id is converted to text; a leading ``-`` is replaced by the prefix
    ``m``, and any other value is prefixed with ``p``.
    """
    text = str(photoid)
    prefix, rest = ('m', text[1:]) if text[0] == '-' else ('p', text)
    return f'{prefix}{rest}'

In [6]:
def record_label(rec):
    """Build a display label from a record.

    Reads the ``Manufacturer``, ``Brand`` and ``Year`` attributes of ``rec``
    and joins them, in that order, with single spaces.
    """
    parts = (rec.Manufacturer, rec.Brand, str(rec.Year))
    return ' '.join(f'{part}' for part in parts)

In [7]:
# PhotoID to URI
# The sample record becomes a HumanMadeObject whose identifier is the
# letter-prefixed PhotoID and whose label is "<Manufacturer> <Brand> <Year>".
paper = model.HumanMadeObject(ident=photo_id_stringify(smp.PhotoID), label=record_label(smp))

# Open question: does the label need to be unique?

# Open question: does the identifier need to start with a letter?
# (photo_id_stringify() already guarantees a leading 'm' or 'p'.)

In [9]:
def create_timespan(smp):
    """Derive the production-date bounds and display string for a record.

    Parameters
    ----------
    smp
        Record exposing ``Year`` and ``DateUncertain`` attributes.

    Returns
    -------
    tuple of (str, str, str) or None
        ``(begin_of_the_begin, end_of_the_end, display_name)``: the first and
        last second of the year as ``YYYY-MM-DDTHH:MM:SSZ`` strings, plus the
        year itself, prefixed with ``'circa '`` when ``DateUncertain`` is
        truthy. ``None`` when the year text is not an integer literal.

    Raises
    ------
    AssertionError
        If the year, rendered as text, is not exactly four characters long.
    """
    year = smp.Year
    uncertain = smp.DateUncertain

    # pandas stores an integer column that contains missing values as float,
    # so 1950 can arrive as 1950.0; collapse integral floats back to int so
    # they are not rejected by the length check below.
    if isinstance(year, float) and year.is_integer():
        year = int(year)

    year = str(year)
    assert len(year) == 4, f'expected a four-character year, got {year!r}'

    # Validate the exact text that is interpolated into the dates, and catch
    # only the parse failure instead of every exception.
    try:
        int(year)
    except ValueError:
        return None

    botb = f'{year}-01-01T00:00:00Z'
    eote = f'{year}-12-31T23:59:59Z'

    # NOTE(review): this is a plain truthiness test, so a missing flag stored
    # as NaN (which is truthy) is rendered as "circa" -- confirm that is wanted.
    if uncertain:
        dns = f'circa {year}'
    else:
        dns = year

    return botb, eote, dns

In [10]:
# Year, DateUncertain to Prod/Date
prod = model.Production()
paper.produced_by = prod

# create_timespan() returns None when the year cannot be parsed, so only
# attach a TimeSpan when one is available instead of failing on unpacking.
timespan_parts = create_timespan(smp)
if timespan_parts is not None:
    botb, eote, dns = timespan_parts
    ts = model.TimeSpan()
    ts.begin_of_the_begin = botb
    ts.end_of_the_end = eote
    # The human-readable form ("1950" / "circa 1950") is attached to the
    # TimeSpan as its display name.
    dn = vocab.DisplayName(content=dns)
    ts.identified_by = dn
    prod.timespan = ts

In [18]:
def get_catalog_number(smp):
    """Merge a record's primary and secondary catalog numbers into one string.

    Parameters
    ----------
    smp
        Record indexable by the column names ``'Catalog Number'`` and
        ``'Secondary Catalog Number'``.

    Returns
    -------
    str
        The primary number immediately followed by the secondary number.
        A missing part (``None``/NaN) contributes an empty string.
    """
    def _as_text(value):
        # Missing values must not leak into the identifier as "nan"/"None",
        # and non-string values must not break the concatenation.
        return '' if pd.isna(value) else str(value)

    return _as_text(smp['Catalog Number']) + _as_text(smp['Secondary Catalog Number'])

In [19]:
# Catalog Number, Secondary Catalog Number
# The export gives no information about what these numbers refer to, so the
# two are merged into a single string and attached as one Identifier
# (treated as a call number).
# NOTE(review): paper.identified_by is assigned again in a later cell;
# cromulent presumably appends rather than overwrites -- confirm.
paper.identified_by = model.Identifier(content=get_catalog_number(smp))

In [20]:
# Manufacturer = Group that made this thing
# There needs to be a separate record with a Name
# The lower-cased manufacturer string is used as both identifier and label.
# NOTE(review): manufacturer names can contain spaces and '&' (for example
# "DuPont Defender", "Wellington & Ward"), and a missing manufacturer would
# have no .lower() -- confirm the ident is safe to use as a URI segment.
prod.carried_out_by = model.Group(ident=smp.Manufacturer.lower(), label=smp.Manufacturer.lower())

In [21]:
def name_constructor(smp):
    """Construct a primary name for a paper record.

    Parameters
    ----------
    smp
        Record exposing ``Manufacturer`` and ``Brand`` attributes, plus
        whatever ``create_timespan`` reads from it.

    Returns
    -------
    str
        ``'<Manufacturer> <Brand> Paper from <date display string>'``, or
        ``'<Manufacturer> <Brand> Paper'`` when ``create_timespan`` returns
        ``None`` because the year could not be parsed.
    """
    man = smp.Manufacturer
    bran = smp.Brand

    # create_timespan() may return None; subscripting that would raise a
    # TypeError, so fall back to a name without the date part.
    timespan = create_timespan(smp)
    if timespan is None:
        return f'{man} {bran} Paper'

    return f'{man} {bran} Paper from {timespan[2]}'

In [22]:
# But ... We need a name
# Construct it from multiple fields?
# The name is built by name_constructor() from manufacturer, brand and the
# date display string, and attached as the object's PrimaryName.
paper.identified_by = vocab.PrimaryName(content=name_constructor(smp))

In [24]:
# Format = Description
# The Format column is attached verbatim as a Description statement.
paper.referred_to_by = vocab.Description(content=smp.Format)

In [25]:
# Storage location, recorded as a free-text statement "Box <n>, Bag <n>".
# metatype: http://vocab.getty.edu/aat/300248479
# TODO: the metatype above is only noted here; no classification is set on
# the LinguisticObject yet.
paper.referred_to_by = model.LinguisticObject(content=f'Box {smp.LocationBox}, Bag {smp.LocationBag}')

In [26]:
# Not sure what to do about surface designation -- statement? Not comprehensible by itself
# For now the SurfaceDesignation2 value is attached as a plain Note.
# TODO: add a custom display label "Manufacturer Surface Designation" (or something)
paper.referred_to_by = vocab.Note(content=smp.SurfaceDesignation2)

In [27]:
# Texture, Reflectance, Color, Weight -- classifications
# Each value becomes a Type whose identifier is "<facet>/<raw column value>".
# TODO - these need metatypes to convey the sort of classification
# NOTE(review): the raw column values go straight into the identifier; a
# missing value would presumably render as "nan" -- confirm how these columns
# are populated.
paper.classified_as = model.Type(ident=f'texture/{smp.Texture2}')
paper.classified_as = model.Type(ident=f'gloss/{smp.Reflectance2}')
paper.classified_as = model.Type(ident=f'color/{smp.BaseColor2}')
paper.classified_as = model.Type(ident=f'weight/{smp.Weight2}')

In [29]:
# TODO: Brand -- yet another classification?
# Serialize the assembled object through cromulent's factory, then
# pretty-print the result as a JSON string with two-space indentation.
js = model.factory.toJSON(paper)
rec = json.dumps(js, indent=2)
#print(rec)

# Open question: how should missing values be handled?

In [32]:
# Show the 50 most frequent manufacturers with their record counts.
df["Manufacturer"].value_counts().head(50)

Manufacturer
Kodak                         2372
Ilford                         602
Ansco                          482
DuPont                         414
DuPont Defender                380
Kodak London                   364
Agfa Gevaert                   325
Gevaert                        293
Defender                       278
Agfa                           239
Neobrom                        231
Ansco GAF                      187
Mimosa                         177
Leonar                          84
Agfa Ansco                      82
Haloid                          70
Luminos                         68
Darko                           46
Foma                            37
Oriental                        36
Ferrania                        36
Forte                           34
Kodak Canada                    32
Kentmere                        21
Negra                           15
Wellington & Ward               14
Voigtlander                     13
Autotype                        12
Spirato