In [8]:
import csv
import pandas as pd
# Load the museum catalogue export, keeping only the columns listed in
# `usecols`. Options are passed by keyword because recent pandas releases
# reject positional arguments after the file handle.
with open('museumjson.csv') as museum_file:
    museum_df = pd.read_csv(
        museum_file,
        sep=',',
        usecols=[
            "emuIRN", "curatorial_section", "object_number", "object_name",
            "native_name", "culture", "provenience", "material", "period",
            "date_made", "date_made_early", "date_made_late",
            "accession_credit_line", "creator", "description", "culture_area",
            "technique", "iconography", "measurement_height",
            "measurement_length", "measurement_width",
            "measurement_outside_diameter", "measurement_tickness",
            "measurement_unit", "other_numbers", "url",
        ],
        # Read the whole file in one pass instead of in internal chunks.
        low_memory=False,
    )
# No explicit close(): the with-statement closes museum_file on exit.

In [3]:
# Replace NaNs with blanks if the column is a string, so we don’t get
# objects in the column
def fillna_col (series):
    """Fill missing values with '' in object-dtype columns only.

    Parameters
    ----------
    series : pandas.Series
        A single column.

    Returns
    -------
    pandas.Series
        A copy with NaN/None replaced by the empty string when the column's
        dtype is ``object``; otherwise the input series itself, unchanged.
    """
    # Compare against the builtin `object` with ==. The old `pd.np` alias is
    # gone from current pandas, and identity (`is`) checks on dtypes are fragile.
    if series.dtype == object:
        return series.fillna('')
    return series
    
# Replace '\N' with NaN
def nullify(element):
    """Map the two-character placeholder backslash-N to NaN.

    Any other value is handed back untouched.
    """
    return float('nan') if element == '\\N' else element

In [9]:
# Show museum_df as this cell's output for a visual check.
museum_df

Unnamed: 0,emuIRN,curatorial_section,object_number,object_name,native_name,culture,provenience,material,period,date_made,...,technique,iconography,measurement_height,measurement_length,measurement_width,measurement_outside_diameter,measurement_tickness,measurement_unit,other_numbers,url
0,241947,American,29-47-124,Seal Drag Handle,,Eskimo,Northern Alaska|British America,Seal Skin,,,...,,,,45.7,,,,cm,,http://www.penn.museum/collections/object/241947
1,187327,American,29-151-169,Lance,,Eskimo,Northern Alaska|Alaska,Bone|Iron,,,...,,,,21.5,0.9,,,cm,,http://www.penn.museum/collections/object/187327
2,353827,American,40-16-1749,Vessel|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 12,http://www.penn.museum/collections/object/353827
3,349877,American,40-16-949,Vessel|Pedestal|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 50,http://www.penn.museum/collections/object/349877
4,349878,American,40-16-921,Carafe|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 15,http://www.penn.museum/collections/object/349878
5,349879,American,40-16-1751,Vessel|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 12,http://www.penn.museum/collections/object/349879
6,349880,American,40-16-1752,Handles|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 12,http://www.penn.museum/collections/object/349880
7,353824,American,40-16-1753,Carafe|Neck Sherd,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 12,http://www.penn.museum/collections/object/353824
8,353825,American,40-16-1747,Carafe|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 2,http://www.penn.museum/collections/object/353825
9,353826,American,40-16-1761,Plate|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 27,http://www.penn.museum/collections/object/353826


In [10]:
# List the dtype of every column in museum_df.
museum_df.dtypes

emuIRN                            int64
curatorial_section               object
object_number                    object
object_name                      object
native_name                      object
culture                          object
provenience                      object
material                         object
period                           object
date_made                        object
date_made_early                  object
date_made_late                   object
accession_credit_line            object
creator                          object
description                      object
culture_area                     object
technique                        object
iconography                      object
measurement_height              float64
measurement_length              float64
measurement_width               float64
measurement_outside_diameter    float64
measurement_tickness            float64
measurement_unit                 object
other_numbers                    object


In [11]:
# Apply nullify to every cell, one column at a time. Series.map is used
# instead of DataFrame.applymap, which newer pandas releases deprecate.
museum_df = museum_df.apply(lambda column: column.map(nullify))
# dropna returns a new frame rather than modifying in place, so the result
# must be assigned back; otherwise rows lacking an emuIRN are never removed.
museum_df = museum_df.dropna(axis=0, how='any', subset=['emuIRN'])

In [13]:
import sqlite3

# Connect to the SQLite database file named 'lookupDB'.
engine = sqlite3.connect('lookupDB')

# Store the frame in a table that is also called 'lookupDB', replacing any
# table of that name and leaving the DataFrame index out.
museum_df.to_sql(
    name='lookupDB',
    con=engine,
    if_exists='replace',
    index=False,
)

# Read the table straight back so museum_df holds what SQLite stored.
museum_df = pd.read_sql('select * from lookupDB', engine)

In [15]:
# Show museum_df again as this cell's output.
museum_df

Unnamed: 0,emuIRN,curatorial_section,object_number,object_name,native_name,culture,provenience,material,period,date_made,...,technique,iconography,measurement_height,measurement_length,measurement_width,measurement_outside_diameter,measurement_tickness,measurement_unit,other_numbers,url
0,241947,American,29-47-124,Seal Drag Handle,,Eskimo,Northern Alaska|British America,Seal Skin,,,...,,,,45.7,,,,cm,,http://www.penn.museum/collections/object/241947
1,187327,American,29-151-169,Lance,,Eskimo,Northern Alaska|Alaska,Bone|Iron,,,...,,,,21.5,0.9,,,cm,,http://www.penn.museum/collections/object/187327
2,353827,American,40-16-1749,Vessel|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 12,http://www.penn.museum/collections/object/353827
3,349877,American,40-16-949,Vessel|Pedestal|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 50,http://www.penn.museum/collections/object/349877
4,349878,American,40-16-921,Carafe|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 15,http://www.penn.museum/collections/object/349878
5,349879,American,40-16-1751,Vessel|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 12,http://www.penn.museum/collections/object/349879
6,349880,American,40-16-1752,Handles|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 12,http://www.penn.museum/collections/object/349880
7,353824,American,40-16-1753,Carafe|Neck Sherd,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 12,http://www.penn.museum/collections/object/353824
8,353825,American,40-16-1747,Carafe|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 2,http://www.penn.museum/collections/object/353825
9,353826,American,40-16-1761,Plate|Sherds,,Cocle,Panama|Sitio Conte,Ceramic,,AD 450 - 900,...,,,,,,,,cm,B19 - 27,http://www.penn.museum/collections/object/353826
