This notebook cleans, transforms, and formats metadata for singlepart monographs for HathiTrust full catalog. Before running this notebook, the metadata must be extracted from an Alma-published MARC file and saved as a .pkl file. The .txt file produced at the end of this notebook is ready for posting to HathiTrust.
Repeatable field 952
Barcode = d
Material type subfield = e
Description subfield = f
Permanent library subfield = b
Permanent location subfield = c
Current location subfield = g
Process type subfield = h
Internal note 1 subfield = i

In [1]:
import pandas as pd
import numpy as np

In [2]:
#change filename if necessary
spm = pd.read_pickle('spm_df.pkl')
spm

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag
0,9933808450001701,(OCoLC)65700639,319510005417514,BOOK,,TWILS,GEN,GEN,,,0
0,9933808360001701,(OCoLC)4587773,31956001217864,BOOK,,MBRIG,GEN,GEN,,QK926 .P62,0
0,9933808350001701,(OCoLC)6377917,31951P00533318G,BOOK,,TWILS,GEN,GEN,,,0
0,9933808230001701,(OCoLC)62626655,31951T00174761U,BOOK,,TWILS,GEN,GEN,,,0
0,9933808210001701,(OCoLC)9799919,31951000140326G,BOOK,,TMAGR,GEN,GEN,,,0
...,...,...,...,...,...,...,...,...,...,...,...
0,9951825480001701,(OCoLC)34585654,31951P00419577H,BOOK,,TSCI,GEN,GEN,,,0
0,9951825380001701,(OCoLC)32589145,31951P004266158,BOOK,,TWILS,GEN,GEN,,,0
0,9951825330001701,(OCoLC)2370706,31951P00375825F,BOOK,,TNRL,GOVU,WDN,TECHNICAL,MORE BCODES:31951P00375825F,1
0,9951825320001701,(OCoLC)6042829,31951D013899679,BOOK,,TAND,CLR,CLR,,,0


In [3]:
#drop permanent libraries we don't own
spm_no_lmich = spm[~spm['perm_lib'].str.contains('lmich|DTWED|DAILR|DNRRI|DCED', case=False)]
spm_no_lmich

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag
0,9933808450001701,(OCoLC)65700639,319510005417514,BOOK,,TWILS,GEN,GEN,,,0
0,9933808360001701,(OCoLC)4587773,31956001217864,BOOK,,MBRIG,GEN,GEN,,QK926 .P62,0
0,9933808350001701,(OCoLC)6377917,31951P00533318G,BOOK,,TWILS,GEN,GEN,,,0
0,9933808230001701,(OCoLC)62626655,31951T00174761U,BOOK,,TWILS,GEN,GEN,,,0
0,9933808210001701,(OCoLC)9799919,31951000140326G,BOOK,,TMAGR,GEN,GEN,,,0
...,...,...,...,...,...,...,...,...,...,...,...
0,9951825480001701,(OCoLC)34585654,31951P00419577H,BOOK,,TSCI,GEN,GEN,,,0
0,9951825380001701,(OCoLC)32589145,31951P004266158,BOOK,,TWILS,GEN,GEN,,,0
0,9951825330001701,(OCoLC)2370706,31951P00375825F,BOOK,,TNRL,GOVU,WDN,TECHNICAL,MORE BCODES:31951P00375825F,1
0,9951825320001701,(OCoLC)6042829,31951D013899679,BOOK,,TAND,CLR,CLR,,,0


In [4]:
#drop items in zmlac locations that need to be excluded
spm_loc_clean = spm_no_lmich[~spm_no_lmich['perm_loc'].str.contains('CARN|MANN|MPL|MPLN|NONX|SLLN|SPP|SPPN|DCED|DNRRI|DAILR|DTWED', case=False)]
spm_loc_clean

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag
0,9933808450001701,(OCoLC)65700639,319510005417514,BOOK,,TWILS,GEN,GEN,,,0
0,9933808360001701,(OCoLC)4587773,31956001217864,BOOK,,MBRIG,GEN,GEN,,QK926 .P62,0
0,9933808350001701,(OCoLC)6377917,31951P00533318G,BOOK,,TWILS,GEN,GEN,,,0
0,9933808230001701,(OCoLC)62626655,31951T00174761U,BOOK,,TWILS,GEN,GEN,,,0
0,9933808210001701,(OCoLC)9799919,31951000140326G,BOOK,,TMAGR,GEN,GEN,,,0
...,...,...,...,...,...,...,...,...,...,...,...
0,9951825480001701,(OCoLC)34585654,31951P00419577H,BOOK,,TSCI,GEN,GEN,,,0
0,9951825380001701,(OCoLC)32589145,31951P004266158,BOOK,,TWILS,GEN,GEN,,,0
0,9951825330001701,(OCoLC)2370706,31951P00375825F,BOOK,,TNRL,GOVU,WDN,TECHNICAL,MORE BCODES:31951P00375825F,1
0,9951825320001701,(OCoLC)6042829,31951D013899679,BOOK,,TAND,CLR,CLR,,,0


In [5]:
#replace empty values with NaN
spm2 = spm_loc_clean
spm2 = spm_loc_clean.apply(lambda x: x.str.strip()).replace('', np.nan)
spm2

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag
0,9933808450001701,(OCoLC)65700639,319510005417514,BOOK,,TWILS,GEN,GEN,,,0
0,9933808360001701,(OCoLC)4587773,31956001217864,BOOK,,MBRIG,GEN,GEN,,QK926 .P62,0
0,9933808350001701,(OCoLC)6377917,31951P00533318G,BOOK,,TWILS,GEN,GEN,,,0
0,9933808230001701,(OCoLC)62626655,31951T00174761U,BOOK,,TWILS,GEN,GEN,,,0
0,9933808210001701,(OCoLC)9799919,31951000140326G,BOOK,,TMAGR,GEN,GEN,,,0
...,...,...,...,...,...,...,...,...,...,...,...
0,9951825480001701,(OCoLC)34585654,31951P00419577H,BOOK,,TSCI,GEN,GEN,,,0
0,9951825380001701,(OCoLC)32589145,31951P004266158,BOOK,,TWILS,GEN,GEN,,,0
0,9951825330001701,(OCoLC)2370706,31951P00375825F,BOOK,,TNRL,GOVU,WDN,TECHNICAL,MORE BCODES:31951P00375825F,1
0,9951825320001701,(OCoLC)6042829,31951D013899679,BOOK,,TAND,CLR,CLR,,,0


In [6]:
#select a dataframe containing only records with data in internal_note1, which is where condition information will be found
inotes = spm2[spm2['internal_note1'].notnull()]
inotes

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag
0,9933808360001701,(OCoLC)4587773,31956001217864,BOOK,,MBRIG,GEN,GEN,,QK926 .P62,0
0,9933807080001701,(OCoLC)17787620,31951D02183599C,BOOK,,TMAGR,GEN,GEN,,uncat docs,0
0,9933810960001701,(OCoLC)3833534,31951P01206714O,BOOK,,TWILS,GEN,ILLLEND1,ILL,tb 03/10,0
0,9933810290001701,(OCoLC)25371488,31956000393468,BOOK,,MBRIG,GEN,GEN,,PQ6353 .M369 1992,0
0,9933809260001701,(OCoLC)2074264,31951000541787J,BOOK,,ZMLAC,GEN,GEN,,wils,0
...,...,...,...,...,...,...,...,...,...,...,...
0,9951840510001701,(OCoLC)41833851,31956000832036,BOOK,,MBRIG,GEN,GEN,,QB982.R7 1999,0
0,9951839790001701,(OCoLC)40465416,418792-1001,BOOK,,DUMD,UMDGS,UMDGS,,GPO,1
0,9951823070001701,(OCoLC)31997431,31951P00418112U,BOOK,,TVET,GEN,GEN,,MORE BCODES:31951P00418112U,0
0,9951826930001701,(OCoLC)33244704,31956000719662,BOOK,,MBRIG,GEN,GEN,,E169.1.K78 1996,0


In [7]:
#select a dataframe containing only records matching text strings denoting damage
brittle = inotes[inotes['internal_note1'].str.contains('brittle|damage|deteriorat', case=False)]
brittle

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag
0,9947794330001701,(OCoLC)19498351,31951002966873H,BOOK,,TWILS,GOVU,WDN,TECHNICAL,Water damaged,1
0,9935337090001701,(OCoLC)29091644,31951P00279417H,BOOK,,TSCI,GEN,GEN,,Microclimate box--Brittle Item / MORE BCODES:3...,0
0,9914234860001701,(OCoLC)1724843,31951P00279406M,BOOK,,TSCI,GEN,GEN,,Microclimate Box -- Brittle Item / MORE BCODES...,0
0,9931924990001701,(OCoLC)6830860,31951D01045053D,BOOK,,TLAW,STO,WDN,TECHNICAL,withdrawn/damaged,0
0,9965663200001701,(OCoLC)19436671,31951D00596558T,BOOK,,TLAW,MSPL,WDN,TECHNICAL,damaged; tossed & repl with photocopy,0
...,...,...,...,...,...,...,...,...,...,...,...
0,9944305630001701,(OCoLC)1227399,31951P00279428C,BOOK,,TSCI,GEN,GEN,,Microclimate box--Brittle Item / MORE BCODES:3...,0
0,9923698080001701,(OCoLC)10108023,31951002551382N,BOOK,,TLAW,GEN,WDN,TECHNICAL,"damaged, withdrawn per CSL/cso, 2/12",0
0,9952789340001701,(OCoLC)18520404,31951D00726436Z,BOOK,,TWILS,GEN,GEN,,damage noted.,0
0,9943121940001701,(OCoLC)38074351,31951D01356483S,BOOK,,TLAW,ARC,ARC,,marred by water damage,0


In [8]:
#add a column with condition indicator as defined by HathiTrust
brittle2 = brittle.assign(condition='BRT')
brittle2

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition
0,9947794330001701,(OCoLC)19498351,31951002966873H,BOOK,,TWILS,GOVU,WDN,TECHNICAL,Water damaged,1,BRT
0,9935337090001701,(OCoLC)29091644,31951P00279417H,BOOK,,TSCI,GEN,GEN,,Microclimate box--Brittle Item / MORE BCODES:3...,0,BRT
0,9914234860001701,(OCoLC)1724843,31951P00279406M,BOOK,,TSCI,GEN,GEN,,Microclimate Box -- Brittle Item / MORE BCODES...,0,BRT
0,9931924990001701,(OCoLC)6830860,31951D01045053D,BOOK,,TLAW,STO,WDN,TECHNICAL,withdrawn/damaged,0,BRT
0,9965663200001701,(OCoLC)19436671,31951D00596558T,BOOK,,TLAW,MSPL,WDN,TECHNICAL,damaged; tossed & repl with photocopy,0,BRT
...,...,...,...,...,...,...,...,...,...,...,...,...
0,9944305630001701,(OCoLC)1227399,31951P00279428C,BOOK,,TSCI,GEN,GEN,,Microclimate box--Brittle Item / MORE BCODES:3...,0,BRT
0,9923698080001701,(OCoLC)10108023,31951002551382N,BOOK,,TLAW,GEN,WDN,TECHNICAL,"damaged, withdrawn per CSL/cso, 2/12",0,BRT
0,9952789340001701,(OCoLC)18520404,31951D00726436Z,BOOK,,TWILS,GEN,GEN,,damage noted.,0,BRT
0,9943121940001701,(OCoLC)38074351,31951D01356483S,BOOK,,TLAW,ARC,ARC,,marred by water damage,0,BRT


In [9]:
brittle3 = brittle2[['barcode', 'condition']]
brittle3

Unnamed: 0,barcode,condition
0,31951002966873H,BRT
0,31951P00279417H,BRT
0,31951P00279406M,BRT
0,31951D01045053D,BRT
0,31951D00596558T,BRT
...,...,...
0,31951P00279428C,BRT
0,31951002551382N,BRT
0,31951D00726436Z,BRT
0,31951D01356483S,BRT


In [10]:
spm3 = pd.merge(left=spm2, right=brittle3, how='left', on='barcode')
spm3

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition
0,9933808450001701,(OCoLC)65700639,319510005417514,BOOK,,TWILS,GEN,GEN,,,0,
1,9933808360001701,(OCoLC)4587773,31956001217864,BOOK,,MBRIG,GEN,GEN,,QK926 .P62,0,
2,9933808350001701,(OCoLC)6377917,31951P00533318G,BOOK,,TWILS,GEN,GEN,,,0,
3,9933808230001701,(OCoLC)62626655,31951T00174761U,BOOK,,TWILS,GEN,GEN,,,0,
4,9933808210001701,(OCoLC)9799919,31951000140326G,BOOK,,TMAGR,GEN,GEN,,,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
4326751,9951825480001701,(OCoLC)34585654,31951P00419577H,BOOK,,TSCI,GEN,GEN,,,0,
4326752,9951825380001701,(OCoLC)32589145,31951P004266158,BOOK,,TWILS,GEN,GEN,,,0,
4326753,9951825330001701,(OCoLC)2370706,31951P00375825F,BOOK,,TNRL,GOVU,WDN,TECHNICAL,MORE BCODES:31951P00375825F,1,
4326754,9951825320001701,(OCoLC)6042829,31951D013899679,BOOK,,TAND,CLR,CLR,,,0,


In [11]:
#double check the count of rows with the condition indicator
spm3[spm3['condition'].notnull()]

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition
957,9947794330001701,(OCoLC)19498351,31951002966873H,BOOK,,TWILS,GOVU,WDN,TECHNICAL,Water damaged,1,BRT
13763,9935337090001701,(OCoLC)29091644,31951P00279417H,BOOK,,TSCI,GEN,GEN,,Microclimate box--Brittle Item / MORE BCODES:3...,0,BRT
17442,9914234860001701,(OCoLC)1724843,31951P00279406M,BOOK,,TSCI,GEN,GEN,,Microclimate Box -- Brittle Item / MORE BCODES...,0,BRT
20426,9931924990001701,(OCoLC)6830860,31951D01045053D,BOOK,,TLAW,STO,WDN,TECHNICAL,withdrawn/damaged,0,BRT
22707,9965663200001701,(OCoLC)19436671,31951D00596558T,BOOK,,TLAW,MSPL,WDN,TECHNICAL,damaged; tossed & repl with photocopy,0,BRT
...,...,...,...,...,...,...,...,...,...,...,...,...
4317127,9944305630001701,(OCoLC)1227399,31951P00279428C,BOOK,,TSCI,GEN,GEN,,Microclimate box--Brittle Item / MORE BCODES:3...,0,BRT
4317188,9923698080001701,(OCoLC)10108023,31951002551382N,BOOK,,TLAW,GEN,WDN,TECHNICAL,"damaged, withdrawn per CSL/cso, 2/12",0,BRT
4317680,9952789340001701,(OCoLC)18520404,31951D00726436Z,BOOK,,TWILS,GEN,GEN,,damage noted.,0,BRT
4320163,9943121940001701,(OCoLC)38074351,31951D01356483S,BOOK,,TLAW,ARC,ARC,,marred by water damage,0,BRT


In [12]:
#create a dataframe where any row has a perm loc or curr loc of wdn to set the holdings status
wdn = spm3[(spm3['perm_loc'] == 'WDN') | (spm3['curr_loc'] == 'WDN')]
wdn

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition
13,9933807500001701,(OCoLC)24247754,31951P00227919X,BOOK,,TWILS,GEN,WDN,TECHNICAL,,1,
31,9933806300001701,(OCoLC)3458018,31951000140739R,THESIS,,TNRL,THS,WDN,TECHNICAL,,0,
45,9933810840001701,(OCoLC)866207,31951P00114433F,BOOK,,TMAGR,GEN,WDN,TECHNICAL,,0,
101,9930126430001701,(OCoLC)828333812,31951D03734532J,BOOK,,TMAGR,GEN,WDN,,,0,
204,9919746270001701,(OCoLC)43555308,31951D02160125C,BOOK,,TSCI,GEN,WDN,TECHNICAL,,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
4326733,9951821920001701,(OCoLC)34413168,31951D008964778,BOOK,,TMAGR,GEN,WDN,,,0,
4326734,9951821920001701,(OCoLC)34413168,31951D01492740N,BOOK,,TWILS,GEN,WDN,,,0,
4326737,9951827040001701,(OCoLC)813436874,319530011565573,BOOK,,DUMD,UMDGS,WDN,,,1,
4326745,9951826290001701,(OCoLC)813436649,319530011565565,BOOK,,DUMD,UMDGS,WDN,,,1,


In [13]:
wdn2 = wdn.assign(holding_status='WD')
wdn2

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
13,9933807500001701,(OCoLC)24247754,31951P00227919X,BOOK,,TWILS,GEN,WDN,TECHNICAL,,1,,WD
31,9933806300001701,(OCoLC)3458018,31951000140739R,THESIS,,TNRL,THS,WDN,TECHNICAL,,0,,WD
45,9933810840001701,(OCoLC)866207,31951P00114433F,BOOK,,TMAGR,GEN,WDN,TECHNICAL,,0,,WD
101,9930126430001701,(OCoLC)828333812,31951D03734532J,BOOK,,TMAGR,GEN,WDN,,,0,,WD
204,9919746270001701,(OCoLC)43555308,31951D02160125C,BOOK,,TSCI,GEN,WDN,TECHNICAL,,0,,WD
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4326733,9951821920001701,(OCoLC)34413168,31951D008964778,BOOK,,TMAGR,GEN,WDN,,,0,,WD
4326734,9951821920001701,(OCoLC)34413168,31951D01492740N,BOOK,,TWILS,GEN,WDN,,,0,,WD
4326737,9951827040001701,(OCoLC)813436874,319530011565573,BOOK,,DUMD,UMDGS,WDN,,,1,,WD
4326745,9951826290001701,(OCoLC)813436649,319530011565565,BOOK,,DUMD,UMDGS,WDN,,,1,,WD


In [14]:
wdn3 = wdn2[['barcode', 'holding_status']]
wdn3

Unnamed: 0,barcode,holding_status
13,31951P00227919X,WD
31,31951000140739R,WD
45,31951P00114433F,WD
101,31951D03734532J,WD
204,31951D02160125C,WD
...,...,...
4326733,31951D008964778,WD
4326734,31951D01492740N,WD
4326737,319530011565573,WD
4326745,319530011565565,WD


In [15]:
spm4 = pd.merge(left=spm3, right=wdn3, how='left', on='barcode')
spm4

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
0,9933808450001701,(OCoLC)65700639,319510005417514,BOOK,,TWILS,GEN,GEN,,,0,,
1,9933808360001701,(OCoLC)4587773,31956001217864,BOOK,,MBRIG,GEN,GEN,,QK926 .P62,0,,
2,9933808350001701,(OCoLC)6377917,31951P00533318G,BOOK,,TWILS,GEN,GEN,,,0,,
3,9933808230001701,(OCoLC)62626655,31951T00174761U,BOOK,,TWILS,GEN,GEN,,,0,,
4,9933808210001701,(OCoLC)9799919,31951000140326G,BOOK,,TMAGR,GEN,GEN,,,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4326751,9951825480001701,(OCoLC)34585654,31951P00419577H,BOOK,,TSCI,GEN,GEN,,,0,,
4326752,9951825380001701,(OCoLC)32589145,31951P004266158,BOOK,,TWILS,GEN,GEN,,,0,,
4326753,9951825330001701,(OCoLC)2370706,31951P00375825F,BOOK,,TNRL,GOVU,WDN,TECHNICAL,MORE BCODES:31951P00375825F,1,,WD
4326754,9951825320001701,(OCoLC)6042829,31951D013899679,BOOK,,TAND,CLR,CLR,,,0,,


In [16]:
spm4[spm4['holding_status'].notnull()]

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
13,9933807500001701,(OCoLC)24247754,31951P00227919X,BOOK,,TWILS,GEN,WDN,TECHNICAL,,1,,WD
31,9933806300001701,(OCoLC)3458018,31951000140739R,THESIS,,TNRL,THS,WDN,TECHNICAL,,0,,WD
45,9933810840001701,(OCoLC)866207,31951P00114433F,BOOK,,TMAGR,GEN,WDN,TECHNICAL,,0,,WD
101,9930126430001701,(OCoLC)828333812,31951D03734532J,BOOK,,TMAGR,GEN,WDN,,,0,,WD
204,9919746270001701,(OCoLC)43555308,31951D02160125C,BOOK,,TSCI,GEN,WDN,TECHNICAL,,0,,WD
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4326733,9951821920001701,(OCoLC)34413168,31951D008964778,BOOK,,TMAGR,GEN,WDN,,,0,,WD
4326734,9951821920001701,(OCoLC)34413168,31951D01492740N,BOOK,,TWILS,GEN,WDN,,,0,,WD
4326737,9951827040001701,(OCoLC)813436874,319530011565573,BOOK,,DUMD,UMDGS,WDN,,,1,,WD
4326745,9951826290001701,(OCoLC)813436649,319530011565565,BOOK,,DUMD,UMDGS,WDN,,,1,,WD


In [17]:
#process type will help us set the LM aka lost or missing holdings status
proctype = spm4[spm4['process_type'].notnull()]
proctype

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
13,9933807500001701,(OCoLC)24247754,31951P00227919X,BOOK,,TWILS,GEN,WDN,TECHNICAL,,1,,WD
31,9933806300001701,(OCoLC)3458018,31951000140739R,THESIS,,TNRL,THS,WDN,TECHNICAL,,0,,WD
42,9933810960001701,(OCoLC)3833534,31951P01206714O,BOOK,,TWILS,GEN,ILLLEND1,ILL,tb 03/10,0,,
45,9933810840001701,(OCoLC)866207,31951P00114433F,BOOK,,TMAGR,GEN,WDN,TECHNICAL,,0,,WD
96,9933791580001701,(OCoLC)12162866,31956001045588,BOOK,,MBRIG,GEN,GEN,LOAN,QC174.17.R65 A48 1986,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4326687,9951840430001701,(OCoLC)681488948,31951D03049882A,BOOK,,TWILS,GEN,GEN,LOAN,,0,,
4326690,9951840130001701,(OCoLC)44750105,31951D02029246M,BOOK,,TMAGR,GEN,WDN,TECHNICAL,,0,,WD
4326709,9951824080001701,(OCoLC)6098760,31951D02889299H,BOOK,,TNRL,GOVU,WDN,TECHNICAL,,1,,WD
4326724,9951822630001701,(OCoLC)770709440,31951D03525771G,BOOK,,TWILS,GOVJ,WDN,TECHNICAL,,1,,WD


In [18]:
lost_msg = proctype[((proctype['process_type'].str.contains('missing|lost', case=False)) & (proctype['holding_status'].isnull())) | 
                    ((proctype['internal_note1'].str.contains('missing|lost', case=False)) & (proctype['holding_status'].isnull()))]
lost_msg

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
1133,9973956598901701,(OCoLC)828858919,319530011535116,BOOK,,DUMD,UMDBK,UMDBK,MISSING,,0,,
5010,9954361780001701,(OCoLC)20296270,31951D024007326,BOOK,,TVET,GEN,GEN,MISSING,Gift of M. Lisa Berg,0,,
6939,9973998008601701,(OCoLC)909397324,31951D03493603C,BOOK,,TBIOM,GEN,GEN,MISSING,,0,,
7569,9962248210001701,(OCoLC)52357507,31951D02658046B,BOOK,,TWILS,GEN,GEN,MISSING,,0,,
8244,9975607604301701,(OCoLC)1008576927,31951D03657739D,BOOK,,TWILS,GEN,GEN,LOST_LOAN,,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4318255,9974752773501701,(OCoLC)944132880,31951D03493793H,BOOK,,TBIOM,GEN,GEN,MISSING,,0,,
4318683,9912097200001701,(OCoLC)275593,31956001726617,BOOK,,MBRIG,GEN,GEN,LOST_LOAN,PS3053 .H32,0,,
4319391,9936790410001701,(OCoLC)70054238,31958001104969,BOOK,,CUMC,GEN,GEN,MISSING,,0,,
4319733,9951886070001701,(OCoLC)24157867,31951P01018528J,BOOK,,TMUSI,GEN,GEN,LOST_LOAN,31951P00958163Q;,0,,


In [19]:
lost_msg2 = lost_msg.assign(holding_status='LM')
lost_msg2

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
1133,9973956598901701,(OCoLC)828858919,319530011535116,BOOK,,DUMD,UMDBK,UMDBK,MISSING,,0,,LM
5010,9954361780001701,(OCoLC)20296270,31951D024007326,BOOK,,TVET,GEN,GEN,MISSING,Gift of M. Lisa Berg,0,,LM
6939,9973998008601701,(OCoLC)909397324,31951D03493603C,BOOK,,TBIOM,GEN,GEN,MISSING,,0,,LM
7569,9962248210001701,(OCoLC)52357507,31951D02658046B,BOOK,,TWILS,GEN,GEN,MISSING,,0,,LM
8244,9975607604301701,(OCoLC)1008576927,31951D03657739D,BOOK,,TWILS,GEN,GEN,LOST_LOAN,,0,,LM
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4318255,9974752773501701,(OCoLC)944132880,31951D03493793H,BOOK,,TBIOM,GEN,GEN,MISSING,,0,,LM
4318683,9912097200001701,(OCoLC)275593,31956001726617,BOOK,,MBRIG,GEN,GEN,LOST_LOAN,PS3053 .H32,0,,LM
4319391,9936790410001701,(OCoLC)70054238,31958001104969,BOOK,,CUMC,GEN,GEN,MISSING,,0,,LM
4319733,9951886070001701,(OCoLC)24157867,31951P01018528J,BOOK,,TMUSI,GEN,GEN,LOST_LOAN,31951P00958163Q;,0,,LM


In [20]:
spm4.update(lost_msg2, overwrite=False)
spm4

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
0,9933808450001701,(OCoLC)65700639,319510005417514,BOOK,,TWILS,GEN,GEN,,,0,,
1,9933808360001701,(OCoLC)4587773,31956001217864,BOOK,,MBRIG,GEN,GEN,,QK926 .P62,0,,
2,9933808350001701,(OCoLC)6377917,31951P00533318G,BOOK,,TWILS,GEN,GEN,,,0,,
3,9933808230001701,(OCoLC)62626655,31951T00174761U,BOOK,,TWILS,GEN,GEN,,,0,,
4,9933808210001701,(OCoLC)9799919,31951000140326G,BOOK,,TMAGR,GEN,GEN,,,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4326751,9951825480001701,(OCoLC)34585654,31951P00419577H,BOOK,,TSCI,GEN,GEN,,,0,,
4326752,9951825380001701,(OCoLC)32589145,31951P004266158,BOOK,,TWILS,GEN,GEN,,,0,,
4326753,9951825330001701,(OCoLC)2370706,31951P00375825F,BOOK,,TNRL,GOVU,WDN,TECHNICAL,MORE BCODES:31951P00375825F,1,,WD
4326754,9951825320001701,(OCoLC)6042829,31951D013899679,BOOK,,TAND,CLR,CLR,,,0,,


In [21]:
spm4[spm4['holding_status'] == 'LM']

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
1133,9973956598901701,(OCoLC)828858919,319530011535116,BOOK,,DUMD,UMDBK,UMDBK,MISSING,,0,,LM
5010,9954361780001701,(OCoLC)20296270,31951D024007326,BOOK,,TVET,GEN,GEN,MISSING,Gift of M. Lisa Berg,0,,LM
6939,9973998008601701,(OCoLC)909397324,31951D03493603C,BOOK,,TBIOM,GEN,GEN,MISSING,,0,,LM
7569,9962248210001701,(OCoLC)52357507,31951D02658046B,BOOK,,TWILS,GEN,GEN,MISSING,,0,,LM
8244,9975607604301701,(OCoLC)1008576927,31951D03657739D,BOOK,,TWILS,GEN,GEN,LOST_LOAN,,0,,LM
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4318255,9974752773501701,(OCoLC)944132880,31951D03493793H,BOOK,,TBIOM,GEN,GEN,MISSING,,0,,LM
4318683,9912097200001701,(OCoLC)275593,31956001726617,BOOK,,MBRIG,GEN,GEN,LOST_LOAN,PS3053 .H32,0,,LM
4319391,9936790410001701,(OCoLC)70054238,31958001104969,BOOK,,CUMC,GEN,GEN,MISSING,,0,,LM
4319733,9951886070001701,(OCoLC)24157867,31951P01018528J,BOOK,,TMUSI,GEN,GEN,LOST_LOAN,31951P00958163Q;,0,,LM


In [22]:
#at this point any row with no information in the holding_status column should be currently held
currhol = spm4[spm4['holding_status'].isnull()]
currhol

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
0,9933808450001701,(OCoLC)65700639,319510005417514,BOOK,,TWILS,GEN,GEN,,,0,,
1,9933808360001701,(OCoLC)4587773,31956001217864,BOOK,,MBRIG,GEN,GEN,,QK926 .P62,0,,
2,9933808350001701,(OCoLC)6377917,31951P00533318G,BOOK,,TWILS,GEN,GEN,,,0,,
3,9933808230001701,(OCoLC)62626655,31951T00174761U,BOOK,,TWILS,GEN,GEN,,,0,,
4,9933808210001701,(OCoLC)9799919,31951000140326G,BOOK,,TMAGR,GEN,GEN,,,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4326750,9951825630001701,(OCoLC)34493309,31951D00903854U,BOOK,,TNRL,GEN,GEN,,,0,,
4326751,9951825480001701,(OCoLC)34585654,31951P00419577H,BOOK,,TSCI,GEN,GEN,,,0,,
4326752,9951825380001701,(OCoLC)32589145,31951P004266158,BOOK,,TWILS,GEN,GEN,,,0,,
4326754,9951825320001701,(OCoLC)6042829,31951D013899679,BOOK,,TAND,CLR,CLR,,,0,,


In [23]:
currhol2 = currhol.assign(holding_status='CH')
currhol2

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
0,9933808450001701,(OCoLC)65700639,319510005417514,BOOK,,TWILS,GEN,GEN,,,0,,CH
1,9933808360001701,(OCoLC)4587773,31956001217864,BOOK,,MBRIG,GEN,GEN,,QK926 .P62,0,,CH
2,9933808350001701,(OCoLC)6377917,31951P00533318G,BOOK,,TWILS,GEN,GEN,,,0,,CH
3,9933808230001701,(OCoLC)62626655,31951T00174761U,BOOK,,TWILS,GEN,GEN,,,0,,CH
4,9933808210001701,(OCoLC)9799919,31951000140326G,BOOK,,TMAGR,GEN,GEN,,,0,,CH
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4326750,9951825630001701,(OCoLC)34493309,31951D00903854U,BOOK,,TNRL,GEN,GEN,,,0,,CH
4326751,9951825480001701,(OCoLC)34585654,31951P00419577H,BOOK,,TSCI,GEN,GEN,,,0,,CH
4326752,9951825380001701,(OCoLC)32589145,31951P004266158,BOOK,,TWILS,GEN,GEN,,,0,,CH
4326754,9951825320001701,(OCoLC)6042829,31951D013899679,BOOK,,TAND,CLR,CLR,,,0,,CH


In [24]:
spm4.update(currhol2, overwrite=False)
spm4

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
0,9933808450001701,(OCoLC)65700639,319510005417514,BOOK,,TWILS,GEN,GEN,,,0,,CH
1,9933808360001701,(OCoLC)4587773,31956001217864,BOOK,,MBRIG,GEN,GEN,,QK926 .P62,0,,CH
2,9933808350001701,(OCoLC)6377917,31951P00533318G,BOOK,,TWILS,GEN,GEN,,,0,,CH
3,9933808230001701,(OCoLC)62626655,31951T00174761U,BOOK,,TWILS,GEN,GEN,,,0,,CH
4,9933808210001701,(OCoLC)9799919,31951000140326G,BOOK,,TMAGR,GEN,GEN,,,0,,CH
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4326751,9951825480001701,(OCoLC)34585654,31951P00419577H,BOOK,,TSCI,GEN,GEN,,,0,,CH
4326752,9951825380001701,(OCoLC)32589145,31951P004266158,BOOK,,TWILS,GEN,GEN,,,0,,CH
4326753,9951825330001701,(OCoLC)2370706,31951P00375825F,BOOK,,TNRL,GOVU,WDN,TECHNICAL,MORE BCODES:31951P00375825F,1,,WD
4326754,9951825320001701,(OCoLC)6042829,31951D013899679,BOOK,,TAND,CLR,CLR,,,0,,CH


In [25]:
#check to see if there are any NaN results or other information to clean up in the holding_status column
spm4['holding_status'].unique()

array(['CH', 'WD', 'LM'], dtype=object)

In [42]:
#the following are a series of final data checks
#if the resulting dataframe is not empty, need to add logic to clean up invalid library codes
weird_lib = spm4[spm4['perm_lib'].apply(lambda x: len(str(x)) > 5)]
weird_lib

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status


In [43]:
#make sure there's nothing weird in the permanent library column
#watch out for LMICH|DTWED|DAILR|DNRRI|DCED
drop_badzmlac['perm_lib'].unique()

array(['TWILS', 'MBRIG', 'TMAGR', 'TAND', 'TBIOM', 'DUMD', 'TNRL', 'TSCI',
       'TCOS', 'TBWAN', 'TARCH', 'ZMLAC', 'TMUSI', 'TLAW', 'TMATH',
       'TAHL', 'TVET', 'TJOUR', 'CUMC', 'TLAKE', 'RUMR', 'WIBKS', 'TINTE'],
      dtype=object)

In [44]:
drop_badzmlac['perm_loc'].unique()

array(['GEN', 'CLR', 'RARD', 'UMDBK', 'GOVS', 'GOVU', 'THS', 'SWH', 'AME',
       'CLRH', 'EAS', 'RARE', 'CLS', 'UMDGS', 'GENN', 'CBIC', 'CBI',
       'UMDBB', 'CHL', 'RAR', 'RARH', 'USD', 'YMCAA', 'GENX', 'RARV',
       'IHRCA', 'RART', 'GOVH', 'REF', 'BELL', 'MAP', 'GOV', 'GOVX',
       'MUS', 'UMD', 'UMDLR', 'MAPMR', 'MAPO', 'AMEX', 'UMDSN', 'MSSNA',
       'MCG', 'RARG', 'STO', 'BELLR', 'GOVJ', 'RARA', 'CLRO', 'UMDCL',
       'CHILD', 'RARK', 'FTL', 'LART', 'EASR', 'REFS', 'UMDTM', 'GOVK',
       'LAW', 'RARM', 'IHRCG', 'RARL', 'GOVC', 'REFC', 'UMDSC', 'YMCA',
       'UNAO', 'GENW', 'SRV', 'CLRL', 'HRL', 'MSPL', 'UND', 'ARC', 'CSC',
       'UMDRC', 'RARAV', 'GOVN', 'MOL', 'UNA', 'WDN', 'PAM', 'REFD',
       'OWL', 'REFT', 'INPL', 'PDISP', 'RARF', 'INPT', 'FSC', 'RARR',
       'RARW', 'RARB', 'RARZ', 'HEG', 'RASK', 'TED', 'OHW', 'SN2',
       'YMCAR', 'REFCO', 'MEDST', 'IHRCZ', 'POP', 'GOVUM', 'MAPR', 'GOVO',
       'MAPA', 'MSSLT', 'UMDWL', 'NON', 'STF', 'MSC', 'CLRR', 'VAC',
     

In [45]:
#check syntax to write a query to check locations that should have been excluded
whoops = spm4[spm4['perm_lib'].str.contains('LMICH')]
whoops

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
3805,9962840380001701,(OCoLC)15607785,20000004578593,BOOK,,LMICH,MGD,MGD,,,1,,CH
3806,9928492730001701,(OCoLC)15699647,20000004587818,BOOK,,LMICH,MGD,MGD,,,1,,CH
3807,9957947600001701,(OCoLC)10104179,20000004148298,BOOK,,LMICH,MGD,MGD,,,1,,CH
3808,9928541090001701,(OCoLC)11353261,20000004279895,BOOK,,LMICH,MGD,MGD,,,1,,CH
3809,9913804180001701,(OCoLC)3338777,20000003286982,BOOK,,LMICH,MGD,MGD,,,1,,CH
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4500389,9953179430001701,(OCoLC)6809362,20000003739519,BOOK,,LMICH,MGD,MGD,,,1,,CH
4500390,9916057830001701,(OCoLC)11080212,20000004248494,BOOK,,LMICH,MGD,MGD,,,1,,CH
4500391,9929535200001701,(OCoLC)11121573,20000004248528,BOOK,,LMICH,MGD,MGD,,,1,,CH
4500392,9961918330001701,(OCoLC)12228454,20000004380792,BOOK,,LMICH,MGD,MGD,,,1,,CH


In [46]:
#something more sophisticated here to check multiple locs
whoops2 = spm4[spm4['perm_loc'].str.contains('MPL|NONX')]
whoops2

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
235,9919749680001701,(OCoLC)378414,31951M011850433,BOOK,,ZMLAC,MPL,MPL,,,0,,CH
238,9919749140001701,(OCoLC)1416067,31951M01195321V,BOOK,,ZMLAC,MPL,MPL,,,0,,CH
239,9919748730001701,(OCoLC)235479,31951M01077339L,BOOK,,ZMLAC,MPL,MPL,,,0,,CH
254,9919732780001701,(OCoLC)262686,31951M012247900,BOOK,,ZMLAC,MPL,MPL,,,0,,CH
258,9919732500001701,(OCoLC)386306,31951M01079117T,BOOK,,ZMLAC,MPL,MPL,,,0,,CH
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4504152,9932541290001701,(OCoLC)1276368,31951M01179557U,BOOK,,ZMLAC,MPL,MPL,,,0,,CH
4504174,9932549440001701,(OCoLC)2093433,31951M01111181H,BOOK,,ZMLAC,MPL,MPL,,"and,mpl",1,,CH
4504199,9932531910001701,(OCoLC)13973975,31951M010524211,BOOK,,ZMLAC,MPL,MPL,,"and,mpl",0,,CH
4504285,9939405420001701,(OCoLC)371050,31951M011042385,BOOK,,ZMLAC,MPL,MPL,,"and,mpl",0,,CH


In [47]:
#if the resulting dataframe is not empty, need to add logic to clean up multiple OCNs
double_ocn = spm4[spm4['OCN'].str.contains(';')]
double_ocn

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status


In [48]:
#check for OCN that don't start with a digit
import re
text_check = re.compile('(\(OCoLC\))[^0-9]')
first_zero = re.compile('(\(OCoLC\))0')
fix_ocns = spm4[(spm4['OCN'].str.match(text_check)) | (spm4['OCN'].str.match(first_zero))]
fix_ocns

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
81028,9975882421001701,(OCoLC)06862123,319510028674292,BOOK,,TWILS,GOVU,GOVU,,,1,,CH
419870,9912612600001701,(OCoLC)+,31951SC20058799,BOOK,,TAND,CBI,CBI,,,0,,CH
794925,9912612600001701,(OCoLC)+,31951000030632M,BOOK,,TBIOM,GEN,WDN,,,0,,WD
950399,9942702530001701,(OCoLC)04528276,31951D01514556B,BOOK,,TLAW,RAR,RAR,,,0,,CH
1012441,9912612600001701,(OCoLC)+,31951SC2005596L,BOOK,,TAND,CBI,CBI,,,0,,CH
1054769,9912612600001701,(OCoLC)+,31951SC2005982G,BOOK,,TAND,CBI,CBI,,,0,,CH
1060663,9964088320001701,(OCoLC)09217036,31953000111547R,BOOK,,DUMD,UMDBK,UMDBK,,,0,,CH
1398810,9964088320001701,(OCoLC)09217036,31951000496644T,BOOK,,TSCI,GEN,GEN,,,0,,CH
1576715,9912612600001701,(OCoLC)+,31951000030634I,BOOK,,ZMLAC,GEN,GEN,,MnU TMc,0,,CH
1595629,9912612600001701,(OCoLC)+,31951SC2005597J,BOOK,,TAND,CBI,CBI,,,0,,CH


In [49]:
#if the resulting dataframe is not empty, need to add logic to clean up OCNs
extra_ocn = spm4[(spm4['OCN'].str.contains(' ')) | (spm4['OCN'].str.contains('\s')) | (spm4['OCN'].str.len() > 17)]
extra_ocn

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status
632578,9917212680001701,(OCoLC)9917212680001701,31951D03433691P,BOOK,,TLAW,GEN,GEN,,,0,,CH
942615,9974773078501701,(OCoLC)916684458 $,31951D03848859Q,BOOK,,TWILS,GEN,GEN,,,0,,CH
3213378,9976662109701701,(OCoLC) 00466436,31951D01507929P,BOOK,,TLAW,STO,STO,,,0,,CH


In [50]:
#if the resulting dataframe is not empty, need to add logic to clean up multiple or bad MMS IDs
multi_mmsid = spm4[spm4['MMS ID'].str.contains('\D')]
multi_mmsid

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status


In [51]:
#see if any rows have item description
has_description = spm4[spm4['description'].notnull()]
has_description

Unnamed: 0,MMS ID,OCN,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,gov_doc_flag,condition,holding_status


In [52]:
#change column headers to match what HathiTrust expects
spm5 = spm4.rename(index=str, columns={"OCN":"OCLC #", "MMS ID":"Partner's Local System ID", "condition":"Condition",
                                       "gov_doc_flag":"Government Documents Indicator", "holding_status":"Holding status"})
spm5

Unnamed: 0,Partner's Local System ID,OCLC #,barcode,material type,description,perm_lib,perm_loc,curr_loc,process_type,internal_note1,Government Documents Indicator,Condition,Holding status
0,9933808450001701,(OCoLC)65700639,319510005417514,BOOK,,TWILS,GEN,GEN,,,0,,CH
1,9933808360001701,(OCoLC)4587773,31956001217864,BOOK,,MBRIG,GEN,GEN,,QK926 .P62,0,,CH
2,9933808350001701,(OCoLC)6377917,31951P00533318G,BOOK,,TWILS,GEN,GEN,,,0,,CH
3,9933808230001701,(OCoLC)62626655,31951T00174761U,BOOK,,TWILS,GEN,GEN,,,0,,CH
4,9933808210001701,(OCoLC)9799919,31951000140326G,BOOK,,TMAGR,GEN,GEN,,,0,,CH
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4504396,9951825480001701,(OCoLC)34585654,31951P00419577H,BOOK,,TSCI,GEN,GEN,,,0,,CH
4504397,9951825380001701,(OCoLC)32589145,31951P004266158,BOOK,,TWILS,GEN,GEN,,,0,,CH
4504398,9951825330001701,(OCoLC)2370706,31951P00375825F,BOOK,,TNRL,GOVU,WDN,TECHNICAL,MORE BCODES:31951P00375825F,1,,WD
4504399,9951825320001701,(OCoLC)6042829,31951D013899679,BOOK,,TAND,CLR,CLR,,,0,,CH


In [53]:
#check the headers
spm5.columns

Index(['Partner's Local System ID', 'OCLC #', 'barcode', 'material type',
       'description', 'perm_lib', 'perm_loc', 'curr_loc', 'process_type',
       'internal_note1', 'Government Documents Indicator', 'Condition',
       'Holding status'],
      dtype='object')

In [54]:
#produce a dataframe that has only the columns we need in the order HT wants
spm6 = spm5[['OCLC #', 'Partner\'s Local System ID', 'Holding status', 'Condition', 'Government Documents Indicator']]
spm6

Unnamed: 0,OCLC #,Partner's Local System ID,Holding status,Condition,Government Documents Indicator
0,(OCoLC)65700639,9933808450001701,CH,,0
1,(OCoLC)4587773,9933808360001701,CH,,0
2,(OCoLC)6377917,9933808350001701,CH,,0
3,(OCoLC)62626655,9933808230001701,CH,,0
4,(OCoLC)9799919,9933808210001701,CH,,0
...,...,...,...,...,...
4504396,(OCoLC)34585654,9951825480001701,CH,,0
4504397,(OCoLC)32589145,9951825380001701,CH,,0
4504398,(OCoLC)2370706,9951825330001701,WD,,1
4504399,(OCoLC)6042829,9951825320001701,CH,,0


In [None]:
#update the date in the filenames before running this cell
spm6.to_pickle('umn_single-part-uncombined_test2_20200619.pkl')
spm6.to_csv('umn_single-part-uncombined_test2_20200619.txt', sep='\t', index=False)

In [None]:
spm6["OCLC #"].nunique()

In [None]:
spm6["Partner's Local System ID"].nunique()