In [1]:
from pymarc import MARCReader

### Suppressed from Discovery in a "manual" export

Checking whether a record that is "suppressed from discovery" will be marked as deleted when it is exported manually from the sandbox.

In [14]:
# MMS ID of the record that is suppressed from discovery in the sandbox.
suppressed_mms_id = '99239027600521'
# MMS ID of the comparison record that was marked as deleted.
marked_mms_id = '99245641300521'

In [3]:
# List the MARC (.mrc) files available in the Downloads folder.
%ls ~/Downloads/*.mrc

/Users/davisda4/Downloads/BIBLIOGRAPHIC_1582954420006676_1.mrc
/Users/davisda4/Downloads/BIBLIOGRAPHIC_1590207060006676_1.mrc
/Users/davisda4/Downloads/nlmui-7501160.mrc


In [5]:
# Read the whole manual export into memory as a list of records.
with open('/Users/davisda4/Downloads/BIBLIOGRAPHIC_1590207060006676_1.mrc', 'rb') as f:
    reader = MARCReader(f, file_encoding='utf-8')
    # Materialise inside the `with` block, while the file is still open.
    records = list(reader)
# Number of records found in the manual export.
len(records)

1247

In [7]:
def get_control_id(rec):
    """Return the value of the record's first 001 field.

    Raises IndexError when the record has no 001 field.
    """
    control_fields = rec.get_fields('001')
    return control_fields[0].value()
get_control_id(records[0])

'99998900000541'

In [15]:
# Find the record we marked as deleted in the manual export and show its leader.
picklist = [
    record
    for record in records
    if marked_mms_id == get_control_id(record)
]
picklist[0].leader

'03402das a2200745 a 4500'

In [16]:
# Find the suppressed record in the manual export and show its leader.
other = [
    record
    for record in records
    if suppressed_mms_id == get_control_id(record)
]
other[0].leader

'01160cas  2200241 i 4500'

But is it really suppressed? Let's double check.

In [17]:
from nlm_alma import Configuration, ApiClient, CatalogApi

# Load the client configuration from the alma-sandbox YAML file in ~/.config.
config = Configuration.load('~/.config/alma-sandbox.yaml')
# `client` is shared: a later cell passes it to SetsApi as well.
client = ApiClient(config)
# Catalog API wrapper used below to fetch a bib record by MMS ID.
bibapi = CatalogApi(client)

In [25]:
# Fetch the supposedly suppressed bib record through the catalog API.
r = bibapi.get_bib_by_id(suppressed_mms_id)
# Note: the flag is returned as the string 'true', not a Python bool.
r.suppress_from_publishing

'true'

### Suppressed From Discovery in a Publishing Profile

Now we check whether it will be marked 'd' (deleted, in the leader's record status) when published through a publishing profile.

In [26]:
# List the MARC (.mrc) files in the current working directory.
%ls *.mrc

sbox_2021072915_1590208860006676_new.mrc


In [27]:
# Read the publishing-profile output into memory as a list of records.
with open('sbox_2021072915_1590208860006676_new.mrc', 'rb') as f:
    reader = MARCReader(f, file_encoding='utf-8')
    # Materialise inside the `with` block, while the file is still open.
    published = list(reader)
# Number of records in the publishing-profile output.
len(published)

1136

Note that we got fewer records (1136 versus 1247 in the manual export); some were simply not published for some reason.

In [29]:
# Published records whose control ID matches the record we marked as deleted.
[
    record
    for record in published
    if marked_mms_id == get_control_id(record)
]

[]

The record which we marked as deleted is not published at all.

In [31]:
# Leader of the suppressed record as it appears in the publishing-profile output.
[
    record
    for record in published
    if suppressed_mms_id == get_control_id(record)
][0].leader

'01160das  2200241 i 4500'

Confirming our suspicion, the suppressed record's leader shows record status 'd' in the publishing profile output, whereas it was 'c' in the manual export.

### Which Records Were Missing?

It is odd that some records present in the manual export are missing.  Let's examine this.

In [32]:
import pandas as pd
import numpy as np
from datetime import datetime

In [36]:
def get_update_dt(rec):
    """Return the timestamp stored in the record's 005 field as a datetime.

    The value is expected to start with ``yyyymmddhhmmss``, optionally followed
    by a dot and a fractional part; anything after the dot is discarded, so
    the result has whole-second precision.

    Raises:
        ValueError: if the record has no 005 field, or the value does not
            parse as a ``yyyymmddhhmmss`` timestamp.
    """
    fields = rec.get_fields('005')
    if not fields:
        # Fail with an explicit message instead of an opaque unpacking error.
        raise ValueError("record has no 005 field; cannot determine last update time")
    # Only the first 005 is consulted. partition() (unlike unpacking the result
    # of split()) also accepts a value that lacks the fractional suffix.
    stamp = fields[0].value().partition('.')[0]
    return datetime.strptime(stamp, "%Y%m%d%H%M%S")
get_update_dt(records[0])

datetime.datetime(2019, 12, 12, 13, 11, 18)

In [37]:
# One row per manually exported record: control ID as the index,
# 005 timestamp as the single column.
manual_fr = pd.DataFrame(
    data={'last_updated': list(map(get_update_dt, records))},
    index=list(map(get_control_id, records)),
)
manual_fr

Unnamed: 0,last_updated
99998900000541,2019-12-12 13:11:18
99998930000541,2019-12-12 13:11:20
99998940000541,2019-12-12 13:11:17
99998960000541,2019-12-12 13:11:18
99999000000541,2019-12-12 13:11:18
...,...
991533660000541,2019-12-12 14:01:35
991533740000541,2019-12-12 14:01:37
991533790000541,2019-12-12 14:01:41
991541940000521,2019-12-12 14:02:04


In [39]:
# Same layout for the publishing-profile output: control ID as the index,
# 005 timestamp as the single column.
pubprofile_fr = pd.DataFrame(
    data={'last_updated': list(map(get_update_dt, published))},
    index=list(map(get_control_id, published)),
)
pubprofile_fr

Unnamed: 0,last_updated
991353610000541,2019-12-12 13:39:57
991358100000541,2019-12-12 13:40:05
991341570000541,2019-12-12 13:39:19
991341530000541,2019-12-12 13:39:25
991331350000541,2019-12-12 13:38:48
...,...
991446850000541,2019-12-12 13:57:30
991431450000541,2019-12-12 13:56:44
991435380000541,2019-12-12 13:57:08
991440110000541,2019-12-12 13:57:15


In [41]:
# Outer join on control ID so records present in only one export are kept;
# the missing side shows up as NaT.
joined = manual_fr.join(
    other=pubprofile_fr,
    how='outer',
    lsuffix='_manual',
    rsuffix='_profile',
)
joined

Unnamed: 0,last_updated_manual,last_updated_profile
991000500000541,2019-12-12 13:11:22,2019-12-12 13:11:22
991000810000541,2019-12-12 13:11:24,NaT
991000870000541,2019-12-12 13:11:33,2019-12-12 13:11:33
991000960000541,2019-12-12 13:11:24,2019-12-12 13:11:24
991011020000541,2019-12-12 13:12:09,2019-12-12 13:12:09
...,...,...
99999630000541,2019-12-12 13:11:20,2019-12-12 13:11:20
99999640000541,2019-12-12 13:11:29,2019-12-12 13:11:29
99999650000541,2019-12-12 13:11:19,2019-12-12 13:11:19
99999930000541,2019-12-12 13:11:26,2019-12-12 13:11:26


Are any records present in the publishing profile output that are missing in the manual export?

In [44]:
# Rows with no manual-export timestamp, i.e. present only in the profile output.
joined.loc[joined['last_updated_manual'].isna()]

Unnamed: 0,last_updated_manual,last_updated_profile


Which records are missing from the publishing profile output?

In [45]:
# Rows with no profile timestamp, i.e. absent from the publishing-profile output.
missing = joined.loc[joined['last_updated_profile'].isna()]
missing

Unnamed: 0,last_updated_manual,last_updated_profile
991000810000541,2019-12-12 13:11:24,NaT
991011100000541,2019-12-12 13:12:14,NaT
991011320000541,2019-12-12 13:12:12,NaT
991011770000541,2019-12-12 13:12:12,NaT
991012300000541,2019-12-12 13:12:09,NaT
...,...,...
99998800000541,2019-12-12 13:11:22,NaT
99998960000541,2019-12-12 13:11:18,NaT
99999030000541,2019-12-12 13:11:24,NaT
99999040000541,2019-12-12 13:11:27,NaT


Are there any records present in both outputs whose timestamps don't match?

In [47]:
# Restrict to records that appear in the publishing-profile output...
both = joined.loc[joined['last_updated_profile'].notna()]
# ...and keep those whose two timestamps disagree.
mismatch = both.loc[both['last_updated_profile'].ne(both['last_updated_manual'])]
mismatch

Unnamed: 0,last_updated_manual,last_updated_profile


Let's see whether there is anything distinctive about those 111 missing records by collecting them into a set!

In [49]:
from nlm_alma import SetsApi

# Build the sets API wrapper on top of the already-configured client.
sets_api = SetsApi(client)

In [52]:
# Payload describing a new, initially empty set: itemized, holding bib MMS IDs,
# non-private and active.
body = {
    'name': 'Missing Records from pubprofile',
    'description': 'Records that were published manually but were missing from the profile',
    'type': {'value': 'ITEMIZED'},
    'content': {'value': 'BIB_MMS'},
    'private': {'value': 'false'},
    'status': {'value': 'ACTIVE'},
    'origin': {'value': 'UI'},
}
# NOTE(review): every run of this cell issues another create_set call;
# presumably that creates a further set with the same name - confirm before re-running.
set_obj = sets_api.create_set(body)
set_obj

{'additional_info': None,
 'content': {'desc': 'All Titles', 'value': 'BIB_MMS'},
 'created_by': {'desc': 'API, Ex Libris', 'value': 'exl_api'},
 'created_date': datetime.datetime(2021, 7, 29, 16, 29, 51, 160000, tzinfo=tzutc()),
 'description': 'Records that were published manually but were missing from '
                'the profile',
 'id': '1590225700006676',
 'link': 'https://api-na.hosted.exlibrisgroup.com/almaws/v1/conf/sets/1590225700006676',
 'members': None,
 'name': 'Missing Records from pubprofile',
 'note': None,
 'number_of_members': {'link': 'https://api-na.hosted.exlibrisgroup.com/almaws/v1/conf/sets/1590225700006676/members',
                       'value': '0'},
 'origin': {'desc': 'Institution only', 'value': 'UI'},
 'private': {'desc': 'No', 'value': 'false'},
 'query': None,
 'status': {'desc': 'Active', 'value': 'ACTIVE'},
 'status_date': datetime.datetime(2021, 7, 29, 16, 29, 51, 160000, tzinfo=tzutc()),
 'type': {'desc': 'Itemized', 'value': 'ITEMIZED'}}

In [54]:
# Membership payload: one {'id': ...} entry per control ID in `missing`,
# plus the total count.
members = {
    'total_record_count': missing.shape[0],
    'member': [{'id': mms_id} for mms_id in missing.index],
}
members

{'total_record_count': 111,
 'member': [{'id': '991000810000541'},
  {'id': '991011100000541'},
  {'id': '991011320000541'},
  {'id': '991011770000541'},
  {'id': '991012300000541'},
  {'id': '991012380000541'},
  {'id': '991012510000541'},
  {'id': '991012560000541'},
  {'id': '991012850000541'},
  {'id': '991012920000541'},
  {'id': '991015840000541'},
  {'id': '991021460000541'},
  {'id': '991025590000541'},
  {'id': '991027170000541'},
  {'id': '991027250000541'},
  {'id': '991027480000541'},
  {'id': '991027530000541'},
  {'id': '991027550000541'},
  {'id': '991027570000541'},
  {'id': '991028340000541'},
  {'id': '991028440000541'},
  {'id': '991028500000541'},
  {'id': '991028570000541'},
  {'id': '991028610000541'},
  {'id': '991028680000541'},
  {'id': '991028710000541'},
  {'id': '991028890000541'},
  {'id': '991028950000541'},
  {'id': '991029050000541'},
  {'id': '991029160000541'},
  {'id': '991029200000541'},
  {'id': '991029540000541'},
  {'id': '991029610000541'},
  {'i

In [56]:
# Attach the membership payload to the set object (this mutates set_obj in place).
set_obj.members = members
# Submit the set with the 'add_members' operation against the set's own ID.
response = sets_api.manage_set_members(set_obj, 'add_members', set_obj.id)
# NOTE(review): 111 IDs were submitted but the response reports 98 members;
# the 13 IDs that were not added are worth investigating.
response

{'additional_info': None,
 'content': {'desc': 'All Titles', 'value': 'BIB_MMS'},
 'created_by': {'desc': 'API, Ex Libris', 'value': 'exl_api'},
 'created_date': datetime.datetime(2021, 7, 29, 16, 29, 51, 160000, tzinfo=tzutc()),
 'description': 'Records that were published manually but were missing from '
                'the profile',
 'id': '1590225700006676',
 'link': 'https://api-na.hosted.exlibrisgroup.com/almaws/v1/conf/sets/1590225700006676',
 'members': None,
 'name': 'Missing Records from pubprofile',
 'note': None,
 'number_of_members': {'link': 'https://api-na.hosted.exlibrisgroup.com/almaws/v1/conf/sets/1590225700006676/members',
                       'value': '98'},
 'origin': {'desc': 'Institution only', 'value': 'UI'},
 'private': {'desc': 'No', 'value': 'false'},
 'query': None,
 'status': {'desc': 'Active', 'value': 'ACTIVE'},
 'status_date': datetime.datetime(2021, 7, 29, 16, 29, 51, 160000, tzinfo=tzutc()),
 'type': {'desc': 'Itemized', 'value': 'ITEMIZED'}}