# Extract Records from .mrc Through Matching
Given a list of unique identifiers (ISBNs, DOIs, URLs, etc.), extract the records containing those identifiers.

## 1. Open the MARC file the records are being extracted from and the file the selected records are going into

In [5]:
from pathlib import Path
# Location of the .mrc file the records are read from.
MARC_FILE_LOCATION = Path('C:\\') / 'Users' / 'ereiskind' / 'Downloads' / 'MyMarcRecords.mrc'
# Location of the .mrc file the selected records are written to.
NEW_MARC_FILE_LOCATION = Path('C:\\') / 'Users' / 'ereiskind' / 'OneDrive - Florida State University' / 'Attachments' / 'UPSO_missing_20210430.mrc'

# Both handles are binary and stay open across the following cells;
# they are closed in the last cell of the notebook.
starting_MARC_file = MARC_FILE_LOCATION.open('rb')
new_MARC_file = NEW_MARC_FILE_LOCATION.open('wb')

## 2. Create the list of identifiers
1. Add the unique identifiers to the cell below as a string with one identifier per line
2. Set `identifier_field` to a string of the field and possible subfield the identifiers are being compared against; if a subfield is included, use "$" as the delimiter

In [6]:
# Paste the identifiers between the triple quotes, one identifier per line.
identifiers_text = """
10.5743/cairo/9789774162930.001.0001
10.1525/california/9780520202160.001.0001
10.1525/california/9780520282322.001.0001
10.1525/california/9780520294394.001.0001
10.1525/california/9780520295902.001.0001
10.7208/chicago/9780226031088.001.0001
10.7208/chicago/9780226184036.001.0001
10.7208/chicago/9780226451787.001.0001
10.7208/chicago/9780226582689.001.0001
10.7208/chicago/9780226768397.001.0001
10.7591/cornell/9780801449314.001.0001
10.3366/edinburgh/9780748623440.001.0001
10.3366/edinburgh/9780748623808.001.0001
10.3366/edinburgh/9780748625338.001.0001
10.3366/edinburgh/9780748633227.001.0001
10.3366/edinburgh/9780748637874.001.0001
10.3366/edinburgh/9780748684618.001.0001
10.3366/edinburgh/9781474402927.001.0001
10.5422/fordham/9780823251384.001.0001
10.5422/fso/9780823230099.001.0001
10.5422/fso/9780823230570.001.0001
10.5422/fso/9780823231874.001.0001
10.21313/hawaii/9780824833695.001.0001
10.21313/hawaii/9780824840945.001.0001
10.21313/hawaii/9780824851569.001.0001
10.5406/illinois/9780252038273.001.0001
10.5622/illinois/9780252041334.001.0001
10.5949/liverpool/9781781380307.001.0001
10.5949/UPO9781846312717
10.5949/UPO9781846313615
10.5949/UPO9781846314445
10.7228/manchester/9780719072161.001.0001
10.7228/manchester/9780719076213.001.0001
10.7228/manchester/9781784991326.001.0001
10.7551/mitpress/9780262018104.001.0001
10.5149/9780807869406_malone
10.5149/9780807882627_mcwhirter
10.5149/9780807886397_rondon
10.5149/northcarolina/9781469620848.001.0001
10.5149/northcarolina/9781469622439.001.0001
10.5149/northcarolina/9781469622699.001.0001
10.5149/northcarolina/9781469631868.001.0001
10.18574/nyu/9780814709221.001.0001
10.18574/nyu/9780814716915.001.0001
10.18574/nyu/9780814787007.001.0001
10.18574/nyu/9781479859405.001.0001
10.18574/nyu/9781479867011.001.0001
10.23943/princeton/9780691150765.001.0001
10.23943/princeton/9780691160733.001.0001
10.11126/stanford/9780804770866.001.0001
10.11126/stanford/9780804774437.001.0001
10.11126/stanford/9780804780155.001.0001
10.1093/acprof:oso/9780199552894.001.0001
10.1093/acprof:oso/9780199644735.001.0001
"""
# One list entry per non-blank line. Surrounding whitespace is stripped and blank
# lines are dropped: an empty string is a substring of every string, so a stray
# blank line would otherwise make every record "match". splitlines() also copes
# with Windows (\r\n) line endings in pasted text.
identifiers = [line.strip() for line in identifiers_text.splitlines() if line.strip()]

# Field and subfield the identifiers are compared against, with "$" as the delimiter.
identifier_field = "856$u"

## 3. Check the identifiers against the records
This includes keeping a list of the matched identifiers

In [7]:
from pymarc import MARCReader, Record
import sys

MARCfile = MARCReader(starting_MARC_file)
matched_identifiers = []  # One entry per record written to the new file

# The field being matched doesn't change from record to record, so check it once
# before reading anything (this also reports the problem when the file has no records).
if identifier_field != "856$u":
    print(f"The MARC tag {identifier_field} hasn't been matched to a PyMARC tag in this program. The program is exiting.")
    starting_MARC_file.close()
    new_MARC_file.close()
    sys.exit()

# MARC records must be the outer loop: when used as the inner loop, the reader
# doesn't reset for each outer loop iteration, so the records would only be
# checked against the first item in "identifiers".
for record in MARCfile:
    if record is None:
        # Recent pymarc versions yield None for a record they could not parse; skip it.
        continue

    # Collect every $u from every 856 field. A record with no 856 field (or an
    # 856 without $u) yields an empty list instead of raising an error.
    field_values = [
        value
        for field in record.get_fields('856')
        for value in field.get_subfields('u')
    ]

    for identifier in identifiers:
        # Substring test: the identifier only needs to appear somewhere in the value.
        if any(identifier in value for value in field_values):
            new_MARC_file.write(record.as_marc())
            matched_identifiers.append(identifier)
            break  # Write each record once, at the first identifier it matches

## 4. Output the identifiers not matched

In [8]:
# Work out the unmatched identifiers directly instead of comparing list lengths:
# "matched_identifiers" gets one entry per matching record, so an identifier found
# in several records appears several times and the lengths can be equal even
# though some identifiers were never matched.
matched_identifier_set = set(matched_identifiers)
identifiers_not_matched = [
    identifier
    for identifier in identifiers
    if identifier not in matched_identifier_set
]

# Only report when there is something to report.
if identifiers_not_matched:
    print(identifiers_not_matched)

## 5. Close the MARC files
To create a .mrk file from these MARC files, this notebook needs to be closed.

In [9]:
# Close the input handle first, then the output handle.
for marc_file_handle in (starting_MARC_file, new_MARC_file):
    marc_file_handle.close()