## Imports

In [1]:
import pandas as pd
import numpy as np

import torch
from torch import nn
from torch.optim import Adam
from tqdm import tqdm

from transformers import BertTokenizer
from transformers import BertModel

## Load BERT Model from PT File

In [2]:
if torch.has_mps:
    device = 'mps'
else:
    device = 'cpu'

print(device)

mps


In [3]:
train_data_path = 'Data/BF_Single_Subject_Clean_Call_Number.csv'
df = pd.read_csv(train_data_path, sep='\t')
df = df.drop("Unnamed: 0", axis=1)



# Hardcoded fix
for i, row in df.iterrows():
  if row['Call_Number'] == '724.55.C63':
    print("Trash")
    df.at[i, 'Call_Number'] = '724'


vals = pd.unique(df['Call_Number'])
print(len(vals))
labels = {key: value for (key, value) in enumerate(vals)}

Trash
657


In [4]:
class BertClassifier(nn.Module):
  def __init__(self, dropout=0.5):
    super(BertClassifier, self).__init__()

    self.bert = BertModel.from_pretrained('bert-base-cased')
    self.dropout = nn.Dropout(dropout)
    self.linear = nn.Linear(768, len(vals)) # Change depending on dataset size
    self.relu = nn.ReLU()

  def forward(self, input_id, mask):
    _, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
    dropout_output = self.dropout(pooled_output)
    linear_output = self.linear(dropout_output)
    final_layer = self.relu(linear_output)

    return final_layer

In [5]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

In [6]:
model_path = "Models/BERT_Classifier_25_epochs_subjects_clean.pt"

model = BertClassifier()
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

## Load EBook Data

In [25]:
import string

ebook_path = 'Data/EbookListforALA.csv'
ebook_df = pd.read_csv(ebook_path, encoding_errors='ignore')
#ebook_df = df.drop("Unnamed: 0", axis=1)
ebook_df.head()

Unnamed: 0,Searched Subject header,EbookID,Call Number,Title,Author,Publication Year,Subject_Headers,Thumbnail Link,Link to Library Record,worldCatCall#],Library,URL
0,Consciousness,e001,,Trauma and its impacts on temporal experience\...,"Mezzalira, Selene",2022,"Psychic trauma, Time perception, Consciousness",https://www.syndetics.com/index.php?isbn=10005...,https://search.lib.byu.edu/byu/record/cat.7418...,BF 175.5 .P75 M49 2022,University of Michigan,https://search.lib.umich.edu/catalog?query=isn...
1,Consciousness,e002,,The evolution of consciousness\nRepresenting t...,"Droege, Paula",2022,"Consciousness, Cognition",https://www.syndetics.com/index.php?isbn=97813...,https://search.lib.byu.edu/byu/record/cat.7405...,BF311 .D76 2022,Stanford,https://searchworks.stanford.edu/view/14125824
2,Cognitive Science,e003,,The adaptable mind\r\nWhat neuroplasticity and...,"Zerilli, John",2021,"Cognitive science, Neuroplasticity, Language a...",http://books.google.com/books/content?id=H-MJE...,https://search.lib.byu.edu/byu/record/cat.7226...,BF311 .Z447 2021,SUU,https://suu.bywatersolutions.com/cgi-bin/koha/...
3,Psychoanalysis,e004,,Psychoanalysis and Deconstruction : Freud's Ps...,"Russell, Jared",2022,"Psychoanalysis, Psychotherapy, Deconstruction",https://www.syndetics.com/index.php?isbn=10004...,https://search.lib.byu.edu/byu/record/cat.7397...,BF173 .R884 2020,UC Boulder,https://libraries.colorado.edu/search/?searchs...
4,Psychoanalysis,e005,,"Mutual analysis\nFerenczi, Severn, and the ori...","Rudnytsky, Peter L.",2022,"Ferenczi, Sándor, 1873-1933, Severn, Elizabeth...",https://www.syndetics.com/index.php?isbn=13152...,https://search.lib.byu.edu/byu/record/cat.7402...,BF109.F47 R83 2022,UCLA,https://search.library.ucla.edu/discovery/full...


## Predict Call Numbers for each EBook and write back to dataframe

In [26]:
for i, row in ebook_df.iterrows():
    # Get headers from data frame
    headers_in = row['Subject_Headers']

    # Clean up headers, remove punctuation, lower case
    headers_in = headers_in.replace('[{}]'.format(string.punctuation), '')
    headers_in = headers_in.lower()


    # Tokenize Headings to pass through model
    tokenized = tokenizer(headers_in, padding='max_length',max_length=512, truncation=True,return_tensors="pt")
    input_id = tokenized['input_ids']
    mask = tokenized['attention_mask']

    # Get model output
    output = model(input_id, mask)

    # Get label from dictionary
    res = output.argmax(dim=1)
    call_out = labels[res.item()]

    # Create Call Number and write to data frame
    author_first_letter = row['Author'][0]
    new_call = "BF " + str(call_out) + " ." + author_first_letter
    
    print(f"{i}: Assigned call number = {new_call}")

    ebook_df.at[i, 'Call Number'] = new_call

    
ebook_df.head()

0: Assigned call number = BF 175.5 .M
1: Assigned call number = BF 311 .D
2: Assigned call number = BF 311 .Z
3: Assigned call number = BF 173 .R
4: Assigned call number = BF 173 .R
5: Assigned call number = BF 353 .B
6: Assigned call number = BF 353 .D
7: Assigned call number = BF 378 .G
8: Assigned call number = BF 371 .G
9: Assigned call number = BF 378 .B
10: Assigned call number = BF 456 .E
11: Assigned call number = BF 385 .M
12: Assigned call number = BF 385 .S
13: Assigned call number = BF 408 .S
14: Assigned call number = BF 408 .F
15: Assigned call number = BF 412 .S
16: Assigned call number = BF 408 .K
17: Assigned call number = BF 39 .B
18: Assigned call number = BF 176 .L
19: Assigned call number = BF 431 .E
20: Assigned call number = BF 431 .H
21: Assigned call number = BF 455 .B
22: Assigned call number = BF 671 .F
23: Assigned call number = BF 721 .R
24: Assigned call number = BF 721 .W
25: Assigned call number = BF 173 .G
26: Assigned call number = BF 431 .P
27: Assign

Unnamed: 0,Searched Subject header,EbookID,Call Number,Title,Author,Publication Year,Subject_Headers,Thumbnail Link,Link to Library Record,worldCatCall#],Library,URL
0,Consciousness,e001,BF 175.5 .M,Trauma and its impacts on temporal experience\...,"Mezzalira, Selene",2022,"Psychic trauma, Time perception, Consciousness",https://www.syndetics.com/index.php?isbn=10005...,https://search.lib.byu.edu/byu/record/cat.7418...,BF 175.5 .P75 M49 2022,University of Michigan,https://search.lib.umich.edu/catalog?query=isn...
1,Consciousness,e002,BF 311 .D,The evolution of consciousness\nRepresenting t...,"Droege, Paula",2022,"Consciousness, Cognition",https://www.syndetics.com/index.php?isbn=97813...,https://search.lib.byu.edu/byu/record/cat.7405...,BF311 .D76 2022,Stanford,https://searchworks.stanford.edu/view/14125824
2,Cognitive Science,e003,BF 311 .Z,The adaptable mind\r\nWhat neuroplasticity and...,"Zerilli, John",2021,"Cognitive science, Neuroplasticity, Language a...",http://books.google.com/books/content?id=H-MJE...,https://search.lib.byu.edu/byu/record/cat.7226...,BF311 .Z447 2021,SUU,https://suu.bywatersolutions.com/cgi-bin/koha/...
3,Psychoanalysis,e004,BF 173 .R,Psychoanalysis and Deconstruction : Freud's Ps...,"Russell, Jared",2022,"Psychoanalysis, Psychotherapy, Deconstruction",https://www.syndetics.com/index.php?isbn=10004...,https://search.lib.byu.edu/byu/record/cat.7397...,BF173 .R884 2020,UC Boulder,https://libraries.colorado.edu/search/?searchs...
4,Psychoanalysis,e005,BF 173 .R,"Mutual analysis\nFerenczi, Severn, and the ori...","Rudnytsky, Peter L.",2022,"Ferenczi, Sándor, 1873-1933, Severn, Elizabeth...",https://www.syndetics.com/index.php?isbn=13152...,https://search.lib.byu.edu/byu/record/cat.7402...,BF109.F47 R83 2022,UCLA,https://search.library.ucla.edu/discovery/full...


In [30]:
for i, row in ebook_df.iterrows():
    ebook_df.at[i, 'Title'] = ebook_df.at[i, 'Title'].replace('\n', ' ')

ebook_df.head()

Unnamed: 0,Searched Subject header,EbookID,Call Number,Title,Author,Publication Year,Subject_Headers,Thumbnail Link,Link to Library Record,worldCatCall#],Library,URL
0,Consciousness,e001,BF 175.5 .M,Trauma and its impacts on temporal experience ...,"Mezzalira, Selene",2022,"Psychic trauma, Time perception, Consciousness",https://www.syndetics.com/index.php?isbn=10005...,https://search.lib.byu.edu/byu/record/cat.7418...,BF 175.5 .P75 M49 2022,University of Michigan,https://search.lib.umich.edu/catalog?query=isn...
1,Consciousness,e002,BF 311 .D,The evolution of consciousness Representing th...,"Droege, Paula",2022,"Consciousness, Cognition",https://www.syndetics.com/index.php?isbn=97813...,https://search.lib.byu.edu/byu/record/cat.7405...,BF311 .D76 2022,Stanford,https://searchworks.stanford.edu/view/14125824
2,Cognitive Science,e003,BF 311 .Z,The adaptable mind\r What neuroplasticity and ...,"Zerilli, John",2021,"Cognitive science, Neuroplasticity, Language a...",http://books.google.com/books/content?id=H-MJE...,https://search.lib.byu.edu/byu/record/cat.7226...,BF311 .Z447 2021,SUU,https://suu.bywatersolutions.com/cgi-bin/koha/...
3,Psychoanalysis,e004,BF 173 .R,Psychoanalysis and Deconstruction : Freud's Ps...,"Russell, Jared",2022,"Psychoanalysis, Psychotherapy, Deconstruction",https://www.syndetics.com/index.php?isbn=10004...,https://search.lib.byu.edu/byu/record/cat.7397...,BF173 .R884 2020,UC Boulder,https://libraries.colorado.edu/search/?searchs...
4,Psychoanalysis,e005,BF 173 .R,"Mutual analysis Ferenczi, Severn, and the orig...","Rudnytsky, Peter L.",2022,"Ferenczi, Sándor, 1873-1933, Severn, Elizabeth...",https://www.syndetics.com/index.php?isbn=13152...,https://search.lib.byu.edu/byu/record/cat.7402...,BF109.F47 R83 2022,UCLA,https://search.library.ucla.edu/discovery/full...


## Write New Ebook Data to MYSQL Database

In [31]:
ebook_df.to_csv('Data/EbookForALA_WithPredictions.csv')

In [32]:
import mysql.connector

In [34]:
start_ind = 21412 + 1

# Connect to MYSQL Database
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    password="Gksdnihon101!",
    database='library_books'
)

# Create Cursor
mycursor = mydb.cursor()


for i, row in ebook_df.iterrows():
    title = row['Title']
    r_type = "EBook"
    author = row['Author']
    call = row['Call Number']
    thumb = row['Thumbnail Link']
    digit = call.split(' ')[1]
    alpha = call.split(' ')[2][1]

    sql = f"""INSERT INTO ala_last (
            TableIndex, TitleID, Title, Resource_Type, Author, Call_Number, Call_Digit, Call_Author, Thumbnail_Link)
            VALUES ({start_ind+i}, {'NULL'}, "{title}","{r_type}", "{author}","{call}",{digit},"{alpha}", "{thumb}")"""

    print(sql)

    # Execute and commit
    mycursor.execute(sql)
    mydb.commit()

INSERT INTO ala_last (
            TableIndex, TitleID, Title, Resource_Type, Author, Call_Number, Call_Digit, Call_Author, Thumbnail_Link)
            VALUES (21413, NULL, "Trauma and its impacts on temporal experience New perspectives from phenomenology and psychoanalysis","EBook", "Mezzalira, Selene","BF 175.5 .M",175.5,"M", "https://www.syndetics.com/index.php?isbn=100053166X/lc.jpg&client=sirsi")
INSERT INTO ala_last (
            TableIndex, TitleID, Title, Resource_Type, Author, Call_Number, Call_Digit, Call_Author, Thumbnail_Link)
            VALUES (21414, NULL, "The evolution of consciousness Representing the present moment","EBook", "Droege, Paula","BF 311 .D",311,"D", "https://www.syndetics.com/index.php?isbn=9781350166813/lc.jpg&client=sirsi")
INSERT INTO ala_last (
            TableIndex, TitleID, Title, Resource_Type, Author, Call_Number, Call_Digit, Call_Author, Thumbnail_Link)
 What neuroplasticity and neural reuse tell us about language and cognition","EBook", "Zerill