# Bestandsabfrage von Judaica Zeitschriften 
über die DNB SRU Schnittstelle

nutzt MARC21-XML (https://www.loc.gov/marc/bibliographic/)


In [3]:
# for the sake of readability remove the warnings
import warnings

warnings.filterwarnings('ignore')

## Import the libraries

In [4]:
import pandas as pd
import numpy as np
import os

# Get the metadata

In [None]:
df_metadata = pd.read_excel('metadata/CM_Seiten_Metadaten.xlsx')
df_metadata.head()

## Statistics on the Metadata  

In [None]:
df_metadata.shape

In [None]:
df_metadata.sample(5)

In [None]:
# pandas get all headers
df_metadata.columns

## Bereinigung der Daten

In [None]:
# drop columns VLID Seite, OT_PAHT, VLID_Zs
df_metadata.drop(['VLID_Seite', 'OT_PATH', 'VLID_Zs', 'VLID_Parent', 'Parent Knotentyp', 'Seite (OT_SORT)', 'Seite_Caption', 'Seitenzahl_Caption'], axis=1, inplace=True)

In [None]:
# list all values in column 'Parent-Type'
df_metadata['Parent-Type'].unique()

## Get all Journal titles   

In [None]:
df_metadata['Zs_Caption'].unique()

In [None]:
# remove duplicates in column 'Zs_Caption'
df_metadata['Zs_Caption'] = df_metadata['Zs_Caption'].str.replace(' \(.*\)', '')

In [None]:
df_metadata['Zs_Caption'].unique()

In [None]:
df_metadata.sample(5)

In [None]:
df_metadata['Aufsatz_Caption'].unique()

## load the pickle file

In [None]:
import pickle

# Open the Pickle file
with open('metadata/journal_metadata/journal_list.pickle', 'rb') as file:
    # Load the Pickle data
    loaded_data = pickle.load(file)

# Close the file
file.close()

# Now you can work with the loaded data
print(loaded_data)


## Load the JSON file

In [None]:
import json

with open('metadata/journal_metadata/journal_metadata_title_lang.json', 'r') as file:
    data = json.load(file)

In [None]:
import pprint
pprint.pprint(data)

# DNB SRU

In [21]:
import requests
from bs4 import BeautifulSoup as soup
import unicodedata
from lxml import etree
import pandas as pd


# Funktion zur DNB SRU Abfrage

In [22]:
def dnb_sru(query, library='dnb', start=1, max_records=1000):
    base_url = "https://services.dnb.de/sru/" + library
    params = {'recordSchema' : 'MARC21-xml',
              'operation': 'searchRetrieve',
              'version': '1.1',
              'maximumRecords': max_records,
              'startRecord': start,
              'query': query
              }
    r = requests.get(base_url, params=params)
    xml = soup(r.content)
    records = xml.find_all('record', {'type':'Bibliographic'})

    if len(records) < 100:
        return records

    else:
        num_results = 100
        i = 101
        while num_results == 100:
            params.update({'startRecord': i})
            r = requests.get(base_url, params=params)
            xml = soup(r.content)
            new_records = xml.find_all('record', {'type':'Bibliographic'})
            records+=new_records
            i+=100
            num_results = len(new_records)

        return records

## Alle Judaica 

In [40]:
records = dnb_sru(query='tit=Judaica', library='dnb', start=1, max_records=1000)
print(len(records), 'Ergebnisse')

1425 Ergebnisse


In [24]:
import xml.dom.minidom
temp = soup(open(records), "xml")
new_xml = temp.prettify()
print(new_xml)

TypeError: unhashable type: 'ResultSet'

# print the xml tree in the first result


In [None]:
def parse_record(record):

    ns = {"marc":"http://www.loc.gov/MARC21/slim"}
    xml = etree.fromstring(unicodedata.normalize("NFC", str(record)))

    #idn
    idn = xml.xpath("marc:controlfield[@tag = '001']", namespaces=ns)
    try:
        idn = idn[0].text
    except:
        idn = 'fail'

    # title
    title = xml.xpath("marc:datafield[@tag = '245']/marc:subfield[@code = 'a']", namespaces=ns)
    subtitle = xml.xpath("marc:datafield[@tag = '245']/marc:subfield[@code = 'b']", namespaces=ns)
   
    # library
    library = xml.xpath("marc:datafield[@tag = '040']/marc:subfield[@code = 'a']", namespaces=ns)
   
    # category
    category = xml.xpath("marc:datafield[@tag = '084']/marc:subfield[@code = 'a']", namespaces=ns)
   
    # date
    date = xml.xpath("marc:datafield[@tag = '264']/marc:subfield[@code = 'c']", namespaces=ns)
   
    # person 
    person = xml.xpath("marc:datafield[@tag = '100']/marc:subfield[@code = 'a']", namespaces=ns)
   
    # ddc
    ddc = xml.xpath("marc:datafield[@tag = '082']/marc:subfield[@code= 'a']", namespaces=ns)
   
    #issn
    issn = xml.xpath("marc:datafield[@tag = '022']/marc:subfield[@code= 'a']", namespaces=ns)
   
    # language
    language = xml.xpath("marc:datafield[@tag = '041']/marc:subfield[@code= 'a']", namespaces=ns)
   
    # GND Number
    gnd_number = xml.xpath("marc:datafield[@tag = '024']/marc:subfield[@code = 'a']", namespaces=ns)
   
    # Extract Statement of Responsibility
    responsibility = xml.xpath("marc:datafield[@tag = '245']/marc:subfield[@code = 'c']", namespaces=ns)
   
    # Extract Publication Information
    publication_info = xml.xpath("marc:datafield[@tag = '260']", namespaces=ns)
   
    # Extract Frequency and Regularity of Publication
    frequency = xml.xpath("marc:datafield[@tag = '310']/marc:subfield[@code = 'a']", namespaces=ns)
   
    # Extract Subject Access Points
    subjects = xml.xpath("marc:datafield[@tag = '650']/marc:subfield[@code = 'a' or @code = 'x' or @code = 'v']", namespaces=ns)
   
    # Extract Electronic Location and Access
    electronic_access = xml.xpath("marc:datafield[@tag = '856']/marc:subfield[@code = 'u']", namespaces=ns)
    
    # Extract Physical Description
    physical_description = xml.xpath("marc:datafield[@tag = '300']/marc:subfield[@code = 'a']", namespaces=ns)
    
    # Extract Series Statement
    series_statement = xml.xpath("marc:datafield[@tag = '440']/marc:subfield[@code = 'a']", namespaces=ns)
    
    # Extract Other Standard Identifier
    other_identifier = xml.xpath("marc:datafield[@tag = '024']/marc:subfield[@code = 'a']", namespaces=ns)
    
    # Extract Notes
    notes = xml.xpath("marc:datafield[@tag = '500']/marc:subfield[@code = 'a']", namespaces=ns)
    
    # Extract Subject Headings
    subjects = xml.xpath("marc:datafield[@tag = '650']/marc:subfield[@code = 'a' or @code = 'x' or @code = 'v']", namespaces=ns)
    
    # Extract Linking Entry Fields
    linking_entry = xml.xpath("marc:datafield[starts-with(@tag, '76') or starts-with(@tag, '77') or starts-with(@tag, '78')]", namespaces=ns)
    
    # Extract Classification Numbers (Dewey Decimal Classification)
    dewey_classification = xml.xpath("marc:datafield[@tag = '082']/marc:subfield[@code = 'a']", namespaces=ns)
    
    # Extract Classification Numbers (Government Document Classification)
    gov_doc_classification = xml.xpath("marc:datafield[@tag = '086']/marc:subfield[@code = 'a']", namespaces=ns)


    try:
        title = title[0].text
        library = library
        subtitle = subtitle[0].text if len(subtitle) > 0 else ""
        category = category[0].text if len(category) > 0 else ""
        date = date[0].text if len(date) > 0 else ""
        person = person[0].text if len(person) > 0 else ""
        ddc = ddc[0].text if len(ddc) > 0 else ""
        issn = issn[0].text if len(issn) > 0 else ""
        language = language[0].text if len(language) > 0 else ""
        gnd_number = gnd_number[0].text if len(gnd_number) > 0 else ""
        responsibility = responsibility[0].text if len(responsibility) > 0 else ""
        publication_info = publication_info[0].text if len(publication_info) > 0 else ""
        frequency = frequency[0].text if len(frequency) > 0 else ""
        subjects = [subject.text for subject in subjects]
        electronic_access = electronic_access[0].text if len(electronic_access) > 0 else ""
        notes = notes[0].text if len(notes) > 0 else ""
        physical_description = physical_description[0].text if len(physical_description) > 0 else ""
        series_statement = series_statement[0].text if len(series_statement) > 0 else ""
        other_identifier = other_identifier[0].text if len(other_identifier) > 0 else ""
        linking_entry = [entry.text for entry in linking_entry]
        dewey_classification = dewey_classification[0].text if len(dewey_classification) > 0 else ""
        gov_doc_classification = gov_doc_classification[0].text if len(gov_doc_classification) > 0 else ""
        subjects = [subject.text for subject in subjects]
        
        #titel = unicodedata.normalize("NFC", titel)
    except:
        title = "unkown"

    meta_dict = {
        "idn":idn,
        "library":library,
        "title":title,
        "subtitle":subtitle,
        "category":category,
        "date":date,
        "person":person,
        'ddc':ddc,
        'issn':issn,
        'language':language,
        'gnd_number':gnd_number,
        'responsibility':responsibility,
        'publication_info':publication_info,
        'frequency':frequency,
        'subjects':subjects,
        'electronic_access':electronic_access,
        'notes':notes,
        'physical_description':physical_description,
        'series_statement':series_statement,
        'other_identifier':other_identifier,
        'linking_entry':linking_entry,
        'dewey_classification':dewey_classification,
        'gov_doc_classification':gov_doc_classification
        }

    return meta_dict

In [None]:
output = [parse_record(record) for record in records]
df_test = pd.DataFrame(output)
df_test

In [None]:
## try to get the data for the zdb and the title Jeshurun

In [None]:
zdb_records = dnb_sru(query='tit=Jeschurun', library='zdb', start=1, max_records=1000)
print(len(zdb_records), 'Ergebnisse')

zdb_output = [parse_record(zdb_record) for zdb_record in zdb_records]
zdb_df = pd.DataFrame(zdb_output)
zdb_df

# Daten zusammenführen

In [None]:
# Create an empty DataFrame to store the aggregated data
df_aggregated = pd.DataFrame(columns=['idn', 'title', 'subtitle', 'date'])

# Group by 'title' and 'subtitle' and aggregate 'idn' and 'date' into lists
grouped_data = zdb_df.groupby(['title', 'subtitle']).agg({'idn': list, 'date': lambda x: x.unique().tolist() if len(x.unique()) == 1 else x.tolist()}).reset_index()

# Populate the aggregated DataFrame
df_aggregated['idn'] = grouped_data['idn']
df_aggregated['date'] = grouped_data['date']
df_aggregated['title'] = grouped_data['title']
df_aggregated['subtitle'] = grouped_data['subtitle']


# Display the aggregated DataFrame
df_aggregated


### Remove the temporary pickle file

In [None]:
# remove the temp_data.pkl file
if os.path.exists('temp_data.pkl'):
    os.remove('temp_data.pkl')

## Load the titles

In [None]:
# print Zs_Caption unique values
titles = df_metadata['Zs_Caption'].unique()

### Print the titles

In [None]:
print(titles)

In [None]:
# go through all the titles in the column 'Zs_Caption'
# Load an existing temporary DataFrame if it exists

try:
    temp_df = pd.read_pickle('temp_data.pkl')
except FileNotFoundError:
    temp_df = pd.DataFrame()
    
counter = 1
length = len(titles)

temp_df = pd.DataFrame()
library = 'dnb'
column_headers = ['library', 'idn',  'title', 'subtitle', 'category', 'date', 'person', 'ddc', 'issn']

for title in titles:
    # search for the title in the sru interface
    print('Counter: ' + str(counter) + ' of ' + str(length))
    # calculate the percentage of the progress
    percentage = round(counter / length * 100, 2)
    print('Progress: ' + str(percentage) + '%')
    print('Searching for: ' + title)
    records = dnb_sru('tit=' + title, library=library)
    print(len(records), 'Ergebnisse')
    
    if len(records) > 0:

        output = [parse_record(record) for record in records]
        df_records = pd.DataFrame(output)
        temp_df = pd.concat([temp_df, df_records], ignore_index=True)
    counter = counter + 1
    # Save the temporary DataFrame to a Pickle file
    temp_df.to_pickle('temp_data.pkl')

In [None]:
# go through all the titles in the column 'Zs_Caption'
# Load an existing temporary DataFrame if it exists

try:
    temp_df = pd.read_pickle('temp_data.pkl')
except FileNotFoundError:
    temp_df = pd.DataFrame()

counter = 1
length = len(titles)

temp_df = pd.DataFrame()
library = 'zdb'

for title in titles:
    # search for the title in the sru interface
    print('Counter: ' + str(counter) + ' of ' + str(length))
    # calculate the percentage of the progress
    percentage = round(counter / length * 100, 2)
    print('Progress: ' + str(percentage) + '%')
    print('Searching for: ' + title)
    records = dnb_sru('tit=' + title, library=library)
    print(len(records), 'Ergebnisse')

    if len(records) > 0:
        output = [parse_record(record) for record in records]
        df_records = pd.DataFrame(output)
        temp_df = pd.concat([temp_df, df_records], ignore_index=True)
    counter = counter + 1
    # Save the temporary DataFrame to a Pickle file
    temp_df.to_pickle('temp_data.pkl')

In [None]:
large_df = pd.DataFrame()
# Load the temporary DataFrame from the Pickle file
temp_df = pd.read_pickle('temp_data.pkl')

# Append the temporary DataFrame to your large DataFrame
large_df = pd.concat([large_df, temp_df], ignore_index=True)

# remove duplicates
large_df.drop_duplicates(subset=['title', 'date'], keep='first', inplace=True)

# Save the large DataFrame to a CSV file
large_df.to_csv('final_data.csv', index=True, sep=';')

large_df.to_excel('final_data.xlsx')

# Ergebnisse bereinigen

In [None]:
# Daten neu laden

df_data = pd.read_csv('final_data.csv', index_col=0, sep=';')
df_data.sample(5)

In [None]:
df_data.to_csv('final_data_cleaned.csv', index=True, sep=';')
df_data.to_excel('final_data_cleaned.xlsx')

In [None]:
# remove everything which is JUDAICA in the column "category"
df_data = df_data[df_data['category'] != 'JUDAICA']
df_data.sample(5)

In [None]:
# Remove rows containing "jud" or "israel" in the "title" column
df_data = df_data[~df_data['title'].str.contains('jud|Jud|jüd|Jüd|israel|Israel|hebr|Hebr', case=False)]
df_data = df_data[~df_data['subtitle'].str.contains('jud|Jud|jüd|Jüd|israel|Israel|hebr|Hebr', case=False)]

In [None]:
# Define the patterns to search for
patterns = ['jüd*', 'jud*', 'Jüd*', 'Jud*', 'israel*', 'Israel*', 'hebr*', 'Hebr*']

# Combine the patterns into a single regular expression pattern
regex_pattern = '|'.join(patterns)

# Create a boolean mask for rows that match the patterns in the "title" or "subtitle" columns
mask = (df_data['title'].str.contains(regex_pattern, case=False, na=False) |
        df_data['subtitle'].str.contains(regex_pattern, case=False, na=False))

# Filter out the rows that match the patterns
df_data = df_data[~mask]



In [None]:
with open("journal_titles.txt", 'r', encoding='utf-8') as file:
    lines = file.readlines()
    titles = set(line.strip() for line in lines)

In [None]:
print(len(titles))

In [None]:
import re

# Read the text file and store titles/subtitles in a set
with open("journal_titles.txt", 'r', encoding='utf-8') as file:
    lines = file.readlines()
    titles_to_remove = set(line.strip() for line in lines)

# Escape special characters in each pattern and combine them into a single regular expression pattern
escaped_patterns = [re.escape(pattern) for pattern in patterns]
regex_pattern = '|'.join(escaped_patterns)

# Create a boolean mask for rows where either the title or subtitle contains any string from the file
mask_title = df_data['title'].str.contains(regex_pattern, case=False, na=False)
mask_subtitle = df_data['subtitle'].str.contains(regex_pattern, case=False, na=False)

# Filter out the rows that match any string from the file in the title or subtitle
df_data = df_data[~(mask_title | mask_subtitle)]

# Print or use the resulting DataFrame
df_data

In [None]:
df_data.to_csv('final_data_rest.csv', index=True, sep=';')
df_data.to_excel('final_data_rest.xlsx')

In [None]:
print(len(df_data))

In [None]:
df_data

# Manuelle Bereinigung
Aufgrund der hohen Anzahl an unrelevanten Treffern ist eine manuelle Bereinigung notwendig.
Die relevanten Treffer wurden aus der final_data_rest.csv entfernt und als final_data_manuel.csv gespeichert.
Die unrelevanten Treffer wurden aufgrund der Masse belassen und werden dann von dem gesamten Bestand abgezogen.

Es wurden über 78.000 Treffer gefunden, die manuell bereinigt werden müssen. Davon enthalten viele ein Stichwort wie Jahresbericht, Blätter, etc. und sind somit nicht relevant.

Die Bereinigung ergab immerhin noch 1327 relevante Treffer. Das sind über 1000 Treffer mehr als in den vorhandenen Metadaten.

Davon sind einige Titel trotzdem doppelt, die aber nicht bereinigt werden. Da sie manchmal verschiedene Untertitel enthalten, in verschiedenen Jahren erschienen (z.B. neue Folgen) sind oder es sich um verschiedene Zeitschriften mit gleichem Titel handelt.

In [20]:
df_mask = pd.read_csv('final_data_manual.csv', index_col=0, sep=';')
df_data = pd.read_csv('final_data.csv', index_col=0, sep=';')

# Remove the manually filtered rows from the original DataFrame
df_data = df_data[~df_data['idn'].isin(df_mask['idn'])]
df_data.to_csv('final_data_aggregated.csv', index=True, sep=';')

In [4]:
print(len(df_data))

1327


In [5]:
df_data

Unnamed: 0,idn,library,title,subtitle,category,date,person,ddc,issn
0,012981613,zdb,Führer durch die jüdische Gemeindeverwaltung u...,,JUDAICA,1932-1933,,340,
2,015310019,zdb,Führer durch die jüdische Wohlfahrtspflege in ...,,JUDAICA,1928-1929,,290,
4,1147584931,zdb,Actes et conférences de la Société des Études ...,,JUDAICA,1886-1889,,290,
6,015223736,zdb,Jüdischer Almanach für Groß-Rumänien,,JUDAICA,1922-,,290,
7,1143848942,zdb,Jüdischer Almanach,,JUDAICA,1902-1903,,910,
...,...,...,...,...,...,...,...,...,...
78704,1185662448,zdb,Jüdischer Volksfreund,"Monats-Beilage zum ""Israelit""",JUDAICA,1906,,290,
78705,1177932881,zdb,Beilage in Wiedergutmachungsfragen,,JUDAICA,1947-1947,,290,
78706,1199950920,zdb,Bericht des Vorstandes der Israelitischen Ster...,für das Jahr ...,,[1895?-1905],,360,
78708,015236781,zdb,Gemeindeblatt für die jüdischen Gemeinden Preu...,,JUDAICA,1934-1937,,070,


# Daten zusammenführen

## Vorbereiten der Daten

In [7]:
# remove all NaN values and replace them with an empty string
df_data.fillna('', inplace=True)

In [8]:
# split the date column into two columns
df_data['date'] = df_data['date'].str.replace('$b', '-')
df_data['date'] = df_data['date'].str.replace('oder', '-') # Ist zwar nicht richtig, aber so ist es einfacher. Alternativ splitten

df_data[['year_from', 'year_to']] = df_data['date'].str.split('-', expand=True)
# remove all "?" from the date columns
df_data['year_from'] = df_data['year_from'].str.replace('?', '')
df_data['year_from'] = df_data['year_from'].str.replace('None', '')
df_data['year_from'] = df_data['year_from'].str.replace('[', '')
df_data['year_from'] = df_data['year_from'].str.replace(']', '')
# convert to integer
df_data['year_from'] = pd.to_numeric(df_data['year_from'], errors='coerce').fillna(0).astype(int) # Nicht optimal, da 0 ein gültiges Jahr sein kann

df_data['year_to'] = df_data['year_to'].str.replace('?', '')
df_data['year_to'] = df_data['year_to'].str.replace('None', '')
df_data['year_to'] = df_data['year_to'].str.replace('[', '')
df_data['year_to'] = df_data['year_to'].str.replace(']', '')
# convert to integer
df_data['year_to'] = pd.to_numeric(df_data['year_to'], errors='coerce').fillna(0).astype(int) # Nicht optimal, da 0 ein gültiges Jahr sein kann


In [9]:
# if year_to is nan or NaT then set it to year_from
df_data['year_to'] = df_data['year_to'].fillna(df_data['year_from'])

In [8]:
df_data

Unnamed: 0,idn,library,title,subtitle,category,date,person,ddc,issn,year_from,year_to
0,012981613,zdb,Führer durch die jüdische Gemeindeverwaltung u...,,JUDAICA,1932-1933,,340,,1932,1933
2,015310019,zdb,Führer durch die jüdische Wohlfahrtspflege in ...,,JUDAICA,1928-1929,,290,,1928,1929
4,1147584931,zdb,Actes et conférences de la Société des Études ...,,JUDAICA,1886-1889,,290,,1886,1889
6,015223736,zdb,Jüdischer Almanach für Groß-Rumänien,,JUDAICA,1922-,,290,,1922,0
7,1143848942,zdb,Jüdischer Almanach,,JUDAICA,1902-1903,,910,,1902,1903
...,...,...,...,...,...,...,...,...,...,...,...
78704,1185662448,zdb,Jüdischer Volksfreund,"Monats-Beilage zum ""Israelit""",JUDAICA,1906,,290,,1906,0
78705,1177932881,zdb,Beilage in Wiedergutmachungsfragen,,JUDAICA,1947-1947,,290,,1947,1947
78706,1199950920,zdb,Bericht des Vorstandes der Israelitischen Ster...,für das Jahr ...,,[1895?-1905],,360,,1895,1905
78708,015236781,zdb,Gemeindeblatt für die jüdischen Gemeinden Preu...,,JUDAICA,1934-1937,,070,,1934,1937


# Statistische Auswertungen

Welche Daten konnten vervollständigt werden


# ZDB (Zeitschriftendatenbank)


# Interate over the titles

In [None]:
# go through all the titles

df_records_per_title = pd.DataFrame(columns=['title', 'number_of_records'])
zdb_data = pd.DataFrame()

for title in titles:
    query = f'tit={title}'
    # fetch the number of the results
    records = dnb_sru(query=query, library='zdb', start=1, max_records=1000)
    length_records = len(records)
    print(f'Tile: {title} Results: {length_records}')
    # append to the dataframe
    df_records_per_title =  df_records_per_title.append({'title': title, 'number_of_records': length_records}, ignore_index=True)
    if 0 < length_records <= 100:
         output = [parse_record(record) for record in records]
         # append the records to the zdb_data
         zdb_data = zdb_data.append(output, ignore_index=True)
        
# save the number of records to csv
df_records_per_title.to_csv('number_records.csv')
df_records_per_title

### list titles with no values
Here we have no hits. That is fine.

In [None]:
titles_with_zero_records = df_records_per_title[df_records_per_title['number_of_records'] == 0]['title']

# Print the titles
print("Titles with 0 number_of_records:")
for title in titles_with_zero_records:
    print(title)


### list tiles with high values
There are to much records for the title found. This needs manual inspection.

In [None]:
df_records_per_title[df_records_per_title['number_of_records'] >= 100]


## XXX Search for subtitles
We will check for the subtitles in the ZDB
Es gibt keinen direkten Eintrag für die Untertitel in der ZDB.
Daher prüfen wir den vollständigen Titel (tst).

In [None]:
df_records_per_title = pd.DataFrame(columns=['title', 'number_of_records'])
zdb_data = pd.DataFrame()

for title in titles:
    query = f'tst={title}'
    # fetch the number of the results
    records = dnb_sru(query=query, library='zdb', start=1, max_records=1000)
    length_records = len(records)
    print(f'Tile: {title} Results: {length_records}')
    # append to the dataframe
    df_records_per_title =  df_records_per_title.append({'title': title, 'number_of_records': length_records}, ignore_index=True)
    if 0 < length_records <= 100:
        output = [parse_record(record) for record in records]
        # append the records to the zdb_data
        zdb_data = zdb_data.append(output, ignore_index=True)

# save the number of records to csv
df_records_per_title.to_csv('number_records.csv')
df_records_per_title

# print the records


In [None]:
zdb_data


## List all the entries which are available in CM

In [None]:
cm_data = zdb_data[zdb_data['electronic_access'].str.contains('http://sammlungen.ub.uni-frankfurt.de/cm/')]
cm_data

In [None]:
cm_data['title'].count()

In [None]:
zdb_data['title'].count()

In [None]:
zdb_data['title'].count() - cm_data['title'].count()

## XXX Check if the titles with no records can be found 

## Save the data to a csv file

In [None]:
zdb_data.to_csv('zdb_data.csv')

## Criteria 2: Dates

# Unmatched titles


In [None]:
# clean dates in df_metadata

# Check if values in 'date_column' match the yyyy-mm-dd format
date_format_regex = r'^\d{4}-\d{2}-\d{2}$'
is_valid_date = df_metadata['Datum'].str.match(date_format_regex)

# Filter the DataFrame to get rows where 'date_column' contains invalid dates
invalid_dates_df = df_metadata[~is_valid_date]  # Inverting the boolean mask with ~

# Print the rows with invalid dates
print("Rows with invalid dates:")
invalid_dates_df

In [None]:
df_metadata['Datum'] = pd.to_datetime(df_metadata['Datum'], errors='coerce')

# Filter the DataFrame to get rows where 'date_column' is before '1750-01-01' or is null
filtered_df = df_metadata[(df_metadata['Datum'] < '1750-01-01') | df_metadata['Datum'].isnull()]

# Print the filtered DataFrame
print("Filtered DataFrame:")
filtered_df

In [None]:
print(df_metadata['Volume_Caption'].unique().tolist())


In [None]:
# load the cleaned data for testing
df_metadata = pd.read_csv("metadata/CM_Seiten_Metadaten_cleaned.csv")
df_metadata.head(5)

In [None]:
# Replace "(null)" strings with pd.NaT
df_metadata['Datum'] = df_metadata['Datum'].replace('(null)', pd.NaT)

# Convert 'date' column to datetime type
df_metadata['Datum'] = pd.to_datetime(df_metadata['Datum'], errors='coerce')

# Group by 'title' and aggregate 'date' to find the minimum and maximum dates
grouped_df = df_metadata.groupby('Zs_Caption')['Datum'].agg(['min', 'max']).reset_index()

# Rename the columns to 'date_from' and 'date_to'
grouped_df.rename(columns={'min': 'date_from', 'max': 'date_to'}, inplace=True)

grouped_df['year_from'] = grouped_df['date_from'].dt.year.astype('Int64')
grouped_df['year_to'] = grouped_df['date_to'].dt.year.astype('Int64')

# Print the result
grouped_df

In [None]:
print(len(df_metadata['Zs_Caption'].unique()))

In [None]:
print(len(df_metadata['Zs_Caption'].unique()))

# XXX Ist in den bereinigten Daten die Spalte 'Zs_Caption' schon bereinigt von Duplikaten?
Besser ohne Duplikate checken
   

In [None]:
# Clean the Volume_Caption column
print(df_metadata['Volume_Caption'].unique().tolist())
# Damit kann man eigentlich nicht arbeiten

In [None]:
# Clean the Heft_Caption column
print(df_metadata['Heft_Caption'].unique().tolist())

In [None]:
import re
import numpy as np

def extract_year(text):
    if isinstance(text, str):
        pattern_heft = r'(Heft\s)(\d{4})'  # Match 4 digits after "Heft "
        text = re.remove(pattern_heft, text)
        
        pattern = r'(?<=Heft\s)(\d{4}(?:-\d{4})?)|(?<=\()(\d{4}(?:-\d{4})?)(?=\))'  # Match 4 digits or yyyy-yyyy range after "Heft " or within parentheses
        matches = re.findall(pattern, text)
        if matches:
            return matches[-1]  # Return the last match found
    return np.nan

df_metadata['Volume_Year'] = df_metadata['Volume_Caption'].apply(extract_year)
df_metadata

In [None]:
df_metadata['Heft_Year'] = df_metadata['Heft_Caption'].apply(extract_year)
df_metadata

In [None]:
print(df_metadata['Volume_Year'].unique().tolist())

In [None]:
print(df_metadata['Volume_Year'].dtype)

In [None]:
# convert the year if it is bigger than 5000 from Hebrew to gregorian
import pandas as pd
from jewish import JewishDate
# Hebcal does not install

# Convert non-missing values in the "Volume_Year" column to integers
df_metadata['Volume_Year'] = pd.to_numeric(df_metadata['Volume_Year'], errors='coerce')

def convert_to_gregorian(hebrew_year):
    # Create a JewishDate object for the Hebrew year
    jewish_date = JewishDate(year=hebrew_year, month=1, day=1)
    # Convert JewishDate object to Gregorian date
    gregorian_date = jewish_date.to_date()
    # Extract the Gregorian year
    gregorian_year = gregorian_date.year
    return gregorian_year

df_metadata['Volume_Year'].fillna(0, inplace=True)

df_metadata.loc[df_metadata['Volume_Year'] > 5000, 'Volume_Year'] = df_metadata.loc[df_metadata['Volume_Year'] > 5000, 'Volume_Year'].apply(convert_to_gregorian)

print(df_metadata)


In [None]:
print(df_metadata['Heft_Year'].unique().tolist())

# Matching

## Load the data

In [14]:
# convert the year_from and the year_to in df_data to datetime
df_data['year_from'] = pd.to_datetime(df_data['year_from'], format='%Y', errors='coerce')
df_data['year_to'] = pd.to_datetime(df_data['year_to'], format='%Y', errors='coerce')


In [5]:
df_metadata = pd.read_csv('metadata/CM_Seiten_Metadaten_aggregated.csv', sep='\t')

## Test the data

In [16]:
df_metadata.sample(5)

Unnamed: 0,Zs_Caption,Year_From,Year_To,Date_From,Date_To
13,Antisemitisches Jahrbuch für ...,1847.0,1922.0,1900-01-01,1903-01-01
89,Dibre Emeth,1851.0,1939.0,1845-01-01,1906-01-01
495,Blätter,,,1915-03-01,1921-12-22
34,"Bericht über die Jacobson-Schule, Realschule m...",1890.0,1890.0,1871-01-01,1930-01-01
324,Freie jüdische Lehrerstimme,,,1912-03-15,1920-10-15


In [93]:
len(df_data)

1327

In [42]:
df_data

Unnamed: 0,idn,library,title,subtitle,category,date,person,ddc,issn,year_from,year_to
0,012981613,zdb,Führer durch die jüdische Gemeindeverwaltung u...,,JUDAICA,1932-1933,,340,,1932-01-01,1933-01-01
2,015310019,zdb,Führer durch die jüdische Wohlfahrtspflege in ...,,JUDAICA,1928-1929,,290,,1928-01-01,1929-01-01
4,1147584931,zdb,Actes et conférences de la Société des Études ...,,JUDAICA,1886-1889,,290,,1886-01-01,1889-01-01
6,015223736,zdb,Jüdischer Almanach für Groß-Rumänien,,JUDAICA,1922-,,290,,1922-01-01,1922-01-01
7,1143848942,zdb,Jüdischer Almanach,,JUDAICA,1902-1903,,910,,1902-01-01,1903-01-01
...,...,...,...,...,...,...,...,...,...,...,...
78704,1185662448,zdb,Jüdischer Volksfreund,"Monats-Beilage zum ""Israelit""",JUDAICA,1906,,290,,1906-01-01,1906-01-01
78705,1177932881,zdb,Beilage in Wiedergutmachungsfragen,,JUDAICA,1947-1947,,290,,1947-01-01,1947-01-01
78706,1199950920,zdb,Bericht des Vorstandes der Israelitischen Ster...,für das Jahr ...,,[1895?-1905],,360,,1895-01-01,1905-01-01
78708,015236781,zdb,Gemeindeblatt für die jüdischen Gemeinden Preu...,,JUDAICA,1934-1937,,070,,1934-01-01,1937-01-01


## Merge the titles

## Criteria 1: Title or subtitle

In [90]:
df_merged = pd.merge(df_metadata, df_data, left_on='Zs_Caption', right_on='title', how='left', copy=True)
df_merged

Unnamed: 0,Zs_Caption,Year_From,Year_To,Date_From,Date_To,idn,library,title,subtitle,category,date,person,ddc,issn,year_from,year_to
0,... Bericht der Jüdischen Haushaltungsschule z...,1835.0,1835.0,1821-01-01,1910-01-01,,,,,,,,,,NaT,NaT
1,... Bericht ueber den Verein für Westfalen und...,1932.0,1932.0,1836-01-01,1836-01-01,,,,,,,,,,NaT,NaT
2,... Bericht ueber den Verein für die Provinz W...,1871.0,1871.0,1835-01-01,1835-01-01,,,,,,,,,,NaT,NaT
3,... Jahresbericht der Israelitischen Waisenans...,1836.0,1836.0,1915-01-01,1917-01-01,,,,,,,,,,NaT,NaT
4,... Jahresbericht der Jüdischen Frauenvereinig...,1870.0,1870.0,1855-01-01,1922-01-01,1185652604,zdb,... Jahresbericht der Jüdischen Frauenvereinig...,,JUDAICA,2018,,290,,2018-01-01,2018-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
993,Jüdisches Gemeindeblatt für den Verband der Ku...,,,1937-08-01,1938-11-01,010269754,zdb,Jüdisches Gemeindeblatt für den Verband der Ku...,,JUDAICA,1937-1938,,290,,1937-01-01,1938-01-01
994,Jüdisches Gemeindeblatt für den Verband der Ku...,,,1937-08-01,1938-11-01,989711102,zdb,Jüdisches Gemeindeblatt für den Verband der Ku...,,JUDAICA,1937-1937,,070,,1937-01-01,1937-01-01
995,Aḥiasaf,,,1893-01-01,1922-01-01,1201291062,zdb,Aḥiasaf,"luaḥ-ʿam sifruti ṿe-shimushi, ʿim temunot ṿe-t...",JUDAICA,[1893-1922],,290,,1893-01-01,1922-01-01
996,Aḥiasaf,,,1893-01-01,1922-01-01,010120858,zdb,Aḥiasaf,"luaḥ-ʿam sifruti ṿe-shimushi, ʿim temunot ṿe-t...",JUDAICA,1893-1924,,290,,1893-01-01,1924-01-01


In [92]:
# find all unmatched titles
unmatched_titles = df_merged[df_merged['title'].isnull()]
unmatched_titles

Unnamed: 0,Zs_Caption,Year_From,Year_To,Date_From,Date_To,idn,library,title,subtitle,category,date,person,ddc,issn,year_from,year_to
0,... Bericht der Jüdischen Haushaltungsschule z...,1835.0,1835.0,1821-01-01,1910-01-01,,,,,,,,,,NaT,NaT
1,... Bericht ueber den Verein für Westfalen und...,1932.0,1932.0,1836-01-01,1836-01-01,,,,,,,,,,NaT,NaT
2,... Bericht ueber den Verein für die Provinz W...,1871.0,1871.0,1835-01-01,1835-01-01,,,,,,,,,,NaT,NaT
3,... Jahresbericht der Israelitischen Waisenans...,1836.0,1836.0,1915-01-01,1917-01-01,,,,,,,,,,NaT,NaT
5,... Verwaltungsbericht des Haupt-Grenz-Comité'...,1928.0,1929.0,1870-01-01,1870-01-01,,,,,,,,,,NaT,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979,Bericht des Departements für Landwirtschaftlic...,,,1935-08-01,1935-08-01,,,,,,,,,,NaT,NaT
980,Political report of the Executive of the Jewis...,,,1937-08-01,1937-08-01,,,,,,,,,,NaT,NaT
985,Zuwachsverzeichnis für die Jahre ...,,,1924-01-01,1934-01-01,,,,,,,,,,NaT,NaT
991,Jahresbericht der Jugendgruppe (begründet von ...,,,1913-01-01,1913-01-01,,,,,,,,,,NaT,NaT


In [12]:
df_metadata_title = df_metadata['Zs_Caption']

In [64]:
len(df_metadata_title)

616

In [70]:
df_metadata_title.sample(5)

0      ... Bericht der Jüdischen Haushaltungsschule z...
1      ... Bericht ueber den Verein für Westfalen und...
2      ... Bericht ueber den Verein für die Provinz W...
3      ... Jahresbericht der Israelitischen Waisenans...
4      ... Jahresbericht der Jüdischen Frauenvereinig...
                             ...                        
611              Illustrirter jüdischer Familienkalender
612    Jahresbericht der Jugendgruppe (begründet von ...
613    Jüdisches Gemeindeblatt für den Verband der Ku...
614                                              Aḥiasaf
615                 Séance du comité de direction du ...
Name: Zs_Caption, Length: 616, dtype: object

In [10]:
df_data_title = df_data['title']

In [19]:
# Assuming df_metadata_title and df_data_title are the dataframes to be merged

# Step 1: Merge dataframes on 'Zs_Caption' and 'title'
merged_df = pd.merge(df_metadata_title, df_data_title, how='outer', left_on='Zs_Caption', right_on='title', suffixes=('_meta', '_data'))

# Step 2: Identify where titles are the same
same_titles = merged_df[merged_df['Zs_Ca ption'] == merged_df['title']]
print("Titles that are the same in both dataframes:")
print(len(same_titles))
same_titles[['Zs_Caption', 'title']]

Titles that are the same in both dataframes:
784


Unnamed: 0,Zs_Caption,title
8,... Jahresbericht der Jüdischen Frauenvereinig...,... Jahresbericht der Jüdischen Frauenvereinig...
9,... Jahresbericht der Jüdischen Frauenvereinig...,... Jahresbericht der Jüdischen Frauenvereinig...
12,Actes et conférences de la Société des Études ...,Actes et conférences de la Société des Études ...
13,Actes et conférences de la Société des Études ...,Actes et conférences de la Société des Études ...
14,Adressbuch für den jüdischen Buchhandel,Adressbuch für den jüdischen Buchhandel
...,...,...
987,Zionist library for boys and girls,Zionist library for boys and girls
988,Zionist library for boys and girls,Zionist library for boys and girls
989,Zionist library for boys and girls,Zionist library for boys and girls
996,Židovská ročenka pre Slovensko,Židovská ročenka pre Slovensko


In [16]:
# Step 3: Identify titles only in df_metadata
metadata_only = merged_df[merged_df['title'].isnull()]
print("\nTitles only in df_metadata:")
print(len(metadata_only))
metadata_only['Zs_Caption']89


Titles only in df_metadata:
214


0      ... Bericht der Jüdischen Haushaltungsschule z...
1      ... Bericht der Jüdischen Haushaltungsschule z...
2      ... Bericht ueber den Verein für Westfalen und...
3      ... Bericht ueber den Verein für Westfalen und...
4      ... Bericht ueber den Verein für die Provinz W...
                             ...                        
991                 Zuwachsverzeichnis für die Jahre ...
992    [Verein zur Pflege und Unterstützung israeliti...
993    [Verein zur Pflege und Unterstützung israeliti...
994           Österreichisch-ungarische Cantoren-Zeitung
995           Österreichisch-ungarische Cantoren-Zeitung
Name: Zs_Caption, Length: 214, dtype: object

In [17]:
# Step 4: Identify titles only in df_data
data_only = merged_df[merged_df['Zs_Caption'].isnull()]
print("\nTitles only in df_data:")
print(len(data_only))
data_only['title']


Titles only in df_data:
935


998                  Jüdischer Almanach für Groß-Rumänien
999                                                 Golem
1000       Jüdischer Almanach ... des Leo-Baeck-Instituts
1001                         KC-Blätter / Kartell-Convent
1002                        Bar Kochba-Hakoah Nachrichten
                              ...                        
1928                          Das jüdische Centralblatt
1929                          Das jüdische Centralblatt
1930                             Der israelitische Bote
1931    Statuten für die Achawa, Verein zur Unterstütz...
1932    Gemeindeblatt für die jüdischen Gemeinden Preu...
Name: title, Length: 935, dtype: object

In [80]:
df_merged_title = pd.merge(df_metadata_title, df_data_title, how='outer', left_on="Zs_Caption", right_on='title')

In [81]:
df_merged_title.drop_duplicates(subset='title')

Unnamed: 0,Zs_Caption,title
0,... Bericht der Jüdischen Haushaltungsschule z...,
8,... Jahresbericht der Jüdischen Frauenvereinig...,... Jahresbericht der Jüdischen Frauenvereinig...
12,Actes et conférences de la Société des Études ...,Actes et conférences de la Société des Études ...
14,Adressbuch für den jüdischen Buchhandel,Adressbuch für den jüdischen Buchhandel
16,Allgemeine Israelitische Wochenschrift,Allgemeine Israelitische Wochenschrift
...,...,...
1927,,Das Echo
1928,,Das jüdische Centralblatt
1930,,Der israelitische Bote
1931,,"Statuten für die Achawa, Verein zur Unterstütz..."


In [82]:
df_merged_title

Unnamed: 0,Zs_Caption,title
0,... Bericht der Jüdischen Haushaltungsschule z...,
1,... Bericht der Jüdischen Haushaltungsschule z...,
2,... Bericht ueber den Verein für Westfalen und...,
3,... Bericht ueber den Verein für Westfalen und...,
4,... Bericht ueber den Verein für die Provinz W...,
...,...,...
1928,,Das jüdische Centralblatt
1929,,Das jüdische Centralblatt
1930,,Der israelitische Bote
1931,,"Statuten für die Achawa, Verein zur Unterstütz..."


In [83]:
unmatched_titles = merged_df_title[merged_df_title['title'].isnull()]

In [86]:
unmatched_titles

Unnamed: 0,Zs_Caption,title
0,... Bericht der Jüdischen Haushaltungsschule z...,
2,... Bericht ueber den Verein für Westfalen und...,
4,... Bericht ueber den Verein für die Provinz W...,
6,... Jahresbericht der Israelitischen Waisenans...,
10,... Verwaltungsbericht des Haupt-Grenz-Comité'...,
...,...,...
944,Yediʿot shel Irgun Yotsʾe Shlezyah ʿIlit be-Yi...,
946,Yediʿot shel Irgun ʿOle Breslau be-Yiśraʾel,
990,Zuwachsverzeichnis für die Jahre ...,
992,[Verein zur Pflege und Unterstützung israeliti...,


In [85]:
unmatched_titles = unmatched_titles.drop_duplicates(subset='Zs_Caption')

In [40]:
# Assuming df_metadata_title and df_data_title are the dataframes to be merged

# Step 1: Merge dataframes on titles
merged_df_title = pd.merge(df_metadata_title, df_data_title, how='outer', left_on='Zs_Caption', right_on='title', suffixes=('_meta', '_data'))

# Drop duplicate rows based on the 'title_data' column
merged_df_title = merged_df_title.drop_duplicates(subset='title_data')

# Step 2: Merge on subtitles for unmatched rows
unmatched_titles = merged_df_title[merged_df_title['title_data'].isnull()]
merged_df_subtitle = pd.merge(unmatched_titles, df_data_title, how='left', left_on='Zs_Caption', right_on='subtitle', suffixes=('_meta', '_data'))

# Concatenate the merged dataframes
merged_df = pd.concat([merged_df_title, merged_df_subtitle], ignore_index=True)

# Step 3: Check for overlapping years
merged_df['Year_From'] = pd.to_datetime(merged_df['Year_From'])
merged_df['Year_To'] = pd.to_datetime(merged_df['Year_To'])
merged_df['date'] = pd.to_datetime(merged_df['date'])

# Create a function to check for year overlap
def check_year_overlap(row):
    if row['Year_To_meta'] < row['date'] or row['Year_From_meta'] > row['date']:
        return False
    return True

# Apply the function to create a new column indicating year overlap
merged_df['Year_Overlap'] = merged_df.apply(check_year_overlap, axis=1)

# Step 4: Create a matrix showing the differences in overlapping years
year_overlap_matrix = pd.pivot_table(merged_df, values='Year_Overlap', index='Zs_Caption', columns='title_data', aggfunc='first')

# Display the matrix
year_overlap_matrix.reset_index(inplace=True)
print(year_overlap_matrix)


KeyError: Index(['title_data'], dtype='object')

In [41]:
merged_df_title

Unnamed: 0,Zs_Caption,title
0,... Bericht der Jüdischen Haushaltungsschule z...,
1,... Bericht der Jüdischen Haushaltungsschule z...,
2,... Bericht ueber den Verein für Westfalen und...,
3,... Bericht ueber den Verein für Westfalen und...,
4,... Bericht ueber den Verein für die Provinz W...,
...,...,...
1928,,Das jüdische Centralblatt
1929,,Das jüdische Centralblatt
1930,,Der israelitische Bote
1931,,"Statuten für die Achawa, Verein zur Unterstütz..."


In [19]:
# Step 1: Merge dataframes on titles or subtitles
merged_df_title = pd.merge(df_metadata, df_data, how='inner', left_on='Zs_Caption', right_on='title', suffixes=('_meta', '_data'))

In [20]:
merged_df_title

Unnamed: 0,Zs_Caption,Year_From,Year_To,Date_From,Date_To,idn,library,title,subtitle,category,date,person,ddc,issn,year_from,year_to
0,... Jahresbericht der Jüdischen Frauenvereinig...,1870.0,1870.0,1855-01-01,1922-01-01,1185652604,zdb,... Jahresbericht der Jüdischen Frauenvereinig...,,JUDAICA,2018,,290,,2018-01-01,2018-01-01
1,... Jahresbericht der Jüdischen Frauenvereinig...,,,1855-01-01,1922-01-01,1185652604,zdb,... Jahresbericht der Jüdischen Frauenvereinig...,,JUDAICA,2018,,290,,2018-01-01,2018-01-01
2,Actes et conférences de la Société des Études ...,1892.0,1894.0,1886-01-01,1889-01-01,1147584931,zdb,Actes et conférences de la Société des Études ...,,JUDAICA,1886-1889,,290,,1886-01-01,1889-01-01
3,Actes et conférences de la Société des Études ...,,,1886-01-01,1889-01-01,1147584931,zdb,Actes et conférences de la Société des Études ...,,JUDAICA,1886-1889,,290,,1886-01-01,1889-01-01
4,Adressbuch für den jüdischen Buchhandel,1886.0,1889.0,1927-01-01,1927-01-01,013234129,zdb,Adressbuch für den jüdischen Buchhandel,,JUDAICA,1927-1927,,920,,1927-01-01,1927-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
779,Zionist library for boys and girls,1881.0,1907.0,1908-01-01,1908-01-01,1143836235,zdb,Zionist library for boys and girls,,JUDAICA,2017,,290,,2017-01-01,2017-01-01
780,Zionist library for boys and girls,,,1908-01-01,1908-01-01,018756476,zdb,Zionist library for boys and girls,,JUDAICA,1930-1949,,290,,1930-01-01,1949-01-01
781,Zionist library for boys and girls,,,1908-01-01,1908-01-01,1143836235,zdb,Zionist library for boys and girls,,JUDAICA,2017,,290,,2017-01-01,2017-01-01
782,Židovská ročenka pre Slovensko,1907.0,1914.0,1940-01-01,1940-01-01,1166255131,zdb,Židovská ročenka pre Slovensko,= Jüdisches Jahrbuch für die Slovakei,JUDAICA,1940-1940,,910,,1940-01-01,1940-01-01


In [23]:

# Merge on subtitle if title didn't match
unmatched_subtitle = merged_df[merged_df['title'].isnull()]
merged_df = merged_df.dropna(subset=['title'])
merged_df = pd.merge(unmatched_subtitle, df_data, how='left', left_on='Zs_Caption', right_on='subtitle')

# Concatenate the merged dataframes
merged_df = pd.concat([merged_df, unmatched_subtitle], ignore_index=True)

# Step 2: Check for overlapping years
merged_df['Year_From'] = pd.to_datetime(merged_df['Year_From'])
merged_df['Year_To'] = pd.to_datetime(merged_df['Year_To'])
merged_df['date'] = pd.to_datetime(merged_df['date'])

# Create a function to check for year overlap
def check_year_overlap(row):
    if row['Year_To'] < row['date'] or row['Year_From'] > row['date']:
        return False
    return True

# Apply the function to create a new column indicating year overlap
merged_df['Year_Overlap'] = merged_df.apply(check_year_overlap, axis=1)

# Step 3: Create a matrix showing the differences in overlapping years
year_overlap_matrix = pd.pivot_table(merged_df, values='Year_Overlap', index='Zs_Caption', columns='title', aggfunc='first')

# Display the matrix
print(year_overlap_matrix)

Empty DataFrame
Columns: []
Index: []


In [32]:
merged_df

Unnamed: 0,Zs_Caption,Year_From,Year_To,Date_From,Date_To,idn,library,title,subtitle,category,date,person,ddc,issn,year_from,year_to
0,... Bericht der Jüdischen Haushaltungsschule z...,1835.0,1835.0,1821-01-01,1910-01-01,,,,,,,,,,NaT,NaT
1,... Bericht der Jüdischen Haushaltungsschule z...,,,1821-01-01,1910-01-01,,,,,,,,,,NaT,NaT
2,... Bericht ueber den Verein für Westfalen und...,1932.0,1932.0,1836-01-01,1836-01-01,,,,,,,,,,NaT,NaT
3,... Bericht ueber den Verein für Westfalen und...,,,1836-01-01,1836-01-01,,,,,,,,,,NaT,NaT
4,... Bericht ueber den Verein für die Provinz W...,1871.0,1871.0,1835-01-01,1835-01-01,,,,,,,,,,NaT,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1928,,,,,,1158522371,zdb,Das jüdische Centralblatt,(zugleich Archiv für die Geschichte der Juden ...,JUDAICA,1882-1883,,910,,1882-01-01,1883-01-01
1929,,,,,,995014035,zdb,Das jüdische Centralblatt,zugleich Archiv für die Geschichte der Juden i...,,1882-1892,,910,,1882-01-01,1892-01-01
1930,,,,,,1161355766,zdb,Der israelitische Bote,,JUDAICA,1876-1879,,290,,1876-01-01,1879-01-01
1931,,,,,,018610110,zdb,"Statuten für die Achawa, Verein zur Unterstütz...",,JUDAICA,1889-1904,,290,,1889-01-01,1904-01-01
