# VICAV Bibliografie: Zotero Update

In [None]:
!ls

## Parse TEI File and extract xml:ids and zotero-uris

In [None]:
tei_bibl_filename = "vicav_biblio_tei_zotero.xml"

In [None]:
#import Package eTree to parse XML Files
import xml.etree.ElementTree as ET

In [None]:
tei_bibl_file = ET.parse(tei_bibl_filename)
TEI = tei_bibl_file.getroot()

xmlns = {"tei": "http://www.tei-c.org/ns/1.0", "xml":"http://www.w3.org/XML/1998/namespace" }

In [None]:
#/TEI/text[1]/body[1]/listBibl[1]/biblStruct[1]
bibls = TEI.findall("tei:text/tei:body/tei:listBibl/tei:biblStruct", xmlns)

In [None]:
len(bibls)

In [None]:
# Collect, for every biblStruct, its Zotero URL, its xml:id and the Zotero
# item key (the part of the URL that follows "/items/").
bibl_data = []
for bibl in bibls:
    zotero_url = bibl.attrib['corresp']
    entry = {
        "zotero_url": zotero_url,
        "xml_id": bibl.get('{http://www.w3.org/XML/1998/namespace}id'),
        "zotero_id": zotero_url.split("/items/")[1],
    }
    bibl_data.append(entry)

In [None]:
len(bibl_data)

In [None]:
bibl_data[0]

## Access Zotero-API

In [None]:
import requests
import json

In [None]:
# NOTE(review): this API key is committed in plain text. It should be rotated
# and supplied through the ZOTERO_API_TOKEN environment variable; the literal
# is kept only as a fallback so existing runs behave exactly as before.
import os

api_token = os.environ.get("ZOTERO_API_TOKEN", "DQfd2PYuoDza5S0zMQdjzU6P")

In [None]:
#Group-id of Vicav library
group_id = "2165756"

In [None]:
#function to get the value of the "extra" field
def get_extra_field(zotero_id):
    """Fetch one item of the group library and return its "extra" value.

    The library is selected by the module-level ``group_id``.

    Args:
        zotero_id: key of the Zotero item, as used in the item URL.

    Returns:
        * the text between "(biblid:" and the next ")" when the extra field
          starts with "(biblid:",
        * the unchanged extra string otherwise ("" when the item has no
          extra value),
        * None when the API did not answer with status 200.
    """
    request_url = "https://api.zotero.org/groups/" + group_id + "/items/" + zotero_id
    response = requests.get(request_url)
    if response.status_code != 200:
        return None

    # The "extra" key may be absent from the item data; treat that like an
    # empty field instead of failing with a KeyError.
    extra = json.loads(response.text)["data"].get("extra", "")

    if extra.startswith("(biblid:"):
        extra = extra.split("(biblid:")[1].split(")")[0]

    return extra

In [None]:
#test
test_id = bibl_data[0]["zotero_id"]
get_extra_field(test_id)

In [None]:
# Look up the biblid of every entry that does not have one yet.
for item in bibl_data:
    if "biblid" in item:
        continue
    extra = get_extra_field(item["zotero_id"])
    # get_extra_field returns None when the request failed and "" when the
    # field is empty; neither is a usable biblid, so both are skipped.
    if extra:
        item["biblid"] = extra

In [None]:
bibl_data[4]

In [None]:
# Entries for which a biblid has been recorded.
with_id = [entry for entry in bibl_data if 'biblid' in entry]

In [None]:
len(with_id)

In [None]:
# Entries that still have no biblid.
missing = [entry for entry in bibl_data if 'biblid' not in entry]

In [None]:
len(missing)

In [None]:
missing[76]

In [None]:
# Second pass over the entries without biblid, printing the progress.
errors = []
for n, item in enumerate(missing):
    print(str(n) + "/" + str(len(missing)))
    if "biblid" in item:
        continue
    extra = get_extra_field(item["zotero_id"])
    if extra:
        item["biblid"] = extra
    else:
        # "" means the item has no extra value, None means the request
        # failed; in both cases there is no biblid to record.
        print("Problem with:" + str(item["xml_id"]))
        errors.append(item)

In [None]:
# Entries that are still without biblid after the second pass.
really_missing = [entry for entry in missing if 'biblid' not in entry]

In [None]:
len(really_missing)

In [None]:
really_missing[5]

In [None]:
for item in really_missing:
    print(item["zotero_url"])

In [None]:
#with open('bibl_data.json', 'w') as outfile:
#    json.dump(bibl_data, outfile)

## Read all items in the library

In [None]:
def get_items(group_id, limit, start):
    """Request one page of the item listing of a Zotero group library.

    Args:
        group_id: id of the group library (string).
        limit: value of the ``limit`` query parameter.
        start: value of the ``start`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: when the API does not answer with status 200.
            (Previously this case failed with an UnboundLocalError.)
    """
    request_url = "https://api.zotero.org/groups/" + group_id + "/items/"
    # Let requests build the query string; this also accepts integers.
    response = requests.get(request_url, params={"limit": limit, "start": start})
    if response.status_code != 200:
        raise RuntimeError(
            "Zotero request failed with status code "
            + str(response.status_code) + ": " + response.url
        )

    return json.loads(response.text)

## https://api.zotero.org/groups/2165756/items/

In [None]:
all_items=get_items("2165756","20","3")

In [None]:
len(all_items)

In [None]:
def total_number_items(group_id):
    """Return the "Total-Results" header of the group's item listing.

    The header value is handed back unchanged; no status check is made, so a
    response without that header raises a KeyError.
    """
    items_url = "".join(("https://api.zotero.org/groups/", group_id, "/items/"))
    return requests.get(items_url).headers["Total-Results"]

In [None]:
total_number_items("2165756")

In [None]:
def get_groupheaders(group_id):
    """Return all response headers of the group's item listing request."""
    items_url = "".join(("https://api.zotero.org/groups/", group_id, "/items/"))
    listing = requests.get(items_url)
    return listing.headers
    

In [None]:
get_groupheaders("2165756")["Link"].split('; rel="next",')[0].replace("<","").replace(">","")

In [None]:
limit="100"
start="101"
request_url = "https://api.zotero.org/groups/" + group_id + "/items/" + "?limit=" + limit + "&start=" + start
request_url

In [None]:
def get_all_items(group_id):
    """Return the item count of a group library and that count divided by 100.

    Unfinished: no items are downloaded yet. The second element of the
    returned tuple is the plain quotient ``total / 100`` (a float), not a
    rounded number of requests.
    """
    total_number = int(total_number_items(group_id))
    return total_number, total_number / 100
    

In [None]:
get_all_items("2165756")

# Reload the data from the backup file

In [None]:
#read backup file

with open('bibl_data.json', 'r') as infile:
    bibl_data = json.load(infile)

len(bibl_data)

In [None]:
bibl_data[0]

In [None]:
# Entries of the reloaded data that have no biblid.
missing = [entry for entry in bibl_data if 'biblid' not in entry]

In [None]:
len(missing)

In [None]:
missing[0]

In [None]:
import re
import random

In [None]:
def id_to_biblid(xml_id:str):
    """Derive a new biblid of the form ``name_year_NNNN`` from an xml:id.

    The xml:id is searched for word characters followed by a four-digit
    year, e.g. "Abboud2003a". The part before the year is lower-cased, and
    a random zero-padded number between 0001 and 9999 is appended, giving
    for instance "abboud_2003_0421". A trailing suffix letter is not used.

    Args:
        xml_id: the xml:id of a bibliography entry.

    Returns:
        The generated biblid string. The random part makes repeated calls
        return different values.

    Raises:
        ValueError: when ``xml_id`` contains no four-digit year.
    """
    # Raw string: "\w" in a normal literal is an invalid escape sequence.
    m = re.search(r'(\w*)([0-9]{4})(\w?)', xml_id)
    if m is None:
        raise ValueError("no four-digit year found in xml:id: " + repr(xml_id))

    name = m.group(1)
    year = m.group(2)
    randnr = "%04d" % random.randint(1, 9999)
    return name.lower() + "_" + year + "_" + randnr

In [None]:
#test
testid = missing[0]["xml_id"] 
id_to_biblid(testid)

In [None]:
#uris MUST use https://

In [None]:
#https://api.zotero.org/groups/2165756/items/G2MMP7G2

In [None]:
# NOTE(review): this API key is committed in plain text. It should be rotated
# and supplied through the ZOTERO_API_TOKEN environment variable; the literal
# is kept only as a fallback so existing runs behave exactly as before.
import os

api_token = os.environ.get("ZOTERO_API_TOKEN", "DQfd2PYuoDza5S0zMQdjzU6P")
# Authorization header sent with the item requests that read and write data.
headers = {'Authorization': 'Bearer ' + api_token}

In [None]:
# Function to update zotero-item

def update_zotero_extra_in_item(group_id:str, item_id:str, extra:str):
    """Overwrite the "extra" field of one item in a Zotero group library.

    The item is fetched, its "extra" value is replaced by ``extra`` and the
    whole object is sent back with PUT. Both requests use the module-level
    ``headers`` for authorization.

    Args:
        group_id: id of the group library.
        item_id: key of the item to change.
        extra: new content of the "extra" field; the old content is replaced.

    Returns:
        A two-element list ``[ok, old_extra]``:
        * ``[True, old_extra]`` when the update was answered with status 204;
          the old value is returned so it can be stored for safe-keeping,
        * ``[False, old_extra]`` when the PUT request was rejected,
        * ``[False, None]`` when the item could not be fetched or has no
          "extra" field.
        The reason for a failure is printed.
    """
    url = "https://api.zotero.org/groups/" + group_id + "/items/" + item_id
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("Error when getting item. Status code: " + str(response.status_code))
        return [False, None]

    parsed = json.loads(response.text)
    if "extra" not in parsed["data"]:
        # Previously this case ended silently without any result.
        print("Item " + item_id + " has no 'extra' field; nothing updated.")
        return [False, None]

    old_extra_val = parsed["data"]["extra"]
    parsed["data"]["extra"] = extra

    update = requests.put(url, headers=headers, json=parsed)
    if update.status_code != 204:
        print("Error when updating item. Status code:" + str(update.status_code))
        return [False, old_extra_val]

    # Everything worked: True as indicator plus the old "extra" value.
    return [True, old_extra_val]
    

In [None]:
missing[0]

In [None]:
#vicav group
group_id

In [None]:
#working example
testitem = missing[0]
testid = missing[0]["xml_id"]
item_id = testitem["zotero_id"]
new_id = id_to_biblid(testid)
new_extra = "(" + "biblid:" + new_id + ")"
print(new_extra)
#group_id
updated = update_zotero_extra_in_item(group_id, item_id, new_extra)
updated

In [None]:
# Generate a biblid for every entry without one and write it to Zotero.
for item in missing:
    xml_id = item["xml_id"]
    print("Updating " + str(xml_id))
    try:
        new_id = id_to_biblid(xml_id)
    except (AttributeError, TypeError, ValueError):
        # The xml:id contains no four-digit year, or there is no xml:id.
        print("error to generate id: " + str(xml_id))
        continue

    new_extra = "(" + "biblid:" + new_id + ")"
    updated = update_zotero_extra_in_item(group_id, item["zotero_id"], new_extra)
    if not updated:
        # No result at all means the update did not report success.
        item["updated"] = False
        continue

    item["updated"] = updated[0]
    item["old_extra_val"] = updated[1]
    # Record the new biblid only once Zotero has confirmed the write, so the
    # local data does not claim an id that the library does not contain.
    if updated[0]:
        item["biblid"] = new_id

In [None]:
missing[4]

In [None]:
missing[0]