In [1]:
import os
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "True"
from collections import Counter
import json

In [2]:
# Queryset over every Reference row; reused by all the analysis cells below.
# NOTE(review): `Reference` is not imported in any visible cell — presumably the
# kernel (e.g. a Django shell_plus notebook) injects the models; confirm.
refs = Reference.objects.all()

In [30]:
# Grab a single Reference instance so its model fields can be inspected.
test = refs.first()


In [31]:
test._meta.fields

(<django.db.models.fields.AutoField: id>,
 <django.db.models.fields.URLField: bibs_url>,
 <django.db.models.fields.PositiveIntegerField: pages_start>,
 <django.db.models.fields.PositiveIntegerField: pages_end>,
 <django.db.models.fields.TextField: bibtex>,
 <django.db.models.fields.related.ForeignKey: content_type>,
 <django.db.models.fields.PositiveIntegerField: object_id>,
 <django.db.models.fields.CharField: attribute>,
 <django.db.models.fields.DateTimeField: last_update>,
 <django.db.models.fields.CharField: folio>,
 <django.db.models.fields.CharField: notes>)

In [3]:
def get_bibtex_dict(ref):
    """Decode the JSON text stored in ``ref.bibtex`` and return the parsed object."""
    raw_bibtex = ref.bibtex
    parsed = json.loads(raw_bibtex)
    return parsed

def get_bibtex_key_tuple(ref):
    """Return the top-level keys of ``ref``'s decoded bibtex JSON as a tuple.

    Keys appear in the order in which the decoded object yields them.
    """
    return tuple(key for key in get_bibtex_dict(ref))

def get_all_bibtex_values_for_key(refs, key):
    """Collect the value stored under ``key`` from every ref whose bibtex has it.

    Refs whose decoded bibtex lacks ``key`` are skipped; the returned list
    follows the iteration order of ``refs``.
    """
    decoded_entries = (get_bibtex_dict(ref) for ref in refs)
    return [entry.get(key) for entry in decoded_entries if key in entry.keys()]

def check_field_always_has_value(refs, key):
    """Report whether ``key`` never holds an empty value in the given refs.

    Args:
        refs: Iterable of objects carrying JSON text in ``.bibtex``.
        key: Top-level bibtex key to inspect.

    Returns:
        ``False`` if any ref that has ``key`` stores ``""`` or ``None`` under
        it, otherwise ``True``. Refs lacking ``key`` are not considered, so
        the result is also ``True`` when no ref has the key at all.
    """
    values = get_all_bibtex_values_for_key(refs, key)
    # Compare element-wise instead of building set(values): decoded JSON values
    # may be dicts or lists, which are unhashable and would raise TypeError
    # inside a set.
    return not any(value is None or value == "" for value in values)

def count_key_occurences_in_refs(refs):
    """Tally, for each top-level bibtex key, how many refs contain that key."""
    return Counter(
        key
        for ref in refs
        for key in get_bibtex_dict(ref).keys()
    )

In [4]:
# Tally how many refs share each distinct tuple of top-level bibtex keys.
C = Counter()
for el in refs:
    C[get_bibtex_key_tuple(el)] += 1

In [5]:
# Flatten every observed key tuple into the set of distinct top-level field names.
all_bibtex_toplevel_fields = {field for key_tuple in C for field in key_tuple}

In [6]:
all_bibtex_toplevel_fields

{'URL',
 'accessed',
 'call-number',
 'event-place',
 'id',
 'issued',
 'language',
 'note',
 'page',
 'publisher-place',
 'shortTitle',
 'title',
 'type'}

In [7]:
# Spot-check: pull every value stored under the "event-place" key.
res = get_all_bibtex_values_for_key(refs, "event-place")
    

In [8]:
# Map each top-level field to the list of values it takes across all refs.
# Built in a single pass so every ref's bibtex JSON is decoded once, rather
# than once per field; key order and per-key value order match a per-field scan.
key_vals = {k: [] for k in all_bibtex_toplevel_fields}
for ref in refs:
    for k, value in get_bibtex_dict(ref).items():
        # Only fields already known to the notebook are collected.
        if k in key_vals:
            key_vals[k].append(value)

In [80]:
key_vals

{'shortTitle': ['Kaiserlich-Königlicher Hof- und Ehrenkalender 1773, Wien: Schilgische Erben.',
  'Kaiserlich-Königlicher Hof- und Ehrenkalender 1774, Wien: Schilgische Erben.',
  'Kaiserlich-Königlicher Hof- und Ehrenkalender 1775, Wien: Schilgische Erben.',
  'Kaiserlich-Königlicher Hof- und Ehrenkalender 1776, Wien: Schilgische Erben.',
  'Kaiserlich-Königlicher Hof- und Ehrenkalender 1777, Wien: Schilgische Erben.',
  'Kaiserlich-Königlicher Hof- und Ehrenkalender 1778, Wien: Schilgische Erben.',
  'Hof- und Staatsschematismus 1779, Wien: Joseph Gerold, ksl. Reichshof- und Universitätsbuchdrucker.',
  'Kaiserlich-Königlicher Hof- und Ehrenkalender 1780, Wien: Schilgische Erben.',
  'Hof- und Staatsschematismus 1781, Wien: Joseph Gerold, ksl. Reichshof- und Universitätsbuchdrucker.',
  'Kaiserlich-Königlicher Hof- und Ehrenkalender 1783, Wien: Edle von Ghelenschen Erben.',
  'Kaiserlich-Königlicher Hof- und Ehrenkalender 1784, Wien: Edle von Ghelenschen Erben.',
  'Hof- und Staatssc

In [81]:
# Number of Reference rows. QuerySet.count() reuses the result cache when the
# queryset has already been evaluated and otherwise counts in the database,
# so it never has to load every row just to measure the size.
ref_count = refs.count()

In [82]:
ref_count

195692

In [83]:
C

Counter({('id', 'type', 'title', 'URL', 'shortTitle', 'issued'): 75337,
         ('id', 'type', 'title', 'call-number', 'shortTitle', 'issued'): 48836,
         ('id', 'type', 'title', 'issued'): 40146,
         ('id',
          'type',
          'title',
          'URL',
          'call-number',
          'shortTitle',
          'issued'): 15687,
         ('id',
          'type',
          'title',
          'URL',
          'call-number',
          'note',
          'shortTitle',
          'issued'): 12536,
         ('id', 'type', 'title', 'URL', 'note', 'shortTitle', 'issued'): 1540,
         ('id',
          'type',
          'title',
          'call-number',
          'note',
          'shortTitle',
          'issued'): 909,
         ('id', 'type', 'title', 'shortTitle', 'issued'): 693,
         ('id',
          'type',
          'title',
          'publisher-place',
          'page',
          'event-place',
          'URL',
          'language',
          'issued',
          'ac

In [84]:
counts = count_key_occurences_in_refs(refs)

In [86]:
ref_count, counts

(195692,
 Counter({'id': 195692,
          'type': 195692,
          'title': 195692,
          'issued': 195692,
          'shortTitle': 155538,
          'URL': 105108,
          'call-number': 77968,
          'note': 14985,
          'publisher-place': 8,
          'page': 8,
          'event-place': 8,
          'language': 8,
          'accessed': 8}))

In [92]:
key_vals.get("call-number")

['HHStA Bib BL 38/1773',
 'HHStA Bib BL 38/1774',
 'HHStA Bib BL 38/1775',
 'WB A-39351/1776',
 'WB A-39351/1777',
 'HHStA Bib BL 38/1778',
 'HHStA Bib BL 37/1779',
 'WB A-39351/1780',
 'WB A-9866/1781',
 'HHStA Bib BL 38/1783',
 'HHStA Bib BL 38/1784',
 'WB A-9866/1785',
 'HHStA Bib BL 38/1786',
 'HHStA Bib BL 38/1787',
 'HHStA Bib BL 38/1788',
 'HHStA Bib BL 38/1789',
 'HHStA Bib BL 38/1790',
 'HHStA Bib BL 38/1791',
 'HHStA Bib BL 38/1792',
 'HHStA Bib BL 38/1793',
 'HHStA Bib BL 38/1794',
 'HHStA Bib BL 38/1795',
 'HHStA Bib BL 38/1796',
 'WB A-39351/1797',
 'ÖNB 544720-B.1749 ALT',
 'StB Bamberg Cal.q.44-a/1764',
 'HHStA Bib BL 38/1765',
 'HHStA Bib BL 38/1766',
 'HHStA Bib BL 38/1767',
 'HHStA Bib BL 38/1745',
 'HHStA Bib BL 38/1746',
 'HHStA Bib BL 38/1745',
 'HHStA Bib BL 38/1746',
 'HHStA Bib BL 38/1734',
 'HHStA Bib BL 38/1735',
 'HHStA Bib BL 38/1736',
 'HHStA Bib BL 38/1737',
 'HHStA Bib BL 38/1738',
 'HHStA Bib BL 38/1739',
 'ÖNB 544720-B.1740 ALT',
 'HHStA Bib BL 38/1745'

In [10]:

# Collect the "id" entry of each ref's bibtex JSON, in queryset order.
ids, urls = [], []
for ref in refs:
    ids += [json.loads(ref.bibtex).get("id")]

In [11]:
len(ids)

195692

In [13]:
ids[1]

'14346851/X69RDFQG'

In [15]:
os.environ["ZOTERO_GROUP"]

'4980632'

In [16]:
# Fetch a single Reference directly from the manager for inspection.
test = Reference.objects.first()

In [17]:
test

<Reference: Reference object (95007)>

In [19]:
test._meta.fields

(<django.db.models.fields.AutoField: id>,
 <django.db.models.fields.URLField: bibs_url>,
 <django.db.models.fields.PositiveIntegerField: pages_start>,
 <django.db.models.fields.PositiveIntegerField: pages_end>,
 <django.db.models.fields.TextField: bibtex>,
 <django.db.models.fields.related.ForeignKey: content_type>,
 <django.db.models.fields.PositiveIntegerField: object_id>,
 <django.db.models.fields.CharField: attribute>,
 <django.db.models.fields.DateTimeField: last_update>,
 <django.db.models.fields.CharField: folio>,
 <django.db.models.fields.CharField: notes>)

In [23]:
# Only the bibs_url column; flat=True yields bare values instead of 1-tuples.
bibs_urls = Reference.objects.values_list("bibs_url", flat=True)

In [25]:
# Materialise the queryset into a plain list so it can be measured and deduplicated.
urls = list(bibs_urls)

In [26]:
len(urls), Reference.objects.count(), len(set(urls)), len(set(ids))

(195692, 195692, 288, 288)

In [28]:
# NOTE: this lookup fails — Reference has no `bibtex_url` attribute; the URL
# field listed by `test._meta.fields` is `bibs_url`.
test.bibtex_url

AttributeError: 'Reference' object has no attribute 'bibtex_url'

In [29]:
PlaceEvent

apis_core.apis_relations.models.PlaceEvent