In [1]:
import os
import re

import pandas as pd
from pyzotero import zotero

# Zotero group library queried by this notebook.
group_id = '5149914'

# Read the API key from the environment instead of embedding it in the
# notebook, so the secret is never saved or shared together with the file.
# Any key that was ever stored in this cell should be revoked and reissued.
api_key = os.environ.get('ZOTERO_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the ZOTERO_API_KEY environment variable to a Zotero API key '
        'before running this notebook.'
    )

# Client bound to the group library; reused by the cells below.
zot = zotero.Zotero(group_id, 'group', api_key)

In [7]:
collection_key = 'T7N74W2T' # Temperature

# Retrieve every item in the collection. A single collection_items() call
# returns only one page of results, so wrap it in everything() to keep
# following the pagination until the whole collection has been fetched.
items = zot.everything(zot.collection_items(collection_key))

# First run of four digits in a date string; used to locate the year
# regardless of where it sits in the text.
YEAR_PATTERN = re.compile(r'\d{4}')

# One record per (item, '#'-prefixed tag) pair: Category, Subcategory, Key
data = []

for item in items:
    item_data = item['data']

    # Skip child entries, i.e. anything attached to a parent item
    # (notes, attachments).
    if 'parentItem' in item_data:
        continue

    # First creator that has a usable name: either the single-field `name`
    # or the `lastName` of a two-field creator.
    first_author = None
    for creator in item_data.get('creators', []):
        first_author = creator.get('name') or creator.get('lastName')
        if first_author:
            break

    # The date may be missing for some item types and is not guaranteed to
    # start with the year (e.g. "March 2021"), so search for the year instead
    # of slicing fixed positions.
    year_match = YEAR_PATTERN.search(item_data.get('date') or '')

    # Short citation key: first three characters of the author plus the last
    # two digits of the year. A missing part is rendered as the text "None".
    auth = first_author[:3] if first_author else None
    yr = year_match.group()[2:] if year_match else None
    key = f'{auth}{yr}'

    # Categorize the item by its tags of the form "#Category/Subcategory"
    for tag in item_data.get('tags', []):
        label = tag['tag']
        if not label.startswith('#'):
            continue
        tag_parts = label[1:].split('/')
        data.append({
            'Category': tag_parts[0].strip(),
            # Tags without a "/" have no subcategory
            'Subcategory': tag_parts[1].strip() if len(tag_parts) > 1 else None,
            'Key': key
        })

# Build the frame with explicit columns so that an empty `data` list still
# yields the columns that the groupby below requires.
df = pd.DataFrame(data, columns=['Category', 'Subcategory', 'Key'])

# Collect the keys under each (Category, Subcategory) pair. dropna=False keeps
# the tags that have no subcategory (Subcategory is None); with the default
# setting groupby would silently drop those rows.
df_grouped = df.groupby(['Category', 'Subcategory'], dropna=False).agg(list)

# Display the grouped DataFrame
print(df_grouped)


                                                                    Key
Category Subcategory                                                   
CORDEX   EUR-11       [Cop21, Cas20, Est22, Día23, Mol20, Fer19, Lor...
         EUR-44                                   [Bañ22, Mol20, Fer19]
         MED-11                                                 [Fer19]
         MED-44                                                 [Fer19]
         MENA-44                                                [Dri20]
...                                                                 ...
VAR      sfcWind                                                [Die23]
         tas          [Cop21, Cas20, Cab20, de 21, Est22, Bañ22, Wan...
         tasmax       [Cop21, Cas20, Cab20, de 21, Ven23, De 23, Wan...
         tasmin       [Cop21, Cas20, Cab20, de 21, De 23, Wan21, Amb...
         zg500                                                  [Ven23]

[79 rows x 1 columns]


In [3]:
# Inspect the raw `data` payload of `item`, i.e. whichever entry the
# `for item in items` loop last bound (the name leaks from that cell, so the
# loop cell must have been run first).
item['data']

{'key': 'E6TN8LCI',
 'version': 107,
 'itemType': 'journalArticle',
 'title': 'Future changes of hot extremes in Spain: towards warmer conditions',
 'creators': [{'creatorType': 'author',
   'firstName': 'M. N.',
   'lastName': 'Lorenzo'},
  {'creatorType': 'author', 'firstName': 'I.', 'lastName': 'Alvarez'}],
 'abstractNote': 'One of the most relevant effects of climate change is its influence on the frequency and intensity of extreme events. The analysis and understanding of these events are of great importance due to the probability of causing environmental and social damage. In this study, we investigate changes in extreme hot temperature events over Spain for the near future (2021–2050) in relation to a control period (1971–2000) by using regional climate model simulations from the EURO-CORDEX project. The projection results show a significant increase in the number of extremely warm temperatures throughout the area. A significant strong increase in warm days and warm nights is pr

In [8]:
# Export the grouped table as an HTML table. The keys contain non-ASCII
# characters (accented author names), so pin the file encoding instead of
# relying on the platform default.
html_table = df_grouped.to_html()
with open('tag-table.html', 'w', encoding='utf-8') as html_file:
    html_file.write(html_table)

In [10]:
# Remove the row limit on DataFrame rendering, then show the whole table.
pd.options.display.max_rows = None
display(df_grouped)

Unnamed: 0_level_0,Unnamed: 1_level_0,Key
Category,Subcategory,Unnamed: 2_level_1
CORDEX,EUR-11,"[Cop21, Cas20, Est22, Día23, Mol20, Fer19, Lor..."
CORDEX,EUR-44,"[Bañ22, Mol20, Fer19]"
CORDEX,MED-11,[Fer19]
CORDEX,MED-44,[Fer19]
CORDEX,MENA-44,[Dri20]
PER,1900-2010,[Her22]
PER,1950-2020,[Ven23]
PER,1971-2000,"[Día23, Mol20, Fer19, Lor21, Lor22]"
PER,1971-2005,[Gar20]
PER,1975-2005,[Bañ22]
