# DBLP Publication Visualizer

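This notebook fetches publication data for up to three authors from the DBLP API (Method 2) and visualizes it. The fetch step follows paginated results until no more records are returned. For each author it then draws a bar plot of publications per year, a line plot of the cumulative publication count over time, and a pie chart of the publication-type distribution.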

In [None]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" style globally so the figures drawn below share it.
sns.set(style="whitegrid")

def fetch_publications(pid):
    """Fetch all publication records DBLP returns for one author.

    Requests ``https://dblp.org/pid/{pid}.json`` page by page (``offset`` /
    ``limit`` query parameters) and collects the ``info`` object of every hit
    found under ``result.hits.hit`` until a page comes back empty.

    NOTE(review): the ``result.hits.hit`` shape parsed here is the one used by
    DBLP's publication *search* API; confirm that the ``/pid/<pid>.json``
    endpoint returns the same shape and honours ``offset``/``limit``.

    Args:
        pid: DBLP person identifier, e.g. ``"l/YannLeCun"``.

    Returns:
        pandas.DataFrame with one row per hit. Columns are the keys found in
        the hits' ``info`` objects plus ``type``. The frame is empty (and has
        no columns) when nothing could be fetched.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    base_url = f"https://dblp.org/pid/{pid}.json"
    limit = 1000
    offset = 0
    all_publications = []
    previous_hits = None

    while True:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(
            base_url,
            params={"offset": offset, "limit": limit},
            timeout=30,
        )
        if response.status_code != 200:
            # Best effort: keep what was collected so far, but say why we stopped.
            print(
                f"Warning: DBLP returned HTTP {response.status_code} for "
                f"'{pid}' at offset {offset}; stopping."
            )
            break

        try:
            data = response.json()
        except ValueError:
            print(
                f"Warning: DBLP response for '{pid}' at offset {offset} "
                "is not valid JSON; stopping."
            )
            break

        hits = data.get("result", {}).get("hits", {}).get("hit", [])
        if not hits:
            break
        if hits == previous_hits:
            # The server ignored `offset` and served the same page again;
            # without this guard the loop would never terminate.
            break

        for hit in hits:
            record = dict(hit.get("info", {}))
            # Keep a type already present in `info`; only fall back to a
            # hit-level value (or "unknown") when it is missing, instead of
            # unconditionally overwriting it.
            record.setdefault("type", hit.get("type", "unknown"))
            all_publications.append(record)

        previous_hits = hits
        offset += limit

    return pd.DataFrame(all_publications)

In [None]:
def visualize_publications(df, author_name):
    """Draw the three publication charts for one author.

    Produces, in order: a count plot of publications per year, a line plot of
    the cumulative publication count by year, and (when a ``type`` column is
    present) a pie chart of the publication-type distribution. Each figure is
    shown with ``plt.show()``.

    Rows whose ``year`` is missing or not numeric are ignored. If no usable
    rows remain (including an empty frame or one without a ``year`` column),
    a message is printed and nothing is plotted.

    Args:
        df: DataFrame of publications with a ``year`` column and, optionally,
            a ``type`` column. The caller's frame is not modified.
        author_name: Name used in the figure titles.

    Returns:
        None.
    """
    if "year" not in df.columns:
        print(f"No publications with a valid year to plot for {author_name}.")
        return

    # Coerce instead of astype(int) directly so a stray non-numeric year
    # drops that row rather than aborting every plot.
    years = pd.to_numeric(df["year"], errors="coerce")
    has_year = years.notna()
    if not has_year.any():
        print(f"No publications with a valid year to plot for {author_name}.")
        return

    # Explicit copy: assigning into a filtered slice would raise
    # SettingWithCopyWarning and could touch the caller's frame.
    df = df.loc[has_year].copy()
    df["year"] = years[has_year].astype(int)

    # 1) Publications per year.
    plt.figure(figsize=(10, 5))
    sns.countplot(x="year", data=df, palette="Blues_d")
    plt.title(f"Publications per Year - {author_name}")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    # 2) Cumulative publications over time (years in ascending order).
    year_counts = df["year"].value_counts().sort_index()
    cumulative = year_counts.cumsum()
    plt.figure(figsize=(10, 5))
    plt.plot(cumulative.index, cumulative.values, marker="o")
    plt.title(f"Cumulative Publications Timeline - {author_name}")
    plt.xlabel("Year")
    plt.ylabel("Cumulative Publications")
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    # 3) Share of each publication type; skipped when the column is absent.
    if "type" in df.columns:
        plt.figure(figsize=(6, 6))
        df["type"].value_counts().plot.pie(autopct="%1.1f%%", startangle=140)
        plt.title(f"Publication Type Distribution - {author_name}")
        plt.ylabel("")
        plt.tight_layout()
        plt.show()

In [None]:
# Display name -> DBLP person identifier (pid) of each author to analyse.
authors = {
    "Michael Stonebraker": "s/MichaelStonebraker",
    "Geoffrey Hinton": "h/GeoffreyEHinton",
    "Yann LeCun": "l/YannLeCun",
}

for name, pid in authors.items():
    print(f"\nFetching publications for {name}...")
    df = fetch_publications(pid)
    print(f"Total publications: {len(df)}")
    # An empty result has no columns at all, so plotting it would fail on the
    # 'year' lookup; skip it so one failed fetch does not abort the others.
    if df.empty:
        print(f"No publications found for {name}; skipping plots.")
        continue
    visualize_publications(df, name)