# 03: Bibliometric Analysis with LitStudy

Analyze publication trends, authors, and venues using LitStudy.

## Features
- Publication year distributions
- Top authors and venues
- Citation statistics
- Geographic distribution (via OpenAlex)

In [None]:
# Import the LitStudy submodules used by this notebook.
# A missing installation is reported with an install hint instead of raising,
# so execution continues; `sources`, `plot` and `types` stay undefined in that case.
try:
    from litstudy import sources, plot, types
    print("LitStudy imported successfully")
except ImportError:
    print("Install LitStudy: pip install litstudy")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply notebook-wide plot styling: a matplotlib style sheet plus a seaborn palette.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably need matplotlib >= 3.6 — confirm.
plt.style.use('seaborn-v0_8-whitegrid')
# Make seaborn's 'husl' palette the default colour palette.
sns.set_palette('husl')

## Load Data from Semantic Scholar

Use LitStudy to fetch papers from the Semantic Scholar API.

In [None]:
# Example: Search Semantic Scholar via LitStudy
# docs = sources.search_semanticscholar("procedural content generation games", limit=100)
# print(f"Found {len(docs)} papers")

## Publication Year Analysis

In [None]:
def plot_year_histogram(docs, title="Publication Year Distribution"):
    """Plot the distribution of publication years as a histogram.

    Args:
        docs: Iterable of document-like objects. The year is read from
            ``doc.year`` or, when that is missing or empty, from
            ``doc.publication_year`` (the attribute name used by LitStudy's
            ``Document``). Documents without a year are skipped.
        title: Title shown above the plot.

    Returns:
        The matplotlib figure that contains the histogram.

    Raises:
        ValueError: If none of the documents has a publication year.
    """
    years = []
    for doc in docs:
        year = getattr(doc, 'year', None) or getattr(doc, 'publication_year', None)
        if year:
            years.append(year)

    # Fail before a figure is created, so an empty result set neither leaves a
    # blank figure behind nor surfaces as "min() arg is an empty sequence".
    if not years:
        raise ValueError("No documents with a publication year to plot")

    fig = plt.figure(figsize=(10, 5))
    # One bin per calendar year: edges run from the earliest year to one past
    # the latest, so the last year gets its own bin.
    plt.hist(years, bins=range(min(years), max(years) + 2), edgecolor='black', alpha=0.7)
    plt.xlabel('Year')
    plt.ylabel('Number of Publications')
    plt.title(title)
    plt.tight_layout()
    return fig

# plot_year_histogram(docs, "PCG Research Over Time")

## Top Authors Analysis

In [None]:
def get_top_authors(docs, n=15):
    """Return the most prolific authors across ``docs``.

    Authors are counted by name. An entry in ``doc.authors`` may be a plain
    string or an object with a ``name`` attribute (as LitStudy's ``Author``
    objects have); counting the objects themselves would treat the same
    person on two papers as two different authors.

    Args:
        docs: Iterable of document-like objects with an ``authors`` attribute.
            Documents whose ``authors`` is missing, ``None`` or empty are skipped.
        n: Maximum number of authors to return.

    Returns:
        A list of ``(author_name, paper_count)`` tuples, most frequent first.
    """
    from collections import Counter

    author_counts = Counter()
    for doc in docs:
        for author in getattr(doc, 'authors', None) or ():
            # Prefer the author's name; fall back to the entry itself when it
            # is already a string (or carries no usable name).
            name = getattr(author, 'name', None) or author
            author_counts[name] += 1

    return author_counts.most_common(n)

# top_authors = get_top_authors(docs)
# for author, count in top_authors:
#     print(f"{author}: {count} papers")

## Venue Analysis

In [None]:
def get_top_venues(docs, n=10):
    """Return the most common publication venues across ``docs``.

    Args:
        docs: Iterable of document-like objects. The venue is read from
            ``doc.venue`` or, when that is missing or empty, from
            ``doc.publication_source`` (the attribute name used by LitStudy's
            ``Document``). Documents without a venue are skipped.
        n: Maximum number of venues to return.

    Returns:
        A list of ``(venue, paper_count)`` tuples, most frequent first.
    """
    from collections import Counter

    venue_counts = Counter()
    for doc in docs:
        venue = getattr(doc, 'venue', None) or getattr(doc, 'publication_source', None)
        if venue:
            venue_counts[venue] += 1

    return venue_counts.most_common(n)

# top_venues = get_top_venues(docs)
# for venue, count in top_venues:
#     print(f"{venue}: {count} papers")

## Export Visualizations

Save figures for use in the thesis.

In [None]:
# fig = plot_year_histogram(docs, "PCG Research Trends 2010-2024")
# fig.savefig('../data/visualizations/pcg_year_distribution.png', dpi=300, bbox_inches='tight')
# fig.savefig('../data/visualizations/pcg_year_distribution.pdf', bbox_inches='tight')