In [None]:
%load_ext autoreload
%autoreload 2

from papertalk.semantic_scholar import SemanticScholar
from papertalk.paper_parser import (
    PaperDownloader, 
    ReferenceParser, 
    InTextReferenceParser, 
    merge_all_references
)
from papertalk.rag import (
    PaperVectorStore,
    AdvancedRAG,
    PaperSummarizer,
    PaperRelevance
)

In [None]:
# arXiv identifier of the paper under analysis; passed to the downloader below.
arxiv_id = "2201.03540"

# Download the paper
# `files` is reused further down by both reference parsers and by the summarizer.
downloader = PaperDownloader()
files = downloader.download_source(arxiv_id)

# Get references from Semantic Scholar
# NOTE(review): the lookup uses a hard-coded title rather than `arxiv_id`;
# presumably this is the title of the arXiv entry above — keep the two in
# sync when pointing the notebook at a different paper.
ss = SemanticScholar()
paper = ss.get_paper_with_references("Erasure conversion for fault tolerant quantum computing in alkaline earth Rydberg atom arrays")
# NOTE(review): not read again in this cell; `semantic_scholar_papers` below
# is bound to the same `paper.references` attribute.
semantic_scholar_references = paper.references

# Parse the references
parser = ReferenceParser()
bibtex_references = parser.extract_references(files)
# NOTE(review): the earlier comment here said "2 sentences before and after",
# which does not match context_window=3 — confirm the unit and whether the
# window counts the citing sentence itself.
in_text_reference_parser = InTextReferenceParser(context_window=3)  # context size around each in-text citation — TODO confirm unit
in_text_references = in_text_reference_parser.parse_references(files)
semantic_scholar_papers = paper.references
# Combine the BibTeX, Semantic Scholar and in-text views of the reference list.
# NOTE(review): require_all_sources=True presumably keeps only references
# found in every source, so the result may be short or empty — confirm in
# merge_all_references.
comprehensive_refs = merge_all_references(
    bibtex_references=bibtex_references,
    semantic_scholar_papers=semantic_scholar_papers,
    in_text_references=in_text_references,
    require_all_sources=True)

# Summarize the paper
# `summary` is consumed by the later query-building and citation-analysis cells.
summarizer = PaperSummarizer()
summary = summarizer.summarize(files)

# Initialize vector store
# The index is built from the Semantic Scholar record attached to each merged reference.
vectorstore = PaperVectorStore()
vectorstore.build_index([ref.semantic_scholar_paper for ref in comprehensive_refs])

In [None]:
# Wrap the reference index built earlier in the retrieval helper.
rag = AdvancedRAG(vectorstore)

# The query is the summary's first main point followed by its methodology,
# separated by a single space.
lead_point = summary.main_points[0]
query = f"{lead_point} {summary.methodology}"

similar_papers = rag.find_similar_papers(query)

In [None]:
# Analyze how the paper under study cites the first merged reference.
relevance_analyzer = PaperRelevance()

# Indexing an empty result would raise a bare IndexError with no hint about
# the cause; fail early with a message that points at the merge step instead.
if len(comprehensive_refs) == 0:
    raise ValueError(
        "comprehensive_refs is empty, so there is no reference to analyze; "
        "check the merge_all_references call (e.g. its require_all_sources setting)."
    )

citation_analysis = relevance_analyzer.analyze_citations(
    original_paper_summary=summary,
    reference=comprehensive_refs[0]
)