In [None]:
import copy
from typing import List, Dict, Optional, Union, Tuple, Literal # Added Tuple

In [None]:
import os
import json

import sys
import os

# Put the parent of the current working directory on the import path so the
# project packages imported in later cells can be resolved from this notebook.
parent_dir = os.path.dirname(os.getcwd())
# Guard the append: re-running this cell would otherwise add one more
# duplicate entry to sys.path on every execution.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
from graph.paper_graph import PaperGraph
from graph.graph_viz import GraphViz

In [None]:
# driving examples: model settings and seed inputs used by the cells below

# Both the LLM and the embedding model read their key from the same
# environment variable.
llm_model_name = "gemini-2.0-flash"
llm_api_key = os.getenv('GEMINI_API_KEY_3')
embed_model_name = "models/text-embedding-004"
embed_api_key = os.getenv('GEMINI_API_KEY_3')

research_topics = ["llm literature review"]

# Seed papers identified by DOI.
seed_dois = [
    '10.48550/arXiv.2406.10252',  # AutoSurvey: Large Language Models Can Automatically Write Surveys
    '10.48550/arXiv.2412.10415',  # Generative Adversarial Reviews: When LLMs Become the Critic
    '10.48550/arXiv.2402.12928',  # A Literature Review of Literature Reviews in Pattern Analysis and Machine Intelligence
]

# Seed papers identified by title only.
seed_titles = [
    'PaperRobot: Incremental Draft Generation of Scientific Ideas',
    'From Hypothesis to Publication: A Comprehensive Survey of AI-Driven Research Support Systems',
]

In [None]:
# Limits that are later handed to the PaperCollector constructor.
citation_limit = 100
author_paper_limit = 10

# Search and recommendation share one value: 50 only when both seed lists
# hold at least 10 entries, otherwise 100.
search_limit = recommend_limit = (
    50 if len(seed_dois) >= 10 and len(seed_titles) >= 10 else 100
)

In [None]:
from collect.paper_data_collect import PaperCollector

# Build the collector from the seed inputs, a fixed date window and field
# filter, and the limits defined in the earlier cells.
ps = PaperCollector(
    seed_research_topics=research_topics,
    seed_paper_titles=seed_titles,
    seed_paper_ids=seed_dois,
    from_dt='2020-01-01',
    to_dt='2025-04-30',
    fields_of_study=['Computer Science'],
    author_paper_limit=author_paper_limit,
    search_limit=search_limit,
    recommend_limit=recommend_limit,
    citation_limit=citation_limit,
)

In [None]:
# Await the collector's paper search, passing the seed titles and seed DOIs
# positionally.
# NOTE(review): the same two lists were already given to the PaperCollector
# constructor; confirm paper_search needs them again and that the positional
# order (titles, then ids) matches its signature.
await ps.paper_search(seed_titles, seed_dois)

In [None]:
await ps.topic_search(
    topics = research_topics, 
    search_limit = search_limit, 
    from_dt = ps.from_dt,
    to_dt = ps.to_dt,
    fields_of_study = ps.fields_of_study
    )

In [None]:
# Await an author lookup for two hard-coded author ids.
# NOTE(review): presumably Semantic Scholar author ids; confirm.
await ps.authors_search(author_ids=['2280370148', '2220362282'])


In [None]:
# Await paper_author_search for two hard-coded arXiv paper ids.
await ps.paper_author_search(paper_ids=['arXiv:2504.16828', 'arXiv:2504.16084'])

In [None]:
# Await reference_search for the same two arXiv paper ids used in the
# paper_author_search call.
await ps.reference_search(paper_ids=['arXiv:2504.16828', 'arXiv:2504.16084'])

In [None]:
# Await citing_search for two hard-coded arXiv paper ids.
await ps.citing_search(paper_ids=['arXiv:2402.03300', 'arXiv:1707.06347'])

In [None]:
await ps.paper_recommendation(
    pos_paper_ids=['arXiv:2504.16084', 'arXiv:2502.01456'], 
    neg_paper_ids=['arXiv:2209.15073', 'arXiv:2206.13717'])

In [None]:
from semanticscholar import AsyncSemanticScholar 
# Query the Semantic Scholar client directly, bypassing the collector: create
# an async client with default arguments and fetch the citations of one paper.
s2 = AsyncSemanticScholar()
# `test` is inspected by the following cells.
test = await s2.get_paper_citations(paper_id='arXiv:2402.03300')

In [None]:
from semanticscholar.Paper import Paper
from semanticscholar.Author import Author
from semanticscholar.Citation import Citation
from semanticscholar.Reference import Reference
from semanticscholar.PaginatedResults import PaginatedResults

In [None]:
# Walk the citation results held in `test`, printing each item and the paper
# attached to it, and keep the last such paper in `citing_paper`.

# Initialise up front so the name always exists after this cell runs, even when
# `test` is empty or no item passes the checks below (otherwise a later cell
# would raise NameError or show a stale value from an earlier run).
citing_paper = None

if isinstance(test, PaginatedResults):
    # Use the public `items` accessor rather than the private `_items` field;
    # an empty list simply skips the loop.
    for cit_item in test.items:
        print(cit_item)
        if not isinstance(cit_item, Citation) or not hasattr(cit_item, 'raw_data'):
            continue
        # Read the attribute defensively *before* using it, then print it once.
        citing_paper = getattr(cit_item, 'paper', None)
        print(citing_paper)

In [None]:
# Display the paper kept from the last item processed by the citation loop.
citing_paper

In [None]:
# NOTE(review): `ref_item` is not assigned in any visible cell, so on a fresh
# kernel this raises NameError. Presumably left over from an earlier loop over
# reference results; define it explicitly or delete this cell.
ref_item

In [None]:
# Display the `paper` attribute of `ref_item`.
# NOTE(review): `ref_item` is not assigned in any visible cell; confirm where
# it is meant to come from.
ref_item.paper

In [None]:
# Interactive inspection of `ref_item` via the built-in help().
# NOTE(review): looks like leftover exploration, and `ref_item` is not assigned
# in any visible cell; consider removing before sharing the notebook.
help(ref_item)

In [None]:
from semanticscholar import SemanticScholar 
# Same kind of direct query as the async client cell, but through the
# synchronous client (no await): fetch the references of one paper.
s1 = SemanticScholar()
test1 = s1.get_paper_references(paper_id='arXiv:2504.16828')

In [None]:
# Display the first entry of the reference result.
test1[0]

In [None]:
# Display the items held by the citation result `test`.
test.items

In [None]:
# Display the collector's `explored_nodes` attribute.
ps.explored_nodes

In [None]:
# Display the collector's `not_found_nodes` attribute.
ps.not_found_nodes

In [None]:
# Print the title of every pooled paper whose paperId is one of five
# hard-coded ids; all other entries are skipped.
for item in ps.data_pool['paper']:
    if item['paperId'] not in (
        'da65443c52f3ec97215769fefa917f0a1156acff',
        '0d9e5e63df3a96c96ccf1a5931ae2fc1d6f5e414',
        'ad10dddb8090bed89314ace1e658fc9dd504a8e7',
        'c3be597fcaca180d4601c2ff72c6b380de1258ca',
        '1fc6f990d01d4428533c3834d8cb5ba4820f8e36',
    ):
        continue
    print(item['title'])

In [None]:
# Number of entries under the 'author' key of the collector's data pool.
len(ps.data_pool['author'])

In [None]:
# Number of entries under the 'paper' key of the collector's data pool.
len(ps.data_pool['paper'])

In [None]:
len(ps.data_pool['paper']