In [2]:
from fastapi import FastAPI, Query, HTTPException
from typing import List, Optional
import requests
# from openai import OpenAI
# from google import genai
import google.generativeai as genai
# from bs4 import BeautifulSoup
import re, time

In [3]:
CROSSREF_API_URL = "https://api.crossref.org/works"
SEMANTIC_SCHOLAR_API_URL = "https://api.semanticscholar.org/v1/paper"
DOI_API_URL = "https://doi.org"

In [4]:
def clean_abstract(abstract: str) -> str:
    return re.sub(r'<.*?>|Abstract', '', abstract)

def format_citation(authors: List[dict], year: str, title: str, journal: str, volume: str, issue: str, pages: str) -> str:
    formatted_authors = ", ".join([f"{a['family']}, {a['given'][0]}." for a in authors])
    return f"{formatted_authors} ({year}). {title}. {journal}, {volume}({issue}), {pages}."

def get_semantic_scholar_info(doi: str, api_key: str):
    time.sleep(1)  # Rate limit: 1 request per second
    headers = {
        'x-api-key': api_key,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    params = {'fields': 'title,abstract,authors,keywords'}
    try: 
        response = requests.get(f"{SEMANTIC_SCHOLAR_API_URL}/DOI:{doi}", headers=headers, params=params)
        # if response.status_code == 200:
        print("Semantic result: ", params, response.json())
        return response.json()
    except requests.exceptions.HTTPError as e:
        print("Semantic fail: ", params, headers, f"{SEMANTIC_SCHOLAR_API_URL}/DOI:{doi}, str(e)")
        return None

def process_paper_data(item, semantic_data=None):
    title = item.get("title", [""])[0]
    authors = item.get("author", [])
    doi = item.get("DOI", "")
    abstract = clean_abstract(item.get("abstract", "No abstract available"))
    publisher = item.get("publisher", "Unknown")
    journal = item.get("short-container-title", ["Unknown"])[0]
    published_date = item.get("published-print", {}).get("date-parts", [["Unknown"]])[0]
    published_date_str = "-".join(map(str, published_date)) if published_date != ["Unknown"] else "Unknown"

    first_author = f"{authors[0].get('given', '')} {authors[0].get('family', '')}" if authors else "Unknown"
    authors_list = ", ".join([f"{a.get('given', '')} {a.get('family', '')}" for a in authors]) if authors else "Unknown"

    citation = format_citation(authors, str(published_date[0]), title, journal, item.get("volume", ""), item.get("issue", ""), item.get("page", ""))
    url = f"{DOI_API_URL}/{doi}"

    keywords = ""
    if semantic_data and "keywords" in semantic_data:
        keywords = ", ".join(semantic_data["keywords"])

    return {
        "DOI": doi,
        "title": title,
        "firstAuthor": first_author,
        "authors": authors_list,
        "publisher": publisher,
        "journal": journal,
        "published_date": published_date_str,
        "citation": citation,
        "keyword": keywords,
        "abstract": abstract,
        "URL": url
    }

In [4]:
def search_crossref(keyword: str, author: Optional[str] = None, semantic_scholar_key: Optional[str] = None):
    query_params = {"query.title": keyword, "rows": 5}
    if author:
        query_params["query.author"] = author

    response = requests.get(CROSSREF_API_URL, params=query_params)
    if response.status_code == 200:
        data = response.json()
        papers = []

        for item in data["message"]["items"]:
            doi = item.get("DOI", "")
            semantic_data = get_semantic_scholar_info(doi, semantic_scholar_key) if semantic_scholar_key and doi else None
            paper_info = process_paper_data(item, semantic_data)
            papers.append(paper_info)

        return papers
    else:
        raise HTTPException(status_code=response.status_code, detail="Failed to fetch from CrossRef API")

def search_by_doi(doi: str, semantic_scholar_key: Optional[str] = None):
    response = requests.get(f"{CROSSREF_API_URL}/{doi}")
    if response.status_code == 200:
        item = response.json()["message"]
        semantic_data = get_semantic_scholar_info(doi, semantic_scholar_key) if semantic_scholar_key else None
        return process_paper_data(item, semantic_data)
    else:
        raise HTTPException(status_code=response.status_code, detail="Failed to fetch from CrossRef API")


In [5]:
def analyze_with_gemini(info: dict, gemini_api_key: str, custom_prompt: str):
    """
    Analyze paper information using Gemini AI with a custom prompt.
    """
    try:
        # Initialize the Gemini API with the API key
        genai.configure(api_key=gemini_api_key)
        
        # Create the generation config
        generation_config = {
            "temperature": 0.9,
            "top_p": 1,
            "top_k": 1,
            "max_output_tokens": 2048,
        }

        # Initialize the model
        model = genai.GenerativeModel(
            model_name="gemini-pro",
            generation_config=generation_config
        )
        
        prompt = f"""{custom_prompt}

Paper Information:
- DOI: {info['DOI']}
- Title: {info['title']}
- First Author: {info['firstAuthor']}
- Corresponding Author: {info['correspondingAuthor']}
- Corresponding Email: {info['correspondenceTo']}
- Authors: {info['authors']}
- Citation: {info['citation']}
- Keyword: {info['keyword']}

Please provide a detailed analysis including:
1. Key research contributions
2. Methodology highlights
3. Main findings
4. Potential impact on the field
"""

        # Generate response
        response = model.generate_content(prompt)
        return response.text

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Gemini API error: {str(e)}")



In [7]:
gpt_api_key = ""
semantic_scholar_key = ""
keyword = "Eddy-Kuroshio interaction processes revealed by mooring observations off Taiwan and Luzon"
author = "Cheng-Ju Tsai"
custom_prompt = "Please analyze this research paper and provide key points in both English and 繁體中文."
model = "gemini-2.0-flash" #"gpt-3.5-turbo"
LLM = 'genai' #'openai'

In [12]:
papers = search_crossref(keyword, author)
doi = "10.1016/j.tecto.2021.229047" #"10.1002/2015GL065814"
use_ai = False
if True:    
    try:
        if doi:
            papers = [search_by_doi(doi, semantic_scholar_key)]
        else:
            if not keyword:
                raise HTTPException(status_code=400, detail="Keyword is required if DOI is not provided.")
            papers = search_crossref(keyword, author, semantic_scholar_key)

        results = []
        for paper in papers:
            if use_ai and gpt_api_key:
                try:
                    paper["keypoint"] = analyze_with_gemini(paper, gpt_api_key, custom_prompt)
                except Exception as e:
                    paper["keypoint"] = f"Error analyzing paper: {str(e)}"
            #results.append(PaperInfo(**paper))

        #return results

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

papers    

Semantic result:  {'fields': 'title,abstract,authors,keywords'} {'abstract': None, 'arxivId': None, 'authors': [{'authorId': '49970758', 'name': 'Ching‐Hui Tsai', 'url': 'https://www.semanticscholar.org/author/49970758'}, {'authorId': '144292993', 'name': 'S. Hsu', 'url': 'https://www.semanticscholar.org/author/144292993'}, {'authorId': '31320894', 'name': 'Song-chuen Chen', 'url': 'https://www.semanticscholar.org/author/31320894'}, {'authorId': '105044345', 'name': 'Shiou‐Ya Wang', 'url': 'https://www.semanticscholar.org/author/105044345'}, {'authorId': '31897255', 'name': 'Lien-Kai Lin', 'url': 'https://www.semanticscholar.org/author/31897255'}, {'authorId': '2107818060', 'name': 'P. Huang', 'url': 'https://www.semanticscholar.org/author/2107818060'}, {'authorId': '2110719445', 'name': 'Kuan-Ting Chen', 'url': 'https://www.semanticscholar.org/author/2110719445'}, {'authorId': '11934743', 'name': 'Hsiao-Shan Lin', 'url': 'https://www.semanticscholar.org/author/11934743'}, {'authorId':

[{'DOI': '10.1016/j.tecto.2021.229047',
  'title': 'Active tectonics and volcanism in the southernmost Okinawa Trough back-arc basin derived from deep-towed sonar surveys',
  'firstAuthor': 'Ching-Hui Tsai',
  'authors': 'Ching-Hui Tsai, Shu-Kun Hsu, Song-Chuen Chen, Shiou-Ya Wang, Lien-Kai Lin, Pi-Chun Huang, Kuan-Ting Chen, Hsiao-Shan Lin, Chin-Wei Liang, Yen-Yu Cho',
  'publisher': 'Elsevier BV',
  'journal': 'Tectonophysics',
  'published_date': '2021-10',
  'citation': 'Tsai, C., Hsu, S., Chen, S., Wang, S., Lin, L., Huang, P., Chen, K., Lin, H., Liang, C., Cho, Y. (2021). Active tectonics and volcanism in the southernmost Okinawa Trough back-arc basin derived from deep-towed sonar surveys. Tectonophysics, 817(), 229047.',
  'keyword': '',
  'abstract': 'No abstract available',
  'URL': 'https://doi.org/10.1016/j.tecto.2021.229047'}]

In [None]:
semantic_data = get_semantic_scholar_info("10.1016/j.dsr.2018.11.002", "YOUR_KEY")
print(semantic_data)

Semantic result:  {'fields': 'title,abstract,authors,keywords'} {'abstract': None, 'arxivId': None, 'authors': [{'authorId': '48105567', 'name': 'Po‐Chun Hsu', 'url': 'https://www.semanticscholar.org/author/48105567'}, {'authorId': '31290707', 'name': 'Kai-Ho Cheng', 'url': 'https://www.semanticscholar.org/author/31290707'}, {'authorId': '152345321', 'name': 'S. Jan', 'url': 'https://www.semanticscholar.org/author/152345321'}, {'authorId': '121406220', 'name': 'Hung-Jen Lee', 'url': 'https://www.semanticscholar.org/author/121406220'}, {'authorId': '2864878', 'name': 'Chung‐Ru Ho', 'url': 'https://www.semanticscholar.org/author/2864878'}], 'citationVelocity': 0, 'citations': [{'arxivId': None, 'authors': [{'authorId': '2338580887', 'name': 'Po-Chun Hsu'}, {'authorId': '2338646452', 'name': 'Rose Angeli Tabanao Macagga'}, {'authorId': '2338646468', 'name': 'Roshin P. Raj'}], 'doi': '10.1080/19475705.2024.2448240', 'intent': [], 'isInfluential': False, 'paperId': '05896c96f2901956c460d89d