In [306]:
import os
import re
import json
import uuid
import requests
from typing import List, Dict, Any
import bibtexparser
from bibtexparser.bwriter import BibTexWriter
from bibtexparser.bibdatabase import BibDatabase
from openai import OpenAI

class SemanticScholarAPI:
    """Thin client for the Semantic Scholar Graph API (v1).

    Every public method only assembles an endpoint path plus query/body
    arguments and delegates to ``_make_request``, which raises
    ``requests.HTTPError`` on an error status and otherwise returns the
    decoded JSON body.
    """

    BASE_URL = "https://api.semanticscholar.org/graph/v1"
    DEFAULT_PAPER_FIELDS = "paperId,title,abstract,year,citationCount,influentialCitationCount,authors,venue,publicationVenue,journal,url"
    # Fields requested for each entry of a references/citations listing.
    DEFAULT_LINK_FIELDS = "paperId,title,year,authors"

    def __init__(self, api_key: str, timeout: float = 30.0):
        """Store the authentication header and the per-request timeout.

        Args:
            api_key: Value sent as ``x-api-key``. When it is ``None`` or empty
                (e.g. the environment variable is unset) the header is left
                out entirely instead of being sent with a bogus value.
            timeout: Seconds handed to ``requests`` as the ``timeout`` of
                every call, so a stalled connection cannot block forever.
        """
        self.headers = {"x-api-key": api_key} if api_key else {}
        self.timeout = timeout

    def _make_request(self, endpoint: str, method: str = "GET", params: Dict[str, Any] = None, data: Dict[str, Any] = None) -> Dict[str, Any]:
        """Send one request to ``BASE_URL/endpoint`` and return the decoded JSON.

        Args:
            endpoint: Path below ``BASE_URL`` (no leading slash).
            method: HTTP verb.
            params: Query-string parameters.
            data: Payload sent as the JSON request body.

        Raises:
            requests.HTTPError: If the response status signals an error.
        """
        url = f"{self.BASE_URL}/{endpoint}"
        response = requests.request(
            method,
            url,
            headers=self.headers,
            params=params,
            json=data,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def fetch_paper_details(self, paper_id: str) -> Dict[str, Any]:
        """Return the API's default record for a single paper."""
        return self._make_request(f"paper/{paper_id}")

    def fetch_paper_details_batch(self, paper_ids: List[str], fields: str = DEFAULT_PAPER_FIELDS) -> List[Dict[str, Any]]:
        """Return details for several papers with a single POST.

        The batch endpoint takes ``fields`` as a query parameter; only the
        list of ids belongs in the JSON body.
        """
        return self._make_request(
            "paper/batch",
            method="POST",
            params={"fields": fields},
            data={"ids": paper_ids},
        )

    def search_papers(self, query: str, limit: int = 10) -> Dict[str, Any]:
        """Run a relevance search and return up to ``limit`` papers."""
        return self._make_request(
            "paper/search",
            params={"query": query, "limit": limit, "fields": self.DEFAULT_PAPER_FIELDS},
        )

    def search_papers_batch(self, queries: List[str], limit: int = 10) -> List[Dict[str, Any]]:
        """POST several search queries in one request.

        NOTE(review): ``paper/search/batch`` does not appear to be part of the
        public Graph API reference — confirm the endpoint exists before
        relying on this method.
        """
        data = {"queries": [{"query": q, "limit": limit} for q in queries], "fields": self.DEFAULT_PAPER_FIELDS}
        return self._make_request("paper/search/batch", method="POST", data=data)

    def fetch_paper_references(self, paper_id: str, limit: int = 10) -> Dict[str, Any]:
        """Return up to ``limit`` papers referenced by ``paper_id``."""
        return self._make_request(
            f"paper/{paper_id}/references",
            params={"fields": self.DEFAULT_LINK_FIELDS, "limit": limit},
        )

    def fetch_paper_references_batch(self, paper_ids: List[str], fields: str = DEFAULT_LINK_FIELDS, limit: int = 10) -> Dict[str, Any]:
        """POST a references lookup for several papers at once.

        NOTE(review): ``paper/batch/references`` does not appear to be a
        documented endpoint — confirm before relying on this method.
        """
        return self._make_request(
            "paper/batch/references",
            method="POST",
            data={"ids": paper_ids, "fields": fields, "limit": limit},
        )

    def fetch_paper_citations(self, paper_id: str, limit: int = 10) -> Dict[str, Any]:
        """Return up to ``limit`` papers that cite ``paper_id``."""
        return self._make_request(
            f"paper/{paper_id}/citations",
            params={"fields": self.DEFAULT_LINK_FIELDS, "limit": limit},
        )

    def fetch_paper_citations_batch(self, paper_ids: List[str], fields: str = DEFAULT_LINK_FIELDS, limit: int = 10) -> Dict[str, Any]:
        """POST a citations lookup for several papers at once.

        NOTE(review): ``paper/batch/citations`` does not appear to be a
        documented endpoint — confirm before relying on this method.
        """
        return self._make_request(
            "paper/batch/citations",
            method="POST",
            data={"ids": paper_ids, "fields": fields, "limit": limit},
        )

class BibTexHandler:
    """Conversions between BibTeX source text and bibtexparser entry dicts."""

    @staticmethod
    def convert_entries_to_bibtex(entries: List[Dict[str, Any]]) -> str:
        """Serialise ``entries`` to BibTeX text.

        The entries are attached to a fresh ``BibDatabase`` and rendered with
        a default-configured ``BibTexWriter``.
        """
        database = BibDatabase()
        database.entries = entries
        return BibTexWriter().write(database)

    @staticmethod
    def parse_bibtex_content(bibtex_content: str) -> List[Dict[str, Any]]:
        """Parse BibTeX text and return the list of entry dicts.

        The parser is created with ``common_strings=True``.
        """
        bibtex_parser = bibtexparser.bparser.BibTexParser(common_strings=True)
        return bibtexparser.loads(bibtex_content, bibtex_parser).entries

class LaTeXHandler:
    """Helpers for locating and rewriting ``\\cite``-style commands in LaTeX text."""

    # Matches \cite, \citet and \citep followed by any number of [optional]
    # arguments and one {key list}. Character classes are used instead of "."
    # so that a citation wrapped over several source lines is still found.
    # Groups: 1 = command name, 2 = optional arguments ('' if none), 3 = keys.
    _CITATION_RE = re.compile(r'\\(cite|citet|citep)((?:\[[^\]]*\])*)\{([^{}]*)\}')

    # Number of hex characters in a freshly generated uid.
    _UID_LENGTH = 4

    @staticmethod
    def _split_keys(key_list: str) -> List[str]:
        """Split a comma-separated key list, dropping blank items."""
        return [key.strip() for key in key_list.split(',') if key.strip()]

    @staticmethod
    def _new_uid(used_uids: set) -> str:
        """Return a short random hex uid that is not yet in ``used_uids``.

        The uid is recorded in ``used_uids`` before returning. Should the
        short id space become crowded, the uid is lengthened rather than
        retrying forever.
        """
        length = LaTeXHandler._UID_LENGTH
        while True:
            for _ in range(16):
                uid = uuid.uuid4().hex[:length]
                if uid not in used_uids:
                    used_uids.add(uid)
                    return uid
            length += 1

    @staticmethod
    def replace_latex_citations_with_uids(latex_text: str) -> str:
        """Tag every cited key with a unique ``_uid=<hex>`` suffix.

        Each key inside ``\\cite``/``\\citet``/``\\citep`` becomes
        ``<key>_uid=<hex>`` so that individual citation occurrences can be
        told apart. Uids are unique within one call. Optional ``[...]``
        arguments are kept, keys are re-joined with ``", "``, and a citation
        command without any non-blank key is left exactly as written.

        Args:
            latex_text: LaTeX source to rewrite.

        Returns:
            The rewritten LaTeX source.
        """
        used_uids = set()

        def replace_citation(match):
            command, optional, key_list = match.groups()
            keys = LaTeXHandler._split_keys(key_list)
            if not keys:
                return match.group(0)
            tagged = [f"{key}_uid={LaTeXHandler._new_uid(used_uids)}" for key in keys]
            return f"\\{command}{optional}{{{', '.join(tagged)}}}"

        return LaTeXHandler._CITATION_RE.sub(replace_citation, latex_text)

    @staticmethod
    def extract_latex_citations(latex_text: str) -> List[str]:
        """Return every cited key in order of appearance.

        Keys are stripped of surrounding whitespace, blank items are skipped,
        and a key cited several times is returned several times.
        """
        return [
            key
            for match in LaTeXHandler._CITATION_RE.finditer(latex_text)
            for key in LaTeXHandler._split_keys(match.group(3))
        ]

class GPTEnhancer:
    """Uses OpenAI chat completions to add citations to LaTeX text and to rate them."""

    SYSTEM_PROMPT = "You are an AI assistant that enhances academic writing."

    def __init__(self, api_key: str, enhance_model: str = "gpt-4o-mini", analysis_model: str = "gpt-4o"):
        """Create the OpenAI client and remember which models to use.

        Args:
            api_key: Key passed to the ``OpenAI`` client.
            enhance_model: Model used by ``enhance_paragraph_with_references``.
            analysis_model: Model used by ``analyze_references``.
        """
        self.openai_client = OpenAI(api_key=api_key)
        self.enhance_model = enhance_model
        self.analysis_model = analysis_model

    def _chat(self, model: str, user_prompt: str, max_tokens: int) -> str:
        """Send the system prompt plus ``user_prompt``; return the reply text ('' if none)."""
        response = self.openai_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ],
            max_tokens=max_tokens,
        )
        # The message content can be None; normalise so callers always get a str.
        return response.choices[0].message.content or ""

    @staticmethod
    def _extract_fenced(text: str, language: str):
        """Return the body of the first fenced block tagged ``language``, or None."""
        match = re.search(r'```' + re.escape(language) + r'\s*([\s\S]*?)\s*```', text)
        return match.group(1) if match else None

    def enhance_paragraph_with_references(self, paragraph: str, bibliography: str) -> str:
        """Ask the model to insert citations from ``bibliography`` into ``paragraph``.

        Args:
            paragraph: LaTeX source to enrich.
            bibliography: BibTeX source the model may cite from.

        Returns:
            The content of the ``latex`` fenced block in the reply. If the
            reply has no such block, the stripped raw reply is returned; if
            the reply is empty, the original ``paragraph`` is returned. The
            result is therefore always a string.
        """
        user_prompt = f"""
        Given the following LaTeX paragraph and bibliography, add appropriate references from the bibliography.
        You can either add references to existing \\cite, \\citep, or \\citet commands, or add new ones wherever needed.
        Attention: Make sure not to remove any existing citations. 
        Only print the output. 

        ```latex
        (tex with added references)
        ```

        Paragraph:
        {paragraph}

        Bibliography:
        {bibliography}
        """

        reply = self._chat(self.enhance_model, user_prompt, max_tokens=3000)
        fenced = self._extract_fenced(reply, "latex")
        if fenced is not None:
            return fenced
        # No fence: the model was told to print only the output, so the raw
        # reply is the best candidate; fall back to the input when it is empty.
        return reply.strip() or paragraph

    def analyze_references(self, paragraph_with_refs: str, parsed_bibliography: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Ask the model to score every citation occurrence in a paragraph.

        Only bibliography entries whose ``ID`` is cited in the paragraph are
        sent to the model, and every cited key is tagged with a uid first so
        each occurrence can be judged separately.

        Args:
            paragraph_with_refs: LaTeX source containing citation commands.
            parsed_bibliography: Entry dicts as produced by
                ``BibTexHandler.parse_bibtex_content``.

        Returns:
            A dict with ``'paragraph'`` (the uid-tagged LaTeX) and
            ``'explanations'`` (the decoded JSON from the reply, or ``[]``
            when the reply contains no usable JSON array).

        Raises:
            json.JSONDecodeError: If the reply has a ``json`` fenced block
                whose content is not valid JSON.
        """
        cited_keys = set(LaTeXHandler.extract_latex_citations(paragraph_with_refs))
        relevant_entries = [entry for entry in parsed_bibliography if entry.get('ID') in cited_keys]
        references_bibtex = BibTexHandler.convert_entries_to_bibtex(relevant_entries)
        paragraph_with_uids = LaTeXHandler.replace_latex_citations_with_uids(paragraph_with_refs)

        user_prompt = f"""
        Given the following LaTeX and bibliography, add appropriate explanations for each unique key,
        and explain if it is a good citation or not. Give a score of 0-100% in terms of being a good citation
        Try to be extremely critical and judicious, and use a low score if a citation is not clearly relevant to the context or not.

        Output must be in a JSON format, as an array of items:

        ```json
        [{{
            "citation_id": "(the BibTeX ID)",
            "uid": "(uid)",
            "score": "(score)",
            "explanation": "(Explaining if this is a good citation, and if yes, why)"
        }}]
        ```

        Paragraph: 
        {paragraph_with_uids}

        Bibliography:
        {references_bibtex}
        """

        reply = self._chat(self.analysis_model, user_prompt, max_tokens=2000)
        payload = self._extract_fenced(reply, "json")
        if payload is not None:
            explanations = json.loads(payload)
        else:
            # Models sometimes answer with bare JSON; accept it when it is the
            # requested array, otherwise report no explanations.
            try:
                explanations = json.loads(reply)
            except json.JSONDecodeError:
                explanations = []
            if not isinstance(explanations, list):
                explanations = []
        return {
            'paragraph': paragraph_with_uids,
            'explanations': explanations,
        }

# Build both API clients from credentials supplied through the environment.
semantic_scholar_api = SemanticScholarAPI(os.getenv('S2_API_KEY'))
gpt_enhancer = GPTEnhancer(os.getenv('OPENAI_API_KEY'))

# Read the inputs with an explicit encoding: the LaTeX/BibTeX sources contain
# non-ASCII characters and the platform default encoding is not always UTF-8.
with open('sample2.tex', 'r', encoding='utf-8') as f:
    paragraph = f.read()

with open('sample.bib', 'r', encoding='utf-8') as f:
    bibliography = f.read()

parsed_bibliography = BibTexHandler.parse_bibtex_content(bibliography)

enhanced_paragraph = gpt_enhancer.enhance_paragraph_with_references(paragraph, bibliography)
if enhanced_paragraph is None:
    # The enhancer found no LaTeX block in the reply; analysing None would
    # crash, so continue with the unmodified paragraph instead.
    print('\nNo enhanced LaTeX was returned; using the original paragraph.')
    enhanced_paragraph = paragraph
print('\nEnhanced paragraph:')
print(enhanced_paragraph)

reference_analysis = gpt_enhancer.analyze_references(enhanced_paragraph, parsed_bibliography)
print('\nReference analysis:')
print(json.dumps(reference_analysis, indent=2))



Enhanced paragraph:
\chapter{Introduction}

% \subsection{The Rise and Challenges of Deep Neural Networks}
Deep neural networks have revolutionized the field of artificial intelligence, achieving unprecedented performance in a wide range of tasks, from image recognition to natural language processing \cite{zhang2019root, krizhevsky2012imagenet}. Despite their remarkable success, these models often remain enigmatic, functioning as ``black boxes'' that transform inputs into outputs through a complex series of non-linear operations. This opacity poses significant challenges for researchers and practitioners, as it hinders our ability to fully understand and optimize these powerful systems \cite{castelvecchi2016can}.

At the heart of the deep learning paradigm lies the concept of signal propagation—the journey of information as it flows through the layers of a neural network during both forward and backward passes. Understanding this process is crucial for several reasons. It provides ins

In [None]:
paper_search_batch("