In [None]:
# Install various libraries using pip
!pip install google-generativeai tiktoken futures feedparser

In [None]:
# Import required libraries
import os
import re
import time
import requests
import tiktoken
from google.colab import userdata
import google.generativeai as genai
from concurrent.futures import ThreadPoolExecutor

In [None]:
# Configure the google.generativeai client with the key stored under
# "GOOGLE_API_KEY" in google.colab's userdata, so the key never appears in the notebook.
genai.configure(api_key= userdata.get("GOOGLE_API_KEY"))

def gemini(prompt, max_tokens):
    """Generate text for `prompt` with the 'gemini-1.5-pro' model.

    Args:
        prompt: Content passed to `generate_content`.
        max_tokens: Forwarded as `max_output_tokens` in the generation config.

    Returns:
        The `.text` attribute of the model response.
    """
    model = genai.GenerativeModel('gemini-1.5-pro')
    # Temperature is fixed at 1; only the output-token limit is caller-controlled.
    config = genai.types.GenerationConfig(
        max_output_tokens=max_tokens,
        temperature=1,
    )
    return model.generate_content(prompt, generation_config=config).text

In [None]:
# Contact email read from the "EMAIL" entry of google.colab's userdata;
# get_reference sends it to the CiteAs API as the `email` query parameter.
EMAIL = userdata.get("EMAIL")

# Call the citeas API for references
def get_reference(doi, timeout=30):
    """Fetch a formatted citation string for `doi` from the CiteAs API.

    Args:
        doi: Identifier appended to the ``/product/`` path of the CiteAs API
            (callers in this notebook pass DOIs and arXiv entry ids).
        timeout: Seconds to wait for the HTTP response; without it a stalled
            connection would block the calling worker thread indefinitely.

    Returns:
        The "citation" text of the second entry in the response's
        "citations" list, falling back to the first entry when the second is
        missing or malformed, with ``<i>``/``</i>`` tags stripped.

    Raises:
        requests.RequestException: On network failure or timeout.
        KeyError, IndexError, TypeError: If the response JSON holds no usable
            citation at all.
        Whatever ``response.json()`` raises when the body is not valid JSON.
    """
    url = f"https://api.citeas.org/product/{doi}"
    # Passing the email through `params` lets requests URL-encode it
    # (e.g. a "+" in the address) instead of pasting it raw into the URL.
    response = requests.get(url, params={"email": EMAIL}, timeout=timeout)

    citations = response.json()["citations"]
    try:
        reference = citations[1]["citation"]
    except (IndexError, KeyError, TypeError):
        # Only the "second entry unavailable" cases fall back; anything else
        # (including KeyboardInterrupt) is no longer swallowed.
        reference = citations[0]["citation"]
    return reference.replace("<i>", "").replace("</i>", "")

In [None]:
# Count input tokens
def count_tokens(text):
    """Return how many tokens `text` encodes to under tiktoken's "gpt-4" encoding."""
    return len(tiktoken.encoding_for_model("gpt-4").encode(text))

In [None]:
# Reduce one entry of a two-author list to the name used in the citation
def _citation_surname(author):
    """Return the text before the first comma; if it contains a space, its second word."""
    surname = author.split(",")[0]
    if " " in surname:
        surname = surname.split(" ")[1]
    return surname


# Make intext-citations according to number of authors
def get_intext_citation(reference):
    """Build an author-year in-text citation from a formatted reference string.

    The author block and year are taken from the start of `reference`, which
    is expected to look like ``"<authors>, <4-digit year>..."``.

    Args:
        reference: Formatted reference text.

    Returns:
        One of ``"Surname (year)"``, ``"Surname1 and Surname2 (year)"`` or
        ``"Surname et al. (year)"``. When no year can be found the year is
        rendered as ``n.d.``; when the leading author/year pattern is absent,
        the text before the first comma is used as the author.
    """
    lead = re.match(r'^(.*?)\s*,\s*(\d{4})', reference)
    authors, year = (lead.group(1), lead.group(2)) if lead else (None, None)
    standalone_year = re.search(r'\b\d{4}\b', reference)

    if 'et al.,' in reference:
        # 'et al.,' guarantees a comma, so find() cannot return -1 here.
        surname = reference[:reference.find(',')]
        if standalone_year:
            year = standalone_year.group(0)
        # Previously `year` could be unbound here and raise UnboundLocalError.
        return f"{surname} et al. ({year if year is not None else 'n.d.'})"

    if authors is None:
        # The leading pattern did not match; this used to raise
        # UnboundLocalError. Fall back to the text before the first comma and
        # to any standalone four-digit number as the year.
        year = standalone_year.group(0) if standalone_year else "n.d."
        authors_list = [reference.split(",")[0].strip() or "Unknown"]
    elif '.,' in authors:
        authors_list = re.split(r',\s| & ', authors)
    elif '&' in authors:
        authors_list = authors.split(" & ")
    else:
        authors_list = [authors]

    if len(authors_list) == 2:
        first, second = (_citation_surname(author) for author in authors_list)
        return f"{first} and {second} ({year})"

    # One author, or three and more: the name is the text before the first
    # comma. (The old single-author branch tested `" " in <list>`, which never
    # did anything useful and would have raised AttributeError if it fired.)
    surname = authors_list[0].split(",")[0]
    if len(authors_list) == 1:
        return f"{surname} ({year})"
    return f"{surname} et al. ({year})"

In [None]:
from abc import ABC, abstractmethod

class BaseWebAPIDataLoader(ABC):
    """Abstract base for loaders that GET JSON from an HTTP API.

    Subclasses supply `fetch_data`; `make_request` performs the HTTP call.
    """

    def __init__(self, base_url):
        # Prefix that `make_request` prepends to every endpoint.
        self.base_url = base_url

    @abstractmethod
    def fetch_data(self, search_query, **kwargs):
        """Run a search for `search_query`; implemented by each concrete loader."""
        pass

    def make_request(self, endpoint, params=None, headers=None, timeout=30):
        """GET ``base_url + endpoint`` and return the decoded JSON body.

        Args:
            endpoint: Path appended verbatim to `base_url`.
            params: Optional query parameters.
            headers: Optional request headers (may carry credentials).
            timeout: Seconds to wait for the response before requests gives up.

        Returns:
            The parsed JSON payload of a 200 response.

        Raises:
            Exception: If the response status code is not 200.
        """
        url = f"{self.base_url}{endpoint}"
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        print(url)
        print(params)
        # Headers are deliberately not printed: they can contain API keys
        # (e.g. "x-api-key"), which would be saved into the notebook output.
        if response.status_code == 200:
            return response.json()
        raise Exception(f"Failed to fetch data from API: {response.status_code}")

In [None]:
import jellyfish

# Get research papers from semantic scholar
class SemanticScholarLoader(BaseWebAPIDataLoader):
    """Loader for the Semantic Scholar Graph API paper-search endpoint."""

    # API key sent as the "x-api-key" header; set per instance in __init__.
    SS_key = None

    def __init__(self, SS_key):
        self.SS_key = SS_key
        super().__init__("https://api.semanticscholar.org/graph/v1/paper/search")

    def fetch_data(self, search_query, limit=12, year_range=None):
        """Search for papers matching `search_query`.

        Args:
            search_query: Text sent as the "query" parameter.
            limit: Maximum number of results requested.
            year_range: Optional value for the API's "year" parameter.

        Returns:
            The list stored under "data" in the response, or [] if absent.
        """
        headers = {
            "x-api-key": self.SS_key
        }
        params = {
            "query": search_query,
            "limit": limit,
            "fields": "title,url,abstract,authors,citationStyles,journal,year,externalIds",
        }

        if year_range is not None:
            params["year"] = year_range

        data = self.make_request("", params=params, headers=headers)
        return data.get("data", [])

    def fetch_and_sort_papers(
        self,
        search_query,
        limit=100,
        top_n=100,
        year_range=None,
        weight_similarity=0.7,
    ):
        """Collect abstracts and references for papers matching `search_query`.

        Each paper that has both an abstract and a DOI is resolved to a
        reference via `get_reference`; its abstract is suffixed with the
        in-text citation from `get_intext_citation`.

        Args:
            search_query: Query forwarded to `fetch_data`.
            limit: Maximum number of papers requested.
            top_n: Accepted for interface compatibility; currently unused.
            year_range: Optional year filter forwarded to `fetch_data`.
            weight_similarity: Accepted for interface compatibility; currently
                unused (no sorting is performed despite the method name).

        Returns:
            A tuple ``(abstracts, references)`` of two parallel lists.
        """
        abstracts = []
        references = []

        # Use the caller's query. This previously read the notebook-global
        # `research_question`, silently ignoring the `search_query` argument.
        for paper in self.fetch_data(search_query, limit, year_range):
            abstract = paper.get("abstract", "")
            try:
                doi = paper["externalIds"]["DOI"]
            except (KeyError, TypeError):
                # No "externalIds"/"DOI" key, or "externalIds" is null.
                doi = None

            if abstract is None or doi is None:
                continue

            try:
                reference = get_reference(doi)
                if "(n.d.)." in reference and "Error: DOI Not Found" in reference:
                    continue
                intext_citation = get_intext_citation(reference)
            except Exception as exc:
                # One unresolvable citation must not discard every other paper.
                print(f"Skipping DOI {doi}: {exc!r}")
                continue

            references.append(reference)
            abstracts.append(f"{abstract} in-text citations: {intext_citation}")

        return abstracts, references

In [None]:
import feedparser

# Get research papers from arxiv
def search_arxiv(query, max_results=50, timeout=30):
    """Search arXiv and return abstracts paired with formatted references.

    Args:
        query: Text sent as the arXiv API "search_query" parameter.
        max_results: Maximum number of entries requested from arXiv.
        timeout: Seconds to wait for the arXiv HTTP response.

    Returns:
        A tuple ``(abstracts, references)`` of two parallel lists. Each
        abstract has its newlines removed and is suffixed with the in-text
        citation derived from the corresponding reference.
    """
    abstracts = []
    references = []
    base_url = "http://export.arxiv.org/api/query?"

    query_params = {
        "search_query": query,
        "max_results": max_results
    }
    response = requests.get(base_url, params=query_params, timeout=timeout)
    feed = feedparser.parse(response.content)

    for result in feed.entries:
        # getattr keeps an entry lacking id/summary from raising AttributeError.
        arxiv_id = getattr(result, "id", None)
        abstract = getattr(result, "summary", None)
        if abstract is None or arxiv_id is None:
            continue

        try:
            reference = get_reference(arxiv_id)
            if "(n.d.)." in reference and "Error: DOI Not Found" in reference:
                continue
            intext_citation = get_intext_citation(reference)
        except Exception as exc:
            # One unresolvable citation must not discard every other entry.
            print(f"Skipping arXiv entry {arxiv_id}: {exc!r}")
            continue

        references.append(reference)
        abstract = abstract.replace("\n", "")
        abstracts.append(f"{abstract} in-text citations: {intext_citation}")

    return abstracts, references

In [None]:
# Prompt template for the literature review. write_literature_review fills the
# {research_question} and {abstracts} placeholders with str.format, so any
# literal braces added to this text must be doubled.
literature_review_prompt = """
Write a coherent literature review from all provided research papers while addressing the research question "{research_question}" for the purpose to help researchers in their research paper.
Write professionally in a seamless flow. Must use all the provided research paper in the literature review. Add only one in-text citation per research paper from the attached intext_citations.
Write at least a few sentences for every citation and research paper.

Research Papers: {abstracts}
"""

In [None]:
def write_literature_review(abstracts, research_question):
    """Fill the review prompt and ask the model for a literature review.

    Args:
        abstracts: Value substituted for ``{abstracts}`` in the prompt template.
        research_question: Value substituted for ``{research_question}``.

    Returns:
        The text returned by `gemini` for the filled prompt.
    """
    filled_prompt = literature_review_prompt.format(
        abstracts=abstracts,
        research_question=research_question,
    )

    # Report the prompt size as measured by count_tokens.
    prompt_token_count = count_tokens(filled_prompt)
    print("Input tokens:", prompt_token_count)

    # Output budget is whatever is left of 128000 tokens, never negative.
    output_budget = max(128000 - prompt_token_count, 0)

    return gemini(filled_prompt, max_tokens=output_budget)

In [None]:
def generate_literature_review(research_question, SS_key):
    """Search arXiv and Semantic Scholar concurrently and write a review.

    Args:
        research_question: Query used for both searches and for the prompt.
        SS_key: Semantic Scholar API key passed to `SemanticScholarLoader`.

    Returns:
        The generated review followed by a numbered reference list, or None
        when neither source produced any abstract.
    """
    print(f"Research question: {research_question}")

    abstracts = []
    references = []

    with ThreadPoolExecutor(max_workers=2) as executor:
        searches = {
            "arXiv": executor.submit(search_arxiv, research_question),
            "Semantic Scholar": executor.submit(
                SemanticScholarLoader(SS_key).fetch_and_sort_papers, research_question
            ),
        }

        for source, future in searches.items():
            try:
                found_abstracts, found_references = future.result()
            except Exception as exc:
                # A failing source is still tolerated, but it is now reported
                # instead of vanishing, and KeyboardInterrupt is no longer
                # swallowed the way the bare `except:` did.
                print(f"{source} search failed: {exc!r}")
                continue
            abstracts.extend(found_abstracts)
            references.extend(found_references)

    if len(abstracts) == 0:
        print("No papers found for the given research question.")
        return
    literature_review_text = write_literature_review(abstracts, research_question)
    references_list = "\n".join([f"{i}. {reference}" for i, reference in enumerate(references, start=1)])
    literature_review_text += f"\n\nReferences: {references_list}"

    print("Literature review generated using", len(abstracts), "papers.")
    print("Literature Review:", literature_review_text)

    return literature_review_text

In [None]:
# Enter your research question here.
# The answer is stored in the notebook-global `research_question`, which the
# final cell passes to generate_literature_review.
research_question = input("Enter your research question: ")

In [None]:
# Semantic Scholar API key, read from the "SS_key" entry of google.colab's
# userdata so it is not hardcoded in the notebook.
SS_key = userdata.get("SS_key")

In [None]:
# Run the full pipeline: search both sources, then generate the review.
# The result is None when no papers were found for the question.
literature_review = generate_literature_review(research_question, SS_key)