In [1]:
import os
import google.generativeai as genai
from pdf2image import convert_from_path
from PIL import Image
from IPython.display import display
from IPython.display import Markdown
import textwrap
import re
import warnings
from dotenv import load_dotenv
import openai

warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def to_markdown(text):
  """Wrap ``text`` in a Markdown block quote for rich notebook display.

  Each bullet character ``•`` becomes an indented ``*`` list marker, and every
  line (blank ones included) is prefixed with ``> ``.
  """
  bulleted = text.replace('•', '  *')
  # keepends=True preserves the original line terminators while prefixing.
  quoted = ''.join('> ' + line for line in bulleted.splitlines(True))
  return Markdown(quoted)

In [8]:
# Load the variables defined in the local .env file into the process environment.
load_dotenv()

# Credentials and tool locations come from the environment, never from the notebook.
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
poppler_path = os.environ.get('poppler_path')
openai.api_key = os.environ.get('OPENAI_API_KEY')

# Authenticate the Gemini client, then build the vision model used for page images.
genai.configure(api_key=GOOGLE_API_KEY)
model1 = genai.GenerativeModel('gemini-pro-vision')


In [4]:
def get_metadata(paper_path, model, dpi=600):
    """Ask a vision model for the bibliographic metadata on a paper's first page.

    Only the first page of the PDF is rasterised. It is written to ``page0.jpg``
    in the current working directory (overwriting any existing file of that
    name) and sent to ``model`` together with a fixed list of questions.

    Args:
        paper_path: Path to the PDF file.
        model: Object exposing ``generate_content([prompt, image])`` whose
            result has a ``.text`` attribute.
        dpi: Rasterisation resolution handed to ``convert_from_path``.
            Defaults to 600, the value that was previously hard-coded.

    Returns:
        The model's answer (``response.text``).

    Raises:
        ValueError: If no page could be rendered from the PDF.
    """
    # Render page 1 only: the other pages were never used, and rasterising a
    # whole paper at high DPI is slow and memory-hungry.
    images = convert_from_path(
        paper_path, dpi, poppler_path=poppler_path, first_page=1, last_page=1
    )
    if not images:
        raise ValueError(f"No pages could be rendered from {paper_path!r}")

    # Save the first page as an image
    image_path = 'page0.jpg'
    images[0].save(image_path, 'JPEG')

    # Define the prompt for the model
    prompt = '''
    Please find the answers for the following questions from the image:
    title_query = "What is the title of this paper?"
    authors_query = "Who are the authors of this paper? Use full names."
    year_query = "Could you please tell me when this paper was published?"
    journal_query = "Which journal was this paper published in?"
    volume_query = "What is the volume number?"
    issue_query = "What is the issue number?"
    pages_query = "What are the page numbers?"
    doi_query = "What is the DOI number?"
    '''

    # Re-open the saved JPEG inside a context manager so its file handle is
    # released as soon as the request has been made.
    with Image.open(image_path) as img:
        response = model.generate_content([prompt, img])

    # Extract and return the generated text
    return response.text

In [5]:
# Extract first-page metadata from one sample paper and show the raw model answer.
paper_path = "C:/Users/DELL/OneDrive/Documents/GitHub/Intellihack_final_Innovision/papers/2022_SD.pdf"
metadata = get_metadata(paper_path=paper_path, model=model1)
display("Metadata: \n" + str(metadata))

'Metadata: \n title_query: Assessment of the Risk of Severe Dengue Using Intrahost Viral Population in Dengue Virus Serotype 2 Patients via Machine Learning\n\nauthors_query: Su-Ihen Huang1, Huey-Pin Tsai2,3, Ya-Fang Wang4, Wen-Chien Ko5,6, Jen-Ren Wang2,3,4,7 and Sheng-Wen Huang1\n\nyear_query: 2022\n\njournal_query: Frontiers in Cellular and Infection Microbiology\n\nvolume_query: 12\n\nissue_query: None\n\npages_query: 831281\n\ndoi_query: 10.3389/fcimb.2022.831281'

In [11]:
def get_reference(style, metadata, custom='custom'):
    """Format extracted paper metadata as a bibliography entry via a chat model.

    Args:
        style: Citation style. ``"APA"``, ``"IEEE"`` and ``"HARVARD"`` select a
            built-in system prompt; the comparison is exact (case-sensitive).
            Any other value makes ``custom`` the system prompt.
        metadata: Metadata text to cite. Any ``Metadata:`` label (with trailing
            whitespace) is stripped before the text is sent as the user message.
        custom: System prompt used when ``style`` is not one of the built-in
            styles. NOTE: with the default value, an unrecognised style sends
            the literal word ``custom`` as the system prompt.

    Returns:
        The model's reply with surrounding whitespace removed.
    """
    # Remove 'Metadata:' if it exists
    metadata = re.sub(r'Metadata:\s*', '', metadata)

    if style == "APA":
        # Define the prompt for the model
        prompt = '''You are an expert in APA citations. Please write a bibliography for metadata using the following template(Do not change punctuation marks):
        If DOI or URL is not given:
        Author, A. A., & Author, B. B. (Year of publication). Title of article in sentence case:
        Capitalize the first letter of the subtitle. Title of the Journal in Mixed Case and
        speacially write in italics, volume number inspeacially write in italics (issue number), pp. xx-xx.

        If it is given:
        Author, A. A., & Author, B. B. (Year of publication). Title of article in sentence case:
        Capitalize the first letter of the subtitle. Title of the Journal in Mixed Case and
        speacially write in italics, volume number speacially write in italics (issue number), pp. xx-xx. DOI or URL'''

    elif style == "IEEE":
        # Define the prompt for the model
        prompt = '''
        You are an expert in IEEE citations. Please write a bibliography using the following template. 
        Note that the example below should be replaced with specific metadata: 
        J. Carlson, D. Menicucci, P. Vorobieff, A. Mammoli, and H. He, "Infrared imaging method for flyby assessment 
        of solar thermal panel operation in field settings," Appl. Therm. Eng., vol. 70, no. 1, pp. 163-171, Sept. 2014. 
        Accessed Mar. 19, 2018. doi:10.1016/j.applthermaleng.2014.05.008. [Online]. 
        Available: https://www.sciencedirect.com/science/article/pii/S1359431114003561
        '''
    elif style == "HARVARD":
        prompt = '''
        You are an expert in HARVARD citations. Please write a bibliography using the following template. 
        Use below template.
        If DOI or URL is given: 
        Write Journal name in Italics
        A DOI specially should be written with the prefix https://doi.org/ followed by the DOI number
        Never put a full stop after a DOI or URL as it may be assumed that it is part of the
        DOI or URL and prevent it from working.
        • Enclose the title of the article in single quotation marks.
        • Capitalise the first letter of each of the main words of the journal title, but not the
        linking words such as "and", "for", "of" or "the"
        
        ex: Dobson, H. (2006) 'Mister Sparkle meets the 'Yakuza': depictions of Japan in The Simpsons', Journal of Popular Culture, 39(1), 
        pp. 44–68. doi:https://doi.org/10.1111/j.1540-5931.2006.00203.x
        '''

    else:
        # Any unrecognised style falls back to the caller-supplied prompt.
        prompt = custom

    # The style prompt goes in as the system message, the metadata as the user message.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": metadata}
        ],
        max_tokens=500
    )

    # Extract and return the generated text
    return response['choices'][0]['message']['content'].strip()

In [12]:
# Build an APA-style reference from the extracted metadata and render it as Markdown.
generated_reference = get_reference(style="APA", metadata=metadata)
Markdown(generated_reference)

Huang, S.-I., Tsai, H.-P., Wang, Y.-F., Ko, W.-C., Wang, J.-R., & Huang, S.-W. (2022). Assessment of the Risk of Severe Dengue Using Intrahost Viral Population in Dengue Virus Serotype 2 Patients via Machine Learning. *Frontiers in Cellular and Infection Microbiology*, *12*, 831281. https://doi.org/10.3389/fcimb.2022.831281

In [14]:
# Build an IEEE-style reference from the extracted metadata and render it as Markdown.
generated_reference = get_reference(style="IEEE", metadata=metadata)
Markdown(generated_reference)

Huang, S.-I., Tsai, H.-P., Wang, Y.-F., Ko, W.-C., Wang, J.-R., & Huang, S.-W. (2022). Assessment of the Risk of Severe Dengue Using Intrahost Viral Population in Dengue Virus Serotype 2 Patients via Machine Learning. Frontiers in Cellular and Infection Microbiology, 12, 831281. https://doi.org/10.3389/fcimb.2022.831281

In [13]:
# "Custom" is not a built-in style, so the prompt below is used as the system message.
custom_format_prompt = 'Please write reference using this format: Author, F., Author, S., Author, T.: Article title. Journal {2}(5), 99--110 (2016), Please Stric into this format and Do not change punctuation marks, The volume number is enclosed in curly braces {} The issue number is enclosed in parentheses ()'
generated_reference = get_reference("Custom", metadata, custom=custom_format_prompt)
Markdown(generated_reference)

Huang, S., Tsai, H., Wang, Y., Ko, W., Wang, J., & Huang, S.: Assessment of the Risk of Severe Dengue Using Intrahost Viral Population in Dengue Virus Serotype 2 Patients via Machine Learning. Frontiers in Cellular and Infection Microbiology {12}(5), 831281 (2022)