In [None]:
import os
import requests 
from dotenv import load_dotenv
# from scraper import fetch_website_contents  # not needed: page content is fetched through the Jina AI reader instead
from IPython.display import Markdown, display
from openai import OpenAI

In [None]:
# Article page to summarize. The browser-only "#:~:text=..." scroll-to-text
# fragment that came along with the copied link has been dropped: fragments are
# never sent in an HTTP request, so it had no effect on what gets fetched.
paper_url = 'https://rsisinternational.org/journals/ijrsi/articles/ai-powered-automated-and-portable-device-for-retinal-health-assessment/'

In [None]:
!ollama pull llama3.2:3b
# or any other model you want to use for summarization

In [None]:
#!ollama rm llama3.2:3b  # Remove the model if it is not working or to free up disk space

In [None]:
# Base URL handed to the OpenAI client; it points at a server on localhost:11434.
OLLAMA_BASE_URL = "http://localhost:11434/v1"

# Model tag passed with every chat completion request.
MODEL = "llama3.2:3b"

# OpenAI client redirected to the local endpoint. The api_key is a placeholder
# string (presumably ignored by the local server - confirm).
ollama = OpenAI(
    api_key="ollama",
    base_url=OLLAMA_BASE_URL,
)

In [None]:
# System message sent with every request: it fixes the set of markdown sections
# the model must extract and forbids inventing content that is not in the page.
system_prompt = """
You are a precise and expert research paper analyst.
Given the scraped content of a research paper webpage, extract and present ONLY the following sections in structured markdown.
Do NOT invent or assume anything not present in the content.

## Paper Title
The full title of the paper.

## Authors
A list of all authors.

## Methodology
How the research was conducted — the approach, experimental setup, and evaluation methods used.

## Components / Technologies Used
All hardware components, software frameworks, machine learning models, libraries, datasets, sensors, or tools mentioned.

## PDF Link
The direct link to the PDF version of the paper if present in the content. Otherwise write: *Not found in content.*

## References
A numbered list of the key references cited in the paper.

## Research Gap
The specific gap, limitation, or unsolved problem in existing research that this paper identifies and addresses.

Respond in clean markdown. Do not wrap the markdown in a code block.
"""

In [None]:
# Preamble of the user message; the scraped page text is appended directly
# after the trailing "---" separator and blank line.
user_prompt_prefix = (
    "\n"
    "Below is the scraped content of a research paper webpage.\n"
    "Extract and structure all the requested information exactly as instructed in the system prompt.\n"
    "\n"
    "---\n"
    "\n"
)

In [None]:
def messages_for(website):
    """Build the two-message chat payload for one scraped page.

    Args:
        website: Scraped page text (a string); it is appended to
            ``user_prompt_prefix`` to form the user message.

    Returns:
        A list with the system message (``system_prompt``) followed by the
        user message, each a dict with ``role`` and ``content`` keys.
    """
    system_message = dict(role="system", content=system_prompt)
    user_message = dict(role="user", content=user_prompt_prefix + website)
    return [system_message, user_message]

In [None]:
def fetch_website_contents(url, timeout=60):
    """Fetch the text of a web page through the Jina reader endpoint.

    The target URL is appended to ``https://r.jina.ai/`` and requested with the
    ``X-Engine: browser`` header. ``JINA_API_KEY`` is read from the environment
    (after loading a ``.env`` file) and sent as a bearer token when present.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the reader before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the reader answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    load_dotenv()
    api_key = os.getenv("JINA_API_KEY")

    jina_url = f"https://r.jina.ai/{url}"
    headers = {"X-Engine": "browser"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    # With no key configured the request goes out unauthenticated instead of
    # carrying the invalid literal token "Bearer None".

    response = requests.get(jina_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.text

In [None]:
def summarize(url):
    """Fetch a page and return the model's structured summary of it.

    Args:
        url: Address of the page, passed to ``fetch_website_contents``.

    Returns:
        The ``content`` of the first choice in the chat completion returned by
        the ``ollama`` client for ``MODEL``.
    """
    page_text = fetch_website_contents(url)
    chat_messages = messages_for(page_text)
    completion = ollama.chat.completions.create(model=MODEL, messages=chat_messages)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
def display_summary(url):
    """Summarize the page at ``url`` and render the result as Markdown.

    Args:
        url: Address of the page, passed to ``summarize``.

    Returns:
        None; the summary is shown through ``display``.
    """
    display(Markdown(summarize(url)))

In [None]:
# Summarize the page at paper_url and render the result as Markdown in the cell output.
display_summary(paper_url)