In [1]:
%%capture
# Install the notebook's dependencies; the %%capture cell magic above keeps
# pip's output out of the notebook.
# NOTE(review): none of these installs pin a version, so a re-run may pick up
# different releases than the ones shown in the recorded outputs.
!pip install unsloth
# PDF libraries imported by the PDF-parsing cell.
!pip install pypdf
!pip install pymupdf4llm
# Replace the PyPI build of unsloth installed above with the current code from
# the GitHub repository (with the "colab-new" extra), bypassing pip's cache.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [2]:
from unsloth import FastLanguageModel
import torch
# --- Loader settings -------------------------------------------------------
# Sequence length handed to the Unsloth loader.
max_seq_length = 2048
# None lets the loader auto-detect the dtype
# (Float16 for Tesla T4 / V100, Bfloat16 for Ampere+).
dtype = None
# 4-bit quantization reduces memory usage; it can be switched off with False.
load_in_4bit = True

# Gather the loader arguments in one place, then load the model together with
# its tokenizer in a single call.
load_options = dict(
    model_name="RaspRi/Llama-3.2-3B-Instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.9.post4: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/97.3M [00:00<?, ?B/s]

Unsloth 2024.9.post4 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [27]:
import os
import tempfile
from functools import wraps

import pymupdf4llm
import requests
from pypdf import PdfReader


def download_file_from_url(func):
    """Decorator that downloads a file from a URL and passes the file path to the function.

    The decorated function is called with the path of a temporary local copy
    of the downloaded file in place of the URL. The temporary file is removed
    once the call finishes, whether it succeeded or not.

    Args:
        func: The function to wrap. Its first positional parameter receives
            the path of the downloaded file.

    Returns:
        The wrapped function, which takes a URL as its first argument.
    """

    @wraps(func)
    def wrapper(url, *args, **kwargs):
        """Download ``url`` to a temporary file, call ``func`` on it, then clean up.

        Args:
            url: The URL to download the file from.
            *args: Additional arguments to pass to the wrapped function.
            **kwargs: Additional keyword arguments to pass to the wrapped function.

        Returns:
            The result of the wrapped function, or ``None`` if the download
            or the wrapped function raised an exception (the error is printed).
        """
        # Bound before the ``try`` so the ``finally`` clause can always read it,
        # even when the download fails before any file has been created.
        temp_file_path = None
        try:
            # A timeout keeps an unresponsive server from hanging the notebook;
            # a timeout error is a RequestException and is reported below.
            response = requests.get(url, timeout=30)
            response.raise_for_status()  # Raise an HTTPError for bad responses

            # A unique temporary file avoids overwriting (and later deleting)
            # an unrelated "temp.pdf" in the working directory.
            fd, temp_file_path = tempfile.mkstemp(suffix=".pdf")
            with os.fdopen(fd, "wb") as temp_file:
                temp_file.write(response.content)

            return func(temp_file_path, *args, **kwargs)

        except requests.exceptions.RequestException as e:
            print(f"Error downloading file: {e}")
            return None

        except Exception as e:
            # Best-effort by design: report the problem and hand back None
            # instead of interrupting the notebook.
            print(f"An error occurred: {e}")
            return None

        finally:
            if temp_file_path is not None and os.path.exists(temp_file_path):
                os.remove(temp_file_path)

    return wrapper


@download_file_from_url
def parse_pdf_to_markdown(file_path):
    """Convert a PDF file into markdown text with pymupdf4llm.

    Because of the ``download_file_from_url`` decorator, callers pass a URL;
    the decorator supplies the path of the downloaded copy as ``file_path``.

    Args:
        file_path: The path to the PDF file on disk.

    Returns:
        The markdown text produced by ``pymupdf4llm.to_markdown``.
    """
    return pymupdf4llm.to_markdown(file_path)

In [28]:
# arXiv PDF links for the papers available to summarize.
DistillBERT = "https://arxiv.org/pdf/1910.01108"
QLoRA = "https://arxiv.org/pdf/2305.14314"
RoPE = "https://arxiv.org/pdf/2104.09864"

# Download the selected paper and convert it to markdown.
markdown = parse_pdf_to_markdown(RoPE)

# parse_pdf_to_markdown returns None when the download or the conversion
# fails. Stop here with a clear message rather than letting the prompt
# construction in a later cell fail on a missing or empty document.
if not markdown:
    raise RuntimeError(f"No markdown text could be extracted from {RoPE}")

Processing temp.pdf...


In [29]:
from unsloth.chat_templates import get_chat_template
from transformers import TextStreamer

# Attach the "llama-3.1" chat template to the tokenizer.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

# Put the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Instructions appended after the paper text.
# NOTE(review): the guidelines mention a "Highlights" section that the
# seven-part structure does not define - confirm which section was meant.
summary_instructions = """\n
Summarize this paper using the following structure:

1. Summary: Write a brief overview of the paper in 2-3 sentences.

2. Problem: State the main problem or research question addressed.

3. Objectives: List the specific aims or goals of the study.

4. Methods: Describe the key methodologies used.

5. Results: Summarize the main findings.

6. Conclusions: State the primary conclusions drawn by the authors.

7. Impact: Briefly mention the potential impact or implications of the research.

For each section, extract and paraphrase the most crucial information from the paper. Aim for conciseness while maintaining accuracy.

Additional guidelines:
- Use bullet points for the Highlights section.
- Keep each section concise, focusing only on the most critical information.
- Use objective language, avoiding personal opinions or evaluations.
- When possible, use quantitative data to support key points.
- Maintain the original terminology used in the paper.
- If the paper lacks information for a particular section, you may omit that section.

Important: Your summary should consist primarily of information extracted and paraphrased from the original text. Avoid introducing new ideas or interpretations not present in the paper.

Aim for a total summary length of approximately 300-500 words, adjusting based on the complexity and length of the original paper.

Format the output with clear section headings and proper spacing between sections.
"""

# The full paper comes first, followed by the summarization instructions.
prompt = markdown + summary_instructions

# Single-turn conversation holding the whole prompt as the user message.
messages = [{"role": "user", "content": prompt}]

# Tokenize with the chat template; add_generation_prompt is required so the
# model continues with an assistant turn.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

# Stream generated text as it is produced, leaving out the echoed prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = model.generate(
    input_ids=inputs,
    streamer=text_streamer,
    max_new_tokens=1024,
    use_cache=True,
    temperature=1.5,
    min_p=0.1,
)

Summary: 

The paper proposes a new position encoding method, named Rotary Position Embedding (RoPE), to effectively leverage positional information in transformer architectures.

Problem:

The paper investigates various approaches to integrate positional information into the learning process of transformer-based language models, aiming to address the challenge of handling the order of words.

Objectives:

The study's main goal is to develop a novel position encoding method to address the issue of positional information in transformer-based models.

Methods: 

The approach involves encoding the absolute position with a rotation matrix and incorporating the explicit relative position dependency in self-attention formulations.

Results: 

Experimental results on various long text classification benchmark datasets show that the proposed RoFormer outperforms its alternatives in terms of accuracy, indicating that RoPE has valuable properties, including flexibility in sequence length, decayi