In [None]:
!pip install -q torch bitsandbytes transformers sentencepiece accelerate gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.2/57.2 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.4/320.4 kB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m47.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# imports

import os
from IPython.display import Markdown, display, update_display
from google.colab import drive
from huggingface_hub import login
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig, TextStreamer
import torch
import gradio as gr

In [None]:
# Constants
# Hugging Face Hub identifier of the model; it is the path handed to
# `from_pretrained` when the tokenizer and the model are loaded.
LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct"

In [None]:
# Authenticate against the Hugging Face Hub.
# The access token is read from the Colab secret named HF_TOKEN and is also
# stored as a git credential.
hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

In [None]:
# Sign in to OpenAI using Secrets in Colab: the API key is read from the
# Colab secret named OPENAI_API_KEY and used to build a client object.
# NOTE(review): `OpenAI` is not imported in any cell visible here, so this cell
# raises NameError unless `from openai import OpenAI` is executed elsewhere — confirm.
# NOTE(review): the resulting `openai` client is not used by the cells visible
# here — confirm whether this cell is still needed.

openai_api_key = userdata.get('OPENAI_API_KEY')
openai = OpenAI(api_key=openai_api_key)

In [None]:
# System prompt sent as the "system" chat message on every request.
# It tells the model to simplify French text following the FALC guidelines and
# to answer with the simplified sentence(s) only, without explanations.
system_message = """
You are an expert in simplifying French text according to the Facile à Lire et à Comprendre (FALC) guidelines.
Your task is to rewrite sentences so that they are easy to understand while preserving the original meaning and key information.

Follow these guidelines for simplification:
- Use simple, common vocabulary.
- Write short, clear sentences.
- Avoid complex grammar and jargon.
- Retain the original meaning and key information.
- Ensure grammatical correctness.

Do not include explanations in the response. Only provide the simplified sentence(s).
"""


In [None]:
# bitsandbytes quantization settings for loading a model in 4-bit precision.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # store the weights in 4 bits
    bnb_4bit_quant_type="nf4",              # use the NF4 4-bit data type
    bnb_4bit_use_double_quant=True,         # enable double (nested) quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # run the computations in bfloat16
)

In [None]:
def generate_with_model(standard_sentence):
    """
    Simplify a sentence with the Llama chat model and return only the model's answer.

    The tokenizer and the model are loaded on every call and released again
    before the function returns (on success and on failure), so no GPU memory
    is held between requests.

    Args:
        standard_sentence (str): The user-provided sentence to simplify.

    Returns:
        str: The text generated by the model, without the prompt and without
            special tokens. An empty string is returned for empty or
            whitespace-only input.

    Raises:
        RuntimeError: If loading the tokenizer/model or generating fails. The
            original exception is attached as ``__cause__``.
    """
    # Nothing to simplify: skip the (very expensive) model load entirely.
    if not standard_sentence or not standard_sentence.strip():
        return ""

    import gc
    import torch

    # Step 1: Model identifier (static for the app) and hardware check
    model_path = LLAMA
    use_cuda = torch.cuda.is_available()

    # Pre-declare the large objects so the `finally` clause can always drop them.
    model = inputs = outputs = None

    try:
        # Step 2: Start from a clean slate. Collect garbage first so that freed
        # tensors can actually be returned to the driver by empty_cache().
        gc.collect()
        if use_cuda:
            torch.cuda.empty_cache()

        # Step 3: Load the tokenizer
        print("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        tokenizer.pad_token = tokenizer.eos_token
        print("Tokenizer loaded successfully.")

        # Step 4: Report the device that will be used
        device = "cuda" if use_cuda else "cpu"
        print(f"Device selected: {device}")

        # Step 5: Build the chat messages for this request
        print("Preparing inputs...")
        user_prompt = (
            f"Simplify the following standard sentence into an easier-to-understand version while following FALC guidelines. "
            f"Keep the original meaning and use simple vocabulary and grammar.\n\n"
            f"### Standard Sentence:\n{standard_sentence}\n\n"
            f"### Simplified Sentence:"
        )
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt}
        ]

        # Step 6: Load the model with memory-efficient settings
        print("Loading model...")
        load_kwargs = {
            "torch_dtype": torch.float16 if use_cuda else torch.float32,  # half precision on GPU
            "low_cpu_mem_usage": True,
        }
        if use_cuda:
            load_kwargs["device_map"] = "auto"
            # Apply the notebook's 4-bit bitsandbytes configuration; it is only
            # passed on GPU, CPU loading keeps the plain float32 path.
            load_kwargs["quantization_config"] = quant_config
        model = AutoModelForCausalLM.from_pretrained(model_path, **load_kwargs)

        # Step 7: Tokenize the conversation.
        # add_generation_prompt=True appends the assistant header so the model
        # answers instead of continuing the user turn; return_dict=True also
        # yields the attention mask that generate() needs for reliable results.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        # Step 8: Stream only the newly generated text to stdout (not the prompt)
        streamer = TextStreamer(tokenizer, skip_prompt=True)

        # Step 9: Generate without tracking gradients
        print("Generating outputs...")
        with torch.inference_mode():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                streamer=streamer,
                do_sample=True,
                temperature=0.7,
                pad_token_id=tokenizer.pad_token_id,
                bos_token_id=tokenizer.bos_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Step 10: Decode only the tokens produced after the prompt, so the
        # caller receives the simplified sentence and not the whole conversation.
        generated_tokens = outputs[0][prompt_length:]
        return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    except Exception as e:
        raise RuntimeError(f"Error during model generation: {str(e)}") from e

    finally:
        # Step 11: Drop every reference to the large objects, then free memory.
        # This runs on success and on failure alike.
        model = inputs = outputs = None
        gc.collect()
        if use_cuda:
            torch.cuda.empty_cache()

In [None]:
import gradio as gr

# Callback wired into the Gradio app
def gradio_interface(standard_sentence):
    """
    Gradio callback: simplify the sentence submitted through the UI.

    Args:
        standard_sentence (str): Text entered in the input box.

    Returns:
        str: The value returned by `generate_with_model` for that text.
    """
    simplified_sentence = generate_with_model(standard_sentence)
    return simplified_sentence

# UI widgets: one text box for the original sentence, one for the result
input_text = gr.Textbox(
    label="Input Standard Sentence",
    placeholder="Enter the standard sentence to simplify...",
    lines=3,
)
output_text = gr.Textbox(
    label="Simplified Sentence",
    placeholder="Simplified sentence will appear here...",
    lines=3,
)

# Assemble the app around the callback and the two widgets
interface = gr.Interface(
    title="French Text Simplifier",
    description=(
        "Simplify French text according to the Facile à Lire et à Comprendre (FALC) guidelines. "
        "Enter a standard sentence, and the app will rewrite it to make it easier to understand."
    ),
    fn=gradio_interface,
    inputs=input_text,
    outputs=output_text,
)

# Start the app; debug=True keeps the cell running so errors and logs stay visible
interface.launch(debug=True)


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://50ea3447e59c009bc9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Loading tokenizer...


tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Tokenizer loaded successfully.
Device selected: cuda
Preparing inputs...
Loading model...


config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generating outputs...
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are an expert in simplifying French text according to the Facile à Lire et à Comprendre (FALC) guidelines.
Your task is to rewrite sentences so that they are easy to understand while preserving the original meaning and key information.

Follow these guidelines for simplification:
- Use simple, common vocabulary.
- Write short, clear sentences.
- Avoid complex grammar and jargon.
- Retain the original meaning and key information.
- Ensure grammatical correctness.

Do not include explanations in the response. Only provide the simplified sentence(s).<|eot_id|><|start_header_id|>user<|end_header_id|>

Simplify the following standard sentence into an easier-to-understand version while following FALC guidelines. Keep the original meaning and use simple vocabulary and grammar.

### Standard Sentence:
La sueur coulait sur mes joues.

### Simplifi

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



Generating outputs...
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are an expert in simplifying French text according to the Facile à Lire et à Comprendre (FALC) guidelines.
Your task is to rewrite sentences so that they are easy to understand while preserving the original meaning and key information.

Follow these guidelines for simplification:
- Use simple, common vocabulary.
- Write short, clear sentences.
- Avoid complex grammar and jargon.
- Retain the original meaning and key information.
- Ensure grammatical correctness.

Do not include explanations in the response. Only provide the simplified sentence(s).<|eot_id|><|start_header_id|>user<|end_header_id|>

Simplify the following standard sentence into an easier-to-understand version while following FALC guidelines. Keep the original meaning and use simple vocabulary and grammar.

### Standard Sentence:
Les phénomènes météorologiques extrêmes devie

