In [1]:
!pip install -q  torch bitsandbytes transformers sentencepiece accelerate gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.2/57.2 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.4/320.4 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m73.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# imports

import os
from IPython.display import Markdown, display, update_display
from google.colab import drive
from huggingface_hub import login
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig, TextStreamer
import torch
import gradio as gr

In [3]:
# Constants
FALCON = "tiiuae/Falcon3-7B-Base"

In [4]:
# Sign in to HuggingFace Hub

hf_token = userdata.get('HF_TOKEN')
login(hf_token, add_to_git_credential=True)

In [10]:
system_message = """
You are an expert in simplifying French text according to the Facile à Lire et à Comprendre (FALC) guidelines.
Your task is to rewrite sentences so that they are easy to understand while preserving the original meaning and key information.

Follow these guidelines for simplification:
- Use simple, common vocabulary.
- Write short, clear sentences.
- Avoid complex grammar and jargon.
- Retain the original meaning and key information.
- Ensure grammatical correctness.

Important Instructions:
- ONLY provide the simplified sentence in your response.
- Do NOT include explanations, questions, or additional context.
- The response must remain in French.
"""



In [6]:
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
)

In [11]:
def generate_with_model(standard_sentence):
    """
    Generates a simplified text output using a pretrained model with proper memory management.
    Handles both chat template and non-chat template models.

    Args:
        standard_sentence (str): The dynamic user-provided input (standard sentence to simplify).

    Returns:
        str: Simplified output text.
    """
    import torch
    import gc

    # Step 1: Define the model path
    model_path = FALCON

    try:
        # Step 2: Clear CUDA cache and garbage collect
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

        # Step 3: Load the tokenizer
        print("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        tokenizer.pad_token = tokenizer.eos_token
        print("Tokenizer loaded successfully.")

        # Step 4: Check for GPU availability
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Device selected: {device}")

        # Step 5: Prepare the messages and inputs
        print("Preparing inputs...")
        prompt = (
            "Simplify the following standard sentence into an easier-to-understand version while following FALC guidelines. "
            "The response must be in French and consist ONLY of the simplified sentence. "
            "Do not include explanations, questions, or unrelated content.\n\n"
            "### Examples:\n"
            "Standard Sentence: La voiture avance rapidement sur la route.\n"
            "Simplified Sentence: La voiture roule vite sur la route.\n\n"
            "Standard Sentence: Les enfants jouent bruyamment dans le parc.\n"
            "Simplified Sentence: Les enfants font du bruit dans le parc.\n\n"
            f"### Standard Sentence:\n{standard_sentence}\n\n"
            "### Simplified Sentence:"
        )

        # Directly encode the prompt without using chat templates
        print("Tokenizing input...")
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512  # Add a max length to prevent too long sequences
        ).to(device)

        # Step 6: Load model with memory-efficient settings
        print("Loading model...")
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            device_map="auto" if torch.cuda.is_available() else None,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            low_cpu_mem_usage=True
        )

        # Step 7: Set up streamer
        streamer = TextStreamer(tokenizer)

        # Step 8: Generate with memory-efficient settings
        print("Generating outputs...")
        with torch.inference_mode():
            outputs = model.generate(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=200,
                streamer=streamer,
                do_sample=True,
                temperature=0.7,
                pad_token_id=tokenizer.pad_token_id,
                bos_token_id=tokenizer.bos_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Step 9: Decode outputs
        result = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract only the simplified sentence
        if "### Simplified Sentence:" in result:
          simplified_sentence = result.split("### Simplified Sentence:")[-1].strip()
        else:
        # Fallback in case the response deviates
          simplified_sentence = result.strip()

        # Step 10: Clean up
        del model
        del inputs
        del outputs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

        return simplified_sentence

    except Exception as e:
        # Clean up in case of error
        if 'model' in locals():
            del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
        raise Exception(f"Error during model generation: {str(e)}")

In [12]:
import gradio as gr

# Define the Gradio interface function
def gradio_interface(standard_sentence):
    """
    Interface function for Gradio to simplify a sentence using the model.

    Args:
        standard_sentence (str): The input sentence to simplify.

    Returns:
        str: Simplified sentence.
    """
    # Call the generate_with_model function with the user-provided input
    return generate_with_model(standard_sentence)

# Define Gradio components
input_text = gr.Textbox(
    lines=3,
    placeholder="Enter the standard sentence to simplify...",
    label="Input Standard Sentence"
)
output_text = gr.Textbox(
    lines=3,
    placeholder="Simplified sentence will appear here...",
    label="Simplified Sentence"
)

# Create the Gradio interface
interface = gr.Interface(
    fn=gradio_interface,         # Function to call
    inputs=input_text,           # Input component
    outputs=output_text,         # Output component
    title="French Text Simplifier",  # App title
    description=(
        "Simplify French text according to the Facile à Lire et à Comprendre (FALC) guidelines. "
        "Enter a standard sentence, and the app will rewrite it to make it easier to understand."
    )
)

# Launch the Gradio app
interface.launch(debug=True)

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://a8f11f7517874e9f58.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Loading tokenizer...
Tokenizer loaded successfully.
Device selected: cuda
Preparing inputs...
Tokenizing input...
Loading model...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



Generating outputs...
Simplify the following standard sentence into an easier-to-understand version while following FALC guidelines. The response must be in French and consist ONLY of the simplified sentence. Do not include explanations, questions, or unrelated content.

### Examples:
Standard Sentence: La voiture avance rapidement sur la route.
Simplified Sentence: La voiture roule vite sur la route.

Standard Sentence: Les enfants jouent bruyamment dans le parc.
Simplified Sentence: Les enfants font du bruit dans le parc.

### Standard Sentence:
La sueur coulait sur mes joues.

### Simplified Sentence:
La sueur descendait sur mes joues.<|endoftext|>
Loading tokenizer...
Tokenizer loaded successfully.
Device selected: cuda
Preparing inputs...
Tokenizing input...
Loading model...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



Generating outputs...
Simplify the following standard sentence into an easier-to-understand version while following FALC guidelines. The response must be in French and consist ONLY of the simplified sentence. Do not include explanations, questions, or unrelated content.

### Examples:
Standard Sentence: La voiture avance rapidement sur la route.
Simplified Sentence: La voiture roule vite sur la route.

Standard Sentence: Les enfants jouent bruyamment dans le parc.
Simplified Sentence: Les enfants font du bruit dans le parc.

### Standard Sentence:
Veuillez suivre les consignes à la lettre pour éviter tout problème

### Simplified Sentence:
Suivez les instructions à la lettre pour éviter tout problème.<|endoftext|>
Loading tokenizer...
Tokenizer loaded successfully.
Device selected: cuda
Preparing inputs...
Tokenizing input...
Loading model...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



Generating outputs...
Simplify the following standard sentence into an easier-to-understand version while following FALC guidelines. The response must be in French and consist ONLY of the simplified sentence. Do not include explanations, questions, or unrelated content.

### Examples:
Standard Sentence: La voiture avance rapidement sur la route.
Simplified Sentence: La voiture roule vite sur la route.

Standard Sentence: Les enfants jouent bruyamment dans le parc.
Simplified Sentence: Les enfants font du bruit dans le parc.

### Standard Sentence:
L'apprentissage des mathématiques demande de la rigueur et de la patience

### Simplified Sentence:
L'apprentissage des mathématiques nécessite rigueur et patience.<|endoftext|>
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://a8f11f7517874e9f58.gradio.live


