**Using Falcon to generate completions for sci-fi story prompts**

In [None]:
!pip install transformers datasets huggingface_hub bitsandbytes accelerate

In [None]:
from datasets import load_dataset

# Load the prompt dataset, then keep only the first two entries of the
# "prompt" column of its "train" split; `dataset` is what the later cells read.
prompt_corpus = load_dataset("faizalnf1800/gpt3.5-scifi-story-prompt")
train_prompts = prompt_corpus['train']['prompt']
dataset = train_prompts[:2]

In [None]:
import pandas as pd
import bitsandbytes as bnb
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer,BitsAndBytesConfig

# 4-bit NF4 quantization settings; matrix multiplications are computed in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    # The option is spelled `bnb_4bit_use_double_quant`. The previous spelling
    # (`load_4bit_use_double_quant`) is not a BitsAndBytesConfig option, so
    # double quantization was never actually switched on.
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized model and its tokenizer
model_name = "ericzzz/falcon-rw-1b-instruct-openorca"
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# NOTE(review): there is deliberately no `model.to(device)` and no `device=`
# argument to the pipeline. A bitsandbytes-quantized model is expected to be
# placed on the GPU while it is loaded - confirm on your hardware.

# Reuse the end-of-sequence token id as the padding token id.
model.config.pad_token_id = model.config.eos_token_id

# Text-generation pipeline wrapping the already-loaded model and tokenizer
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Generation settings shared by every prompt; built once, outside the loop.
generation_kwargs = dict(
    max_length=3000,
    min_length=1000,
    num_return_sequences=1,
    top_k=50,
    top_p=0.95,
    temperature=2.0,
    repetition_penalty=2.0,
    num_beams=5,
    length_penalty=0.8,
    pad_token_id=generator.tokenizer.eos_token_id,
    do_sample=True,
    # Ask the pipeline to return only the newly generated text. Stripping the
    # prompt afterwards with str.replace() would also delete any later
    # repetition of the prompt inside the completion, and would silently do
    # nothing if the decoded text did not match the raw prompt exactly.
    return_full_text=False,
)

# Prompts and their completions, collected in matching order
prompts = []
completions = []

# Inference loop: one generation per prompt
for i, prompt in enumerate(dataset):
    # num_return_sequences=1, so the single result is at index 0
    generated_text = generator(prompt, **generation_kwargs)[0]['generated_text'].strip()

    prompts.append(prompt)
    completions.append(generated_text)

    # Print the generated text for this prompt
    print(f"Generated Text loop {i}:\n {generated_text}\n")

# One row per prompt: the prompt and its completion
df = pd.DataFrame({'prompt': prompts, 'completion': completions})

# Save the DataFrame to a CSV file (without the index column)
df.to_csv('falcon-1b-instruct-completion-scifi-story-prompt.csv', index=False)


In [None]:
# Free the memory held by the model.
#
# Memory owned by this kernel can only be released from inside the kernel:
# a `!command` runs in a separate process and cannot touch it. The steps are
# (1) drop the notebook's references to the large objects, (2) run the garbage
# collector, (3) ask PyTorch to release its cached GPU memory.
import gc

import torch

for _name in ("generator", "model"):
    # pop() instead of `del` so the cell can be re-run, or run before the
    # model cell, without raising NameError.
    globals().pop(_name, None)

# Free up CPU memory
gc.collect()

# Free up GPU memory
if torch.cuda.is_available():
    torch.cuda.empty_cache()