# Dobby Mini Unhinged Plus - Llama 3.1 8B
Free inference using Hugging Face Transformers on Google Colab

**Steps:**
1. Runtime > Change runtime type > T4 GPU
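2. Run the cells below in order. The interactive chat cell waits for input until you type 'quit', and the final "Alternative" cell is only needed if the main approach fails.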

In [None]:
# Install required libraries (-q keeps pip's output quiet).
# transformers and torch are imported directly in the next cell; accelerate and
# bitsandbytes are not imported by name in this notebook but are needed by
# transformers for device_map="auto" and 8-bit loading respectively.
!pip install -q transformers accelerate torch bitsandbytes

In [None]:
# Import libraries
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

In [None]:
# Load model and tokenizer
# BitsAndBytesConfig is imported here so this cell carries everything the
# quantized load needs beyond the notebook's shared imports.
from transformers import BitsAndBytesConfig

model_name = "SentientAGI/Dobby-Mini-Unhinged-Plus-Llama-3.1-8B"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

print("Loading model... (this may take a few minutes)")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # Let accelerate decide where to place the weights (GPU when available).
    device_map="auto",
    # dtype requested for the load; kept as float16 alongside the 8-bit config.
    torch_dtype=torch.float16,
    # 8-bit quantization to save memory. Passing load_in_8bit=True directly to
    # from_pretrained is deprecated; the supported route is an explicit
    # quantization config object.
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)

print("Model loaded successfully!")

In [None]:
# Function to generate response
def chat(prompt, max_length=512, temperature=0.7):
    """Generate a continuation for ``prompt`` and return only the new text.

    Args:
        prompt: Text fed to the tokenizer as-is (no chat template is applied).
        max_length: Maximum number of *new* tokens to generate. It is passed to
            ``generate`` as ``max_new_tokens``, so the prompt's own length does
            not count against it.
        temperature: Sampling temperature. Values of 0 or below switch to
            greedy decoding, because sampling needs a strictly positive
            temperature.

    Returns:
        The decoded continuation with special tokens removed, the prompt not
        repeated, and surrounding whitespace stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = inputs["input_ids"].shape[-1]

    generation_kwargs = {
        "max_new_tokens": max_length,
        "repetition_penalty": 1.1,
    }
    if temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=0.95,
            top_k=50,
        )
    else:
        # Sampling with a non-positive temperature is rejected by generate();
        # fall back to deterministic decoding instead of raising.
        generation_kwargs["do_sample"] = False

    outputs = model.generate(**inputs, **generation_kwargs)

    # generate() returns prompt tokens followed by the new tokens; drop the
    # prompt part so callers do not see their own input echoed back.
    new_tokens = outputs[0][prompt_token_count:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

In [None]:
# Smoke test: push one fixed prompt through chat() and show both sides.
prompt = "Explain artificial intelligence in simple terms."
response = chat(prompt)

print("Prompt:", prompt)
# sep="\n" puts the reply on its own line under the "Response:" header.
print("\nResponse:", response, sep="\n")

In [None]:
# Interactive chat loop: read a line, send it to chat(), print the reply,
# and repeat until the user asks to stop.
print("Starting interactive chat. Type 'quit' to exit.\n")

# Commands (compared case-insensitively) that end the session.
EXIT_COMMANDS = {"quit", "exit", "q"}

while True:
    try:
        # Strip so that stray whitespace around a command or prompt is ignored.
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the cell or a closed input stream ends the chat
        # cleanly instead of with a traceback.
        print("\nGoodbye!")
        break

    if not user_input:
        # Nothing typed: ask again rather than running the model on an empty prompt.
        continue

    if user_input.lower() in EXIT_COMMANDS:
        print("Goodbye!")
        break

    response = chat(user_input)
    print(f"\nDobby: {response}\n")

## Alternative: Using Pipeline (Simpler)
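If the approach above doesn't work, try this simpler one. It loads the model a second time, so restart the runtime first if the earlier cells already loaded it — two copies may not fit in the T4's memory.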

In [None]:
# Self-contained fallback: import everything this cell uses so it also runs
# on a fresh runtime where the earlier import cell was not executed.
import torch
from transformers import BitsAndBytesConfig, pipeline

# Create pipeline
pipe = pipeline(
    "text-generation",
    model="SentientAGI/Dobby-Mini-Unhinged-Plus-Llama-3.1-8B",
    device_map="auto",
    torch_dtype=torch.float16,
    # model_kwargs is forwarded to the model's from_pretrained call. Passing
    # load_in_8bit=True there directly is deprecated; the supported route is
    # an explicit quantization config object.
    model_kwargs={"quantization_config": BitsAndBytesConfig(load_in_8bit=True)},
)

# Generate response
result = pipe(
    "What is the meaning of life?",
    max_new_tokens=256,
    temperature=0.7,
    do_sample=True,
)

# The pipeline returns a list with one dict per generated sequence.
print(result[0]['generated_text'])