# Llama-3.1-8B-Instruct

In [None]:
%pip install -q transformers torch accelerate sentencepiece protobuf

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Checkpoint identifier on the Hugging Face Hub.
model_name = "meta-llama/Llama-3.1-8B-Instruct"

# Prefer the GPU whenever CUDA is usable; otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"cihaz{device}")

In [None]:
import os

from huggingface_hub import login

# Take the Hugging Face access token from the environment instead of
# hard-coding it here, so the secret is never saved or shared with the notebook.
token = os.environ.get("HF_TOKEN")

# When no token is set, login() is called with token=None and asks for one
# interactively.
login(token=token)


# Tokenizer belonging to the selected checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Pick precision and placement up front: float16 with automatic device mapping
# on CUDA, float32 with no device map otherwise.
if device == "cuda":
    load_options = {"torch_dtype": torch.float16, "device_map": "auto"}
else:
    load_options = {"torch_dtype": torch.float32, "device_map": None}

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    **load_options,
)

# Without a device map the model is moved explicitly when running on CPU.
if device == "cpu":
    model = model.to(device)


In [None]:
def generate_response(prompt, max_new_tokens=256, temperature=0.7, top_p=0.9):
    """Generate the assistant's reply to a single user message.

    The message is wrapped in a hand-written Llama 3.1 chat layout with a fixed
    Turkish system instruction, tokenized with the module-level ``tokenizer``
    and passed to the module-level ``model``. Only the newly generated tokens
    are decoded and returned.

    Args:
        prompt: User message text placed in the user turn.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature. ``None`` or a non-positive value
            selects greedy decoding instead of sampling.
        top_p: Nucleus-sampling threshold; only used when sampling.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped.
    """
    # Llama 3.1 chat template
    chat_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Senle satranç oynuyorum ve sen rakibimsin. Türkçe konuşuyorsun. Kısa, samimi ve doğal cevaplar ver.<|eot_id|><|start_header_id|>user<|end_header_id|>

{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

    # The template above already starts with <|begin_of_text|>; disable the
    # tokenizer's automatic special tokens so the sequence does not begin with
    # a duplicated BOS token.
    inputs = tokenizer(
        chat_prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Sampling needs a strictly positive temperature; otherwise decode greedily
    # rather than letting generate() reject the arguments.
    use_sampling = temperature is not None and temperature > 0
    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": use_sampling,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if use_sampling:
        generation_kwargs["temperature"] = temperature
        generation_kwargs["top_p"] = top_p

    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    # Drop the prompt tokens so only the model's continuation is decoded.
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response.strip()

In [None]:
# Quick smoke test: send a short greeting and show both sides of the exchange.
user_prompt = "Merhaba! Nasılsın"
print(f"Sen: {user_prompt}")

# A short reply is enough here, so cap the generation at 100 new tokens.
response = generate_response(prompt=user_prompt, max_new_tokens=100)
print(f"Bot: {response}")

In [None]:

# Chess-themed example: ask the model for its opinion on an opening move.
chess_prompt = "Az önce e4 oynadım. Ne düşünüyorsun bu hamle hakkında"
print(chess_prompt)

# Allow a slightly longer answer (up to 150 new tokens) for the commentary.
response = generate_response(prompt=chess_prompt, max_new_tokens=150)
print(f"\nBot: {response}")