<a href="https://colab.research.google.com/github/jogis0/LLM_Labs/blob/master/LLM_Homework3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 3.1 Dynamic quantization of a causal language model

In [None]:
import os

import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Checkpoint to load. Alternatives that were tried:
# microsoft/Phi-3-mini-4k-instruct, openai-community/gpt2,
# TinyLlama/TinyLlama-1.1B-Chat-v1.0
huggingface_model = "Gensyn/Qwen2.5-0.5B-Instruct"

# Do not hard-code the access token in the notebook: read it from the HF_TOKEN
# environment variable. When the variable is unset, token=None is passed and
# login() asks for the token instead.
login(token=os.environ.get("HF_TOKEN"))

# Load the model in float32 together with its matching tokenizer.
# NOTE(review): trust_remote_code=True allows the checkpoint repository to run
# its own Python code at load time; confirm it is still required.
model = AutoModelForCausalLM.from_pretrained(
    huggingface_model,
    torch_dtype=torch.float32,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(huggingface_model, trust_remote_code=True)


In [None]:
import matplotlib.pyplot as plt
import os

def get_all_weights(model):
    """Return the trainable weights of *model* flattened into one 1-D tensor.

    Only parameters that have ``requires_grad`` set and whose name contains
    ``"weight"`` are included.

    Args:
        model: Module whose ``named_parameters()`` are inspected.

    Returns:
        A 1-D tensor with all selected parameters concatenated in
        ``named_parameters()`` order, or an empty tensor when no parameter
        matches.
    """
    weights = [
        # reshape(-1), unlike view(-1), also works for non-contiguous tensors.
        param.detach().reshape(-1)
        for name, param in model.named_parameters()
        if param.requires_grad and "weight" in name
    ]
    if not weights:
        # torch.cat() raises on an empty list; an empty result is more useful.
        return torch.empty(0)
    return torch.cat(weights)

# Flattened copy of the model's weights, captured before any quantization is
# applied; used for the "original" weight histogram.
original_weights = get_all_weights(model)

def print_model_size(mdl):
    """Print the on-disk size of *mdl*'s serialized ``state_dict`` in MiB.

    The state dict is saved into a private temporary directory, so an
    unrelated ``temp_weights.pt`` in the working directory is never
    overwritten or deleted, and the file is cleaned up even when saving fails.

    Args:
        mdl: Module whose ``state_dict()`` is serialized with ``torch.save``.
    """
    import tempfile  # local import keeps this helper self-contained

    with tempfile.TemporaryDirectory() as tmp_dir:
        weights_path = os.path.join(tmp_dir, "temp_weights.pt")
        torch.save(mdl.state_dict(), weights_path)
        size_mb = os.path.getsize(weights_path) / float(2**20)
    print(f"Model size: {size_mb:.2f} MB")

In [None]:
# Histogram of the original weights, zoomed in on the [-0.2, 0.2] range,
# followed by the serialized size of the unquantized model.
original_values = original_weights.cpu().float().numpy()

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(original_values, bins=1500, color="skyblue")
ax.set_title("Original Model Weight Distribution")
ax.set_xlabel("Weight Value")
ax.set_ylabel("Frequency")
ax.grid(True)
ax.set_xlim(-0.2, 0.2)
plt.show()

print_model_size(model)


In [None]:
import gc

# Baseline: generate a short story continuation with the unquantized model.
original_pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Single-turn chat input and generation settings; both are reused by the
# later cell that runs the quantized model.
story_prompt = "Finish the story: Once upon a time there was a boy"
messages = [{"role": "user", "content": story_prompt}]
generation_args = dict(max_new_tokens=50, return_full_text=False)

original_result = original_pipe(messages, **generation_args)
original_response_pipe = original_result[0]["generated_text"]
print(f"Original model response:\n{original_response_pipe}")

# Drop the pipeline and reclaim memory before the next step.
del original_pipe
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [None]:
import torch.quantization

# Dynamically quantize every nn.Linear layer of the model to qint8.
quantized_model = torch.quantization.quantize_dynamic(
    model, qconfig_spec={torch.nn.Linear}, dtype=torch.qint8
)

# The float model is not used under this name again; release it and free memory.
del model
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [None]:
# get_all_weights() only walks named_parameters(). Dynamic quantization swaps
# each nn.Linear for a module that keeps its qint8 weight in packed form and
# exposes it through a weight() method instead of a parameter, so those weights
# have to be collected (and dequantized to float) separately. Without this the
# histogram would show only the layers that were NOT quantized.
quantized_linear_weights = [
    module.weight().dequantize().reshape(-1)
    for module in quantized_model.modules()
    if isinstance(module, torch.ao.nn.quantized.dynamic.Linear)
]
quantized_weights = torch.cat(
    [get_all_weights(quantized_model), *quantized_linear_weights]
)

# Same plot settings as the original-weights histogram so the two are comparable.
plt.figure(figsize=(10, 6))
plt.hist(quantized_weights.cpu().type(torch.float32).numpy(), bins=1500, color='orange')
plt.title("Quantized Model Weights Distribution")
plt.xlabel("Weight Value")
plt.ylabel("Frequency")
plt.grid(True)
plt.xlim([-0.2, 0.2])
plt.show()
print_model_size(quantized_model)

In [None]:
# quantize_dynamic() only converts *to* a quantized dtype; torch.float32 is not
# a supported target, so calling it again to "dequantize" raised an error.
# A dynamically quantized model is run as-is, so it is passed to the pipeline
# directly. The name `dequantized_model` is kept so any later reference to it
# still resolves.
dequantized_model = quantized_model

# Run the same prompt and generation settings as the baseline for comparison.
dequantized_pipe = pipeline("text-generation", model=dequantized_model, tokenizer=tokenizer)
dequantized_result = dequantized_pipe(messages, **generation_args)
dequantized_response_pipe = dequantized_result[0]['generated_text']

print(f"Quantized model response:\n{dequantized_response_pipe}")

# 3.2 Paragraph similarity search with sentence embeddings

In [None]:
pip install pandas sentence-transformers scikit-learn

In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances, manhattan_distances
import numpy as np

def _read_paragraphs(path):
    """Return the stripped, non-blank lines of the UTF-8 text file at *path*."""
    with open(path, "r", encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]


# Supported metrics: name -> (pairwise scoring function, how to pick the best
# chunk, label used in the report). Cosine yields a similarity (higher is
# better); the other two yield distances (lower is better).
METRICS = {
    "cosine": (cosine_similarity, np.argmax, "Similarity"),
    "euclidean": (euclidean_distances, np.argmin, "Distance"),
    "manhattan": (manhattan_distances, np.argmin, "Distance"),
}
metric = "cosine"  # change to "euclidean" or "manhattan" to compare metrics
score_fn, pick_best, score_label = METRICS[metric]

model = SentenceTransformer('all-mpnet-base-v2')

original_paragraphs = _read_paragraphs("paragraphs.txt")
if not original_paragraphs:
    # Without any chunk there is nothing to compare against; fail with a clear
    # message instead of an obscure error from the scoring function.
    raise ValueError("paragraphs.txt contains no non-blank paragraphs")

# Group the paragraphs into roughly ten chunks (at least one paragraph each).
chunk_size = max(1, len(original_paragraphs) // 10)
chunks = [
    ' '.join(original_paragraphs[i:i + chunk_size])
    for i in range(0, len(original_paragraphs), chunk_size)
]

chunk_vectors = model.encode(chunks)
df = pd.DataFrame({'chunk': chunks, 'vectors': list(chunk_vectors)})

new_paragraphs = _read_paragraphs("new_paragraphs.txt")
new_vectors = model.encode(new_paragraphs)

# Score every new paragraph against every chunk in one call: row i holds the
# scores of new paragraph i. Skipped when there are no new paragraphs.
scores = score_fn(new_vectors, chunk_vectors) if new_paragraphs else []

for i, (new_para, row) in enumerate(zip(new_paragraphs, scores), start=1):
    best_idx = pick_best(row)

    print(f"\n=== New Paragraph {i} ===")
    print(new_para)
    print("\n--- Most Similar Original Chunk ---")
    print(chunks[best_idx])
    # Label the score correctly: cosine is a similarity, not a distance.
    print(f"\n({score_label}: {row[best_idx]:.4f})")
