In [1]:
import os 
os.chdir("C:/Users/sergi/Documents/Py/booksum")

import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from text_importing import read_text_file  # Ensure this function reads the file correctly
import torch.nn.utils.prune as prune


# Read the text file
text = read_text_file("PrideandPrejudice/02.txt")

# Define the model ID
model_id = "meta-llama/Llama-3.2-3B-Instruct"

# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Encode the text to get tokens
tokens = tokenizer.encode(text)

# Get the number of tokens
num_tokens = len(tokens)

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)

def count_zero_weights(model):
    zero_count = 0
    total_count = 0
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            total_count += module.weight.numel()  # Total number of weights
            zero_count += (module.weight == 0).sum().item()  # Count of zero weights
    return zero_count, total_count
# Count zero weights in the original model
zero_weights_original, total_weights_original = count_zero_weights(model)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
###Pruning Linear Layers
def prune_model(model, amount):
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            prune.l1_unstructured(module, name='weight', amount=amount)
            prune.remove(module, 'weight')  # Remove the pruning reparameterization to finalize it
    return model

LLAMA_pruned_V1=prune_model(model, 0.2)


In [3]:
pruned_model_path = "C:/Users/sergi/Documents/Py/booksum/pruned_20_LLAMA32_3B"
LLAMA_pruned_V1.save_pretrained(pruned_model_path)
tokenizer.save_pretrained(pruned_model_path)

('C:/Users/sergi/Documents/Py/booksum/pruned_20_LLAMA32_3B\\tokenizer_config.json',
 'C:/Users/sergi/Documents/Py/booksum/pruned_20_LLAMA32_3B\\special_tokens_map.json',
 'C:/Users/sergi/Documents/Py/booksum/pruned_20_LLAMA32_3B\\tokenizer.json')

In [4]:
def count_zero_weights(model):
    zero_count = 0
    total_count = 0
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            total_count += module.weight.numel()  # Total number of weights
            zero_count += (module.weight == 0).sum().item()  # Count of zero weights
    return zero_count, total_count




In [5]:
# Count zero weights in the pruned model
zero_weights_pruned, total_weights_pruned = count_zero_weights(LLAMA_pruned_V1)
print(f"Zero weights in pruned model: {zero_weights_pruned} out of {total_weights_pruned}")
print(f"Zero weights in OG model: {zero_weights_original} out of {total_weights_original}")


Zero weights in pruned model: 642514994 out of 3212574720
Zero weights in OG model: 0 out of 3212574720


In [6]:
###Importing a function that reads texts and the uploading the pride a prejudice 1st episode
from text_importing import read_text_file  # Ensure this function reads the file correctly

# Read the text file
text = read_text_file("PrideandPrejudice/02.txt")


In [7]:
##Building the prompt that we will pass on the pruned and the og models
prompt = f"""
You are a text summarization expert with a talent for transforming complex information into clear and concise bullet points. You have extensive experience across various fields, which allows you to identify key events and relevant details accurately.
Your task is to summarize the provided text into a list of bullet points. Focus only on the content given, emphasizing the most essential details. Make sure the summary is well-organized and highlights critical events or relevant information.
Please return the summary as a list of not more than 7 bullet points, where each point captures an important event or piece of information from the text.
The bullet points cannot exceed one line each.
Here is the text to summarize:
{text}
"""

In [8]:
##Testing OG: 
# Load the pruned model
# Define the model ID
model_id = "meta-llama/Llama-3.2-3B-Instruct"

# Initialize the tokenizerconda
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Encode the text to get tokens
tokens = tokenizer.encode(text)

# Get the number of tokens
num_tokens = len(tokens)


# Initialize the pipeline
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

####Prepare the summaries
with torch.no_grad():
    summary_OG = pipe(
        prompt,
        max_new_tokens=int(num_tokens),
        return_full_text=False  # Ensuring that the output is not too short
    )

output_OG = summary_OG[0]["generated_text"]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


In [9]:
output_OG=output_OG.split("bullet points:")[-1].strip()
print(output_OG)

• It is a truth universally acknowledged that a single man with a good fortune must be in want of a wife.
• Mrs. Bennet learns that Netherfield Park has been rented by a young man named Bingley from the north of England.
• Bingley is a single man of large fortune, with an annual income of four to five thousand pounds.
• Mrs. Bennet believes that Bingley may fall in love with one of her daughters and encourages her husband to visit him.
• Mr. Bennet is skeptical and thinks that visiting Bingley will be a waste of time, but ultimately agrees to go.
• The Bennet family is eager to meet Bingley and secure a marriage for their daughters.
• Mrs. Bennet's primary goal is to get her daughters married, and she is willing to do whatever it takes to achieve this goal.


In [10]:
##Testing pruned: 

# Load the pruned model
pruned_model = AutoModelForCausalLM.from_pretrained(pruned_model_path, torch_dtype=torch.bfloat16)
pruned_tokenizer = AutoTokenizer.from_pretrained(pruned_model_path)

# Initialize the pipeline with the pruned model and tokenizer
pipe = pipeline(
    "text-generation",
    model=pruned_model,
    tokenizer=pruned_tokenizer,
    device=0
)

# Prepare the summaries with no gradient tracking
with torch.no_grad():
    summary_pruned = pipe(
        prompt,
        max_new_tokens=int(num_tokens),
        return_full_text=False  # Ensuring that the output is not too short
    )

# Extract the generated text
output_pruned = summary_pruned[0]["generated_text"]


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [11]:
output_pruned=output_pruned.split(":")[-1].strip()
print(output_pruned)

• The arrival of a single man with a good fortune in the neighborhood sets off a chain of events related to marriage and social standing.
• Mrs. Bennet is eager to inform her husband about the new resident, Netherfield Park, which is being occupied by a young man named Bingley.
• Bingley is a single man of large fortune, with a yearly income of four or five thousand pounds, which sparks interest in the Bennet family's daughters for marriage.
• The Bennet family is urged to visit Bingley to facilitate potential marriages between their daughters and him.
• Mr. Bennet is reluctant to visit Bingley, but his wife persists, suggesting that he should do so to secure advantageous marriages for their daughters.
• The Bennet family's dynamics and personalities are revealed through their interactions with each other and with their daughters.
• The main character, Mrs. Bennet, is portrayed as a woman driven by her desire to marry off her daughters and secure their social standing, while her husban

In [13]:
# Tokenize the outputs to get input IDs

tokens_OG = tokenizer(output_OG, return_tensors='pt')

device = torch.device("cpu")

tokens_pruned = pruned_tokenizer(output_pruned, return_tensors='pt').to(device)
pruned_model=pruned_model.to(device)

# Tokenize inputs and move tensors to CUDA
# tokens_OG = tokenizer(output_OG, return_tensors='pt').to(device)
# tokens_pruned = pruned_tokenizer(output_pruned, return_tensors='pt').to(device)

# Obtain embeddings from the models
with torch.no_grad():
    # For the original model
    outputs_OG = model(**tokens_OG, output_hidden_states=True)
    embeddings_OG = outputs_OG.hidden_states[-1].mean(dim=1)  # Get last layer and mean pooling

    # For the pruned model
    outputs_pruned = pruned_model(**tokens_pruned, output_hidden_states=True)
    embeddings_pruned = outputs_pruned.hidden_states[-1].mean(dim=1)

# Convert embeddings to float32 to ensure compatibility with dot product operations
embeddings_OG = embeddings_OG.float()
embeddings_pruned = embeddings_pruned.float()

# Compute the dot product of the two embeddings
dot_product = torch.dot(embeddings_OG.squeeze(), embeddings_pruned.squeeze())
print(f"Dot Product: {dot_product.item()}")

# Calculate cosine similarity
# Normalize the embeddings
norm_embeddings_OG = torch.norm(embeddings_OG)
norm_embeddings_pruned = torch.norm(embeddings_pruned)

# Cosine similarity calculation
cosine_similarity = dot_product / (norm_embeddings_OG * norm_embeddings_pruned)

print(f"Cosine Similarity: {cosine_similarity.item()}")

Dot Product: 2335.75732421875
Cosine Similarity: 0.9668498635292053
