## Embeddings in the sentence balancing task

**How much does positive,negative adjective pairs correlated compared to random adjective pairs**

In [6]:
r1_adj=["rich","smart","ugly","fast","long","cheap","clear"]
r2_adj=["bright","cold","sweet","cold","bad","weak","good"]

pos_adj=["bright","hot","sweet","warm","bad","strong","good"]
neg_adj=["dark","cold","bitter","cold","good","weak","bad"]
from transformers import GPTNeoForCausalLM, GPT2Tokenizer
import numpy as np
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

import torch
emb_pos=torch.empty((768,7))
emb_neg=torch.empty((768,7))
r1_emb=torch.empty((768,7))
r2_emb=torch.empty((768,7))
pos_neg_pair=list()
r1_r2_pair=list()
for i in range(len(pos_adj)):

    pos_adj_id = tokenizer.encode(" "+pos_adj[i], return_tensors='pt')
    neg_adj_id = tokenizer.encode(" "+neg_adj[i], return_tensors='pt')
    r1_adj_id = tokenizer.encode(" "+r1_adj[i], return_tensors='pt')
    r2_adj_id = tokenizer.encode(" "+r2_adj[i], return_tensors='pt')
    pos_adj_emb = model.wte(pos_adj_id).detach().reshape(-1).numpy()
    neg_adj_emb = model.wte(neg_adj_id).detach().reshape(-1).numpy()
    emb_pos[:,i]=model.wte(pos_adj_id).detach()
    emb_neg[:,i]=model.wte(neg_adj_id).detach()

    r1_adj_emb = model.wte(r1_adj_id).detach().reshape(-1)
    r2_adj_emb = model.wte(r2_adj_id).detach().reshape(-1)
    r1_emb[:,i]=r1_adj_emb
    r2_emb[:,i]=r2_adj_emb
    pos_neg_pair.append(np.corrcoef(neg_adj_emb,pos_adj_emb)[0][1])
    r1_r2_pair.append(np.corrcoef(r1_adj_emb,r2_adj_emb)[0][1])


Downloading (â€¦)lve/main/config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/5.31G [00:00<?, ?B/s]

In [5]:
word_embeddings = model.wte.weight
batch_size = 500

# Variable to accumulate cosine similarities
total_cosine_similarity = 0.0

# Calculate mean pairwise cosine similarity in batches to avoid memory overflow
for i in range(0, word_embeddings.size(0), batch_size):
    batch = word_embeddings[i:i+batch_size, :]
    norm = batch.norm(p=2, dim=1, keepdim=True)
    batch_normalized = batch.div(norm)
    similarity_matrix = torch.mm(batch_normalized, batch_normalized.transpose(0,1))
    
    # Exclude self-similarities (diagonal elements) and duplicates (upper triangular elements)
    similarity_sum = torch.tril(similarity_matrix, diagonal=-1).sum()
    
    # Update total cosine similarity
    total_cosine_similarity += similarity_sum.item()

# Calculate mean cosine similarity
num_pairs = (word_embeddings.size(0)*(word_embeddings.size(0)-1))/2
mean_cosine_similarity = total_cosine_similarity / num_pairs

print(f"Mean pairwise cosine similarity: {mean_cosine_similarity}")


Mean pairwise cosine similarity: 0.0027156149655636415


In [2]:
import pandas as pd
pd.DataFrame(emb_neg).to_csv("neg_emb.csv")

In [28]:
mean_cor_pos_neg=np.array(pos_neg_pair).mean()
mean_cor_random=np.array(r1_r2_pair).mean()

print(f"Mean correlation of random adjectives: {mean_cor_random}")
print(f"Mean correlation of positive and negative adjectives: {mean_cor_pos_neg}")


Mean correlation of random adjectives: 0.3393381970372445
Mean correlation of positive and negative adjectives: 0.5797392398466


### How well does regression change the polarity of the adjectives

In [78]:
from sklearn.linear_model import LinearRegression
coefficients = []
X=emb_pos
y=emb_neg
# Fit a linear regression model without intercept for each feature
for feature_index in range(X.shape[1]):
    # Select the current feature
    X_feature = X[:, feature_index].reshape(-1, 1)
    y_t=y[:,feature_index]
    # Create a linear regression object without intercept
    regressor = LinearRegression(fit_intercept=False)

    # Fit the linear regression model
    regressor.fit(X_feature, y)

    # Get the coefficient for the current feature
    coefficient = regressor.coef_[0]

    # Add the coefficient to the list
    coefficients.append(coefficient)


In [101]:
import numpy as np
X_new=emb_pos[3,:]
predictions = np.zeros(X_new.shape[0])

# Make predictions using the obtained coefficients
for feature_index in range(768):
    # Select the current feature
    X_feature = X_new[feature_index]

    # Multiply the feature by its corresponding coefficient and add it to the predictions
    predictions[feature_index]=X_feature * coefficients[feature_index]

In [102]:
from sklearn.metrics.pairwise import cosine_similarity
word_embeddings = model.wte.weight

# Convert to numpy for easier calculations
word_embeddings_np = word_embeddings.detach().numpy()

# This should be your predicted embedding
predicted_embedding = predictions

# Reshape it to 2D because cosine_similarity expects 2D arrays
predicted_embedding = predicted_embedding.reshape(1, -1)

# Compute the cosine similarities
similarities = cosine_similarity(predicted_embedding, word_embeddings_np)

# Get the indices of the top 10 closest tokens
top_10_indices = similarities[0].argsort()[-10:][::-1]

# Print the closest tokens
for index in top_10_indices:
    token = tokenizer.decode([index])
    print(f"Token: {token}, Similarity: {similarities[0][index]}")

Token: bring, Similarity: 0.13129960579542277
Token:  defunct, Similarity: 0.12518269726551684
Token:  presumed, Similarity: 0.11389661342921062
Token: ACH, Similarity: 0.1128229888463981
Token: aum, Similarity: 0.11199317371042172
Token:  Eternity, Similarity: 0.11149686015522803
Token: imaru, Similarity: 0.11114468272398045
Token: ixt, Similarity: 0.11080637292754249
Token: had, Similarity: 0.11068742658949278
Token: rites, Similarity: 0.11049747390073628
