In [None]:
"""
 For asymmetric semantic search, you usually have a short query (like a question or some keywords)
 and you want to find a longer paragraph answering the query. An example would be a query like
 “What is Python” and you want to find the paragraph “Python is an interpreted, high-level and
 general-purpose programming language. Python’s design philosophy …”.
  For asymmetric tasks, flipping the query and the entries in your corpus usually does not make sense.

"""

In [2]:
!pip install sentence_transformers

Collecting sentence_transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->sentence_transformers)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11.0->sentence_transformers)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.met

In [3]:
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer

# Load the question / table-info pairs from the Excel workbook on Drive
file_path = '/content/drive/MyDrive/ALY6080/FINAL_folder/updated_ques_table_info.xlsx'
df = pd.read_excel(file_path)

# Fail fast, before the model is downloaded, if the workbook does not have
# the two text columns that the similarity scoring reads.
required_columns = ['Human Questions', 'Table_info']
missing_columns = [column for column in required_columns if column not in df.columns]
if missing_columns:
    raise KeyError(f"{file_path} is missing required column(s): {missing_columns}")

# Load the pre-trained sentence-embedding model
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Function to calculate asymmetric similarity between a question and table info
def calculate_asymmetric_similarity(question, table_info):
    """Return the cosine similarity between a question and a table description.

    Both texts are embedded with the module-level ``embedder`` and the two
    embeddings are compared with cosine similarity.

    Args:
        question: The query text (a single string).
        table_info: The table description text (a single string).

    Returns:
        float: Cosine similarity of the two embeddings, or ``nan`` when either
        input is missing (``None`` / ``NaN``, e.g. a blank spreadsheet cell).
    """
    # pandas loads blank Excel cells as NaN, which is not text the encoder can
    # embed; report "no score" for that row instead of aborting the whole run.
    for text in (question, table_info):
        if pd.api.types.is_scalar(text) and pd.isna(text):
            return float("nan")

    # Encode the question and table info
    question_embedding = embedder.encode(question, convert_to_tensor=True)
    table_info_embedding = embedder.encode(table_info, convert_to_tensor=True)

    # encode() yields a 1-D vector for a single string, so dim=0 reduces over
    # the embedding axis and produces a scalar tensor.
    similarity_score = torch.nn.functional.cosine_similarity(question_embedding, table_info_embedding, dim=0)

    return similarity_score.item()

# Score each row: compare its human question against its table description
def _score_row(row):
    """Similarity between one row's 'Human Questions' and 'Table_info' cells."""
    return calculate_asymmetric_similarity(row['Human Questions'], row['Table_info'])

df['Similarity_Score'] = df.apply(_score_row, axis=1)

# Write the scored frame to a new workbook next to the input (no index column)
output_file_path = '/content/drive/MyDrive/ALY6080/FINAL_folder/updated_ques_table_info_with_similarity.xlsx'
df.to_excel(output_file_path, index=False)

print("Updated Excel file saved to:", output_file_path)


  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Updated Excel file saved to: /content/drive/MyDrive/ALY6080/FINAL_folder/updated_ques_table_info_with_similarity.xlsx
