In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed.
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# (The stock Kaggle snippet that lists every file under the input directory is not included in this cell.)

import os
import warnings 
warnings.filterwarnings("ignore")
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Clone the `feature/add_hf` branch of the ImageBind fork and make the checkout
# the working directory for the cells that follow.
# NOTE(review): the branch is not pinned to a commit, so a later re-run may pick up different code.
!git clone -b feature/add_hf https://github.com/nielsrogge/ImageBind.git
%cd ImageBind

In [None]:
!pip install .

In [None]:
from imagebind import data
import torch
from imagebind.models import imagebind_model
from imagebind.models.imagebind_model import ModalityType
from imagebind.models.imagebind_model import ImageBindModel

# Sample inputs for the three modalities (text, image, audio). They are not
# referenced again in the visible part of this notebook.
text_list = [
    "A dog.",
    "A car",
    "A bird",
]
image_paths = [
    ".assets/dog_image.jpg",
    ".assets/car_image.jpg",
    ".assets/bird_image.jpg",
]
audio_paths = [
    ".assets/dog_audio.wav",
    ".assets/car_audio.wav",
    ".assets/bird_audio.wav",
]

# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Load the pretrained checkpoint, switch it to evaluation mode and move it to
# the selected device. The `.to(...)` call stays the last expression of the
# cell, so its return value is what the notebook displays.
model = ImageBindModel.from_pretrained("nielsr/imagebind-huge")
model.eval()
model.to(device)


# Full Knowledge

In [None]:
import ast
# Load the Task 2 query table; its 'QID' and 'entity_titles' columns are read further down.
input_df = pd.read_csv('/kaggle/input/business-json/Task_2_data.csv') 
# Function to extract entity names
def extract_entity_name(entities_str):
    """Return the first two entries of a stringified Python sequence.

    Args:
        entities_str: Text holding a Python literal sequence,
            e.g. ``"['A', 'B']"``.

    Returns:
        A 2-tuple ``(first, second)``. A position with no corresponding entry
        is ``None``: one entry gives ``(first, None)`` and an empty sequence
        gives ``(None, None)`` instead of raising ``IndexError``.

    Raises:
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when the
            text is not a valid Python literal.
    """
    entities_list = ast.literal_eval(entities_str)
    # Pad missing positions with None so callers always receive exactly two values.
    first = entities_list[0] if len(entities_list) > 0 else None
    second = entities_list[1] if len(entities_list) > 1 else None
    return (first, second)

# Parse every 'entity_titles' string into a (first, second) tuple, then spread
# each tuple across the two new gold-entity columns of input_df.
entity_pairs = input_df['entity_titles'].apply(extract_entity_name)
input_df[['sentence_1_entity_name', 'sentence_2_entity_name']] = entity_pairs.apply(pd.Series)

# Print the frame so the two new columns can be inspected.
print(input_df)

# Entity table and ASR transcript table used by the cells that follow.
entity_df = pd.read_csv('/kaggle/input/business-json/Knowledge.csv')
asr_df = pd.read_csv("/kaggle/input/business-json/task_2_asr_out.csv")

In [None]:
import os
import torch
from imagebind.models.imagebind_model import ModalityType, ImageBindModel
from imagebind import data
import pandas as pd
from tqdm import tqdm
device = "cuda:0" if torch.cuda.is_available() else "cpu"
from sklearn.metrics.pairwise import cosine_similarity

# Function to process audio files in batches
def process_audio_in_batches(txt, batch_size=16):
    """Embed a sequence of texts with the global ``model``, one batch at a time.

    Despite its name, this function feeds the TEXT modality: every batch goes
    through ``data.load_and_transform_text`` and is embedded under
    ``ModalityType.TEXT``.

    Args:
        txt: Sequence of texts; it must support ``len()`` and slicing.
        batch_size: Number of texts sent through the model per forward pass.

    Returns:
        The per-batch ``ModalityType.TEXT`` outputs concatenated along
        dimension 0, in input order.
    """
    chunk_embeddings = []
    batch_starts = range(0, len(txt), batch_size)

    for start in tqdm(batch_starts, desc="Processing Batches"):
        tokens = data.load_and_transform_text(txt[start:start + batch_size], device)
        model_inputs = {ModalityType.TEXT: tokens}

        # Inference only: no gradients are needed for embedding extraction.
        with torch.no_grad():
            outputs = model(model_inputs)

        chunk_embeddings.append(outputs[ModalityType.TEXT])
        # Release cached GPU memory after each batch.
        torch.cuda.empty_cache()

    return torch.cat(chunk_embeddings, dim=0)

# Embed the 'Knowledge' text of every entity. process_audio_in_batches is fed
# text columns here, despite its name.
knowledge_texts = entity_df['Knowledge'].tolist()
entity_embeddings = process_audio_in_batches(knowledge_texts)

# Embed the ASR transcripts of sentence 1 and sentence 2 the same way.
sentence_1_transcripts = asr_df['Sentence_1_transcript'].tolist()
input1_embeddings = process_audio_in_batches(sentence_1_transcripts)
sentence_2_transcripts = asr_df['Sentence_2_transcript'].tolist()
input2_embeddings = process_audio_in_batches(sentence_2_transcripts)


In [None]:
# Hit counters, accumulated over both sentences of every query.
top1_correct = 0
top5_correct = 0
top10_correct = 0
# Sentences that have a gold entity and therefore take part in the accuracy.
scored_sentences = 0

res_data = []

# The device-to-host conversions do not change between iterations, so they are
# done once here instead of twice per query inside the loop.
entity_matrix = entity_embeddings.cpu().numpy()
sentence_inputs = (
    (1, input1_embeddings.cpu().numpy(), 'sentence_1_entity_name'),
    (2, input2_embeddings.cpu().numpy(), 'sentence_2_entity_name'),
)

# Step 3: Calculate cosine similarity and evaluate rankings.
# Row i of each sentence-embedding matrix is paired with row i of input_df by position.
# NOTE(review): assumes the embeddings were built in the same row order as input_df — confirm.
for i, q_id in enumerate(input_df['QID']):
    row_result = {'q_id': q_id}

    for sentence_no, sentence_matrix, gold_column in sentence_inputs:
        # Similarity of this sentence to every entity, as a flat 1-D array.
        similarity_scores = cosine_similarity(
            sentence_matrix[i].reshape(1, -1), entity_matrix
        ).flatten()
        # Entity names ordered from most to least similar.
        ranked_entity_indices = similarity_scores.argsort()[::-1]
        ranked_entity_names = entity_df['Entity_name'].iloc[ranked_entity_indices]

        # Gold entity name for this sentence (may be missing).
        actual_entity_name = input_df[gold_column].iloc[i]

        # The misspelt 'sentnece' key is kept unchanged on purpose: these keys
        # become column names of the results table.
        row_result[f'linked_entity_sentnece_{sentence_no}'] = ranked_entity_names.iloc[0]
        row_result[f'actual_entity_sentence_{sentence_no}'] = actual_entity_name

        # A sentence without a gold entity can never be a hit, so it is left out
        # of the accuracy instead of being counted as a miss.
        if pd.isna(actual_entity_name):
            continue
        scored_sentences += 1

        # Step 4: Check if the actual entity is within the top 1, 5, and 10.
        top10_names = ranked_entity_names.iloc[:10].values
        if actual_entity_name == top10_names[0]:
            top1_correct += 1
        if actual_entity_name in top10_names[:5]:
            top5_correct += 1
        if actual_entity_name in top10_names:
            top10_correct += 1

    res_data.append(row_result)


# Step 5: Calculate and print ranking accuracies
total_sentences = len(input_df)  # number of query rows; each contributes up to two sentences

if scored_sentences:
    # Percentages over the sentences that were actually scored. When every row
    # has two gold entities this equals the former `correct / rows * 50`.
    top1_accuracy = top1_correct / scored_sentences * 100
    top5_accuracy = top5_correct / scored_sentences * 100
    top10_accuracy = top10_correct / scored_sentences * 100
else:
    # Nothing to score: report NaN rather than dividing by zero.
    top1_accuracy = top5_accuracy = top10_accuracy = float('nan')

print(f"Top-1 Accuracy: {top1_accuracy:.2f}%")
print(f"Top-5 Accuracy: {top5_accuracy:.2f}%")
print(f"Top-10 Accuracy: {top10_accuracy:.2f}%")

In [None]:
# Turn the per-query linking records into a table and write it to a CSV file
# in the current working directory.
ner_df = pd.DataFrame(data=res_data)
ner_df.to_csv(path_or_buf="Task2_asr_entity_linked_ImageBIND_results.csv")

# Entity Label

In [None]:

# Embed only the 'Entity_name' label of every entity. process_audio_in_batches
# is fed text columns here, despite its name.
entity_labels = entity_df['Entity_name'].tolist()
entity_embeddings = process_audio_in_batches(entity_labels)

# Embed the ASR transcripts of sentence 1 and sentence 2 the same way.
sentence_1_transcripts = asr_df['Sentence_1_transcript'].tolist()
input1_embeddings = process_audio_in_batches(sentence_1_transcripts)
sentence_2_transcripts = asr_df['Sentence_2_transcript'].tolist()
input2_embeddings = process_audio_in_batches(sentence_2_transcripts)



In [None]:
# Hit counters, accumulated over both sentences of every query.
top1_correct = 0
top5_correct = 0
top10_correct = 0
# Sentences that have a gold entity and therefore take part in the accuracy.
scored_sentences = 0

res_data = []

# The device-to-host conversions do not change between iterations, so they are
# done once here instead of twice per query inside the loop.
entity_matrix = entity_embeddings.cpu().numpy()
sentence_inputs = (
    (1, input1_embeddings.cpu().numpy(), 'sentence_1_entity_name'),
    (2, input2_embeddings.cpu().numpy(), 'sentence_2_entity_name'),
)

# Step 3: Calculate cosine similarity and evaluate rankings.
# Row i of each sentence-embedding matrix is paired with row i of input_df by position.
# NOTE(review): assumes the embeddings were built in the same row order as input_df — confirm.
for i, q_id in enumerate(input_df['QID']):
    row_result = {'q_id': q_id}

    for sentence_no, sentence_matrix, gold_column in sentence_inputs:
        # Similarity of this sentence to every entity, as a flat 1-D array.
        similarity_scores = cosine_similarity(
            sentence_matrix[i].reshape(1, -1), entity_matrix
        ).flatten()
        # Entity names ordered from most to least similar.
        ranked_entity_indices = similarity_scores.argsort()[::-1]
        ranked_entity_names = entity_df['Entity_name'].iloc[ranked_entity_indices]

        # Gold entity name for this sentence (may be missing).
        actual_entity_name = input_df[gold_column].iloc[i]

        # The misspelt 'sentnece' key is kept unchanged on purpose: these keys
        # become column names of the results table.
        row_result[f'linked_entity_sentnece_{sentence_no}'] = ranked_entity_names.iloc[0]
        row_result[f'actual_entity_sentence_{sentence_no}'] = actual_entity_name

        # A sentence without a gold entity can never be a hit, so it is left out
        # of the accuracy instead of being counted as a miss.
        if pd.isna(actual_entity_name):
            continue
        scored_sentences += 1

        # Step 4: Check if the actual entity is within the top 1, 5, and 10.
        top10_names = ranked_entity_names.iloc[:10].values
        if actual_entity_name == top10_names[0]:
            top1_correct += 1
        if actual_entity_name in top10_names[:5]:
            top5_correct += 1
        if actual_entity_name in top10_names:
            top10_correct += 1

    res_data.append(row_result)


# Step 5: Calculate and print ranking accuracies
total_sentences = len(input_df)  # number of query rows; each contributes up to two sentences

if scored_sentences:
    # Percentages over the sentences that were actually scored. When every row
    # has two gold entities this equals the former `correct / rows * 50`.
    top1_accuracy = top1_correct / scored_sentences * 100
    top5_accuracy = top5_correct / scored_sentences * 100
    top10_accuracy = top10_correct / scored_sentences * 100
else:
    # Nothing to score: report NaN rather than dividing by zero.
    top1_accuracy = top5_accuracy = top10_accuracy = float('nan')

print(f"Top-1 Accuracy: {top1_accuracy:.2f}%")
print(f"Top-5 Accuracy: {top5_accuracy:.2f}%")
print(f"Top-10 Accuracy: {top10_accuracy:.2f}%")

In [None]:
# Turn the per-query linking records into a table and write it to a CSV file
# in the current working directory.
ner_df = pd.DataFrame(data=res_data)
ner_df.to_csv(path_or_buf="Task2_asr_entity_linked_ImageBIND_label_results.csv")

# Partial Knowledge

In [None]:
def _leading_fifth(text):
    """Return the leading 20% of ``text``, measured in characters and rounded down."""
    return text[:int(len(text) * 0.2)]


# Store the truncated knowledge text in a new 'Knowledge_20' column of entity_df.
entity_df['Knowledge_20'] = entity_df['Knowledge'].apply(_leading_fifth)

# Embed the truncated knowledge text of every entity. process_audio_in_batches
# is fed text columns here, despite its name.
partial_knowledge_texts = entity_df['Knowledge_20'].tolist()
entity_embeddings = process_audio_in_batches(partial_knowledge_texts)

# Embed the ASR transcripts of sentence 1 and sentence 2 the same way.
sentence_1_transcripts = asr_df['Sentence_1_transcript'].tolist()
input1_embeddings = process_audio_in_batches(sentence_1_transcripts)
sentence_2_transcripts = asr_df['Sentence_2_transcript'].tolist()
input2_embeddings = process_audio_in_batches(sentence_2_transcripts)



In [None]:
# Hit counters, accumulated over both sentences of every query.
top1_correct = 0
top5_correct = 0
top10_correct = 0
# Sentences that have a gold entity and therefore take part in the accuracy.
scored_sentences = 0

res_data = []

# The device-to-host conversions do not change between iterations, so they are
# done once here instead of twice per query inside the loop.
entity_matrix = entity_embeddings.cpu().numpy()
sentence_inputs = (
    (1, input1_embeddings.cpu().numpy(), 'sentence_1_entity_name'),
    (2, input2_embeddings.cpu().numpy(), 'sentence_2_entity_name'),
)

# Step 3: Calculate cosine similarity and evaluate rankings.
# Row i of each sentence-embedding matrix is paired with row i of input_df by position.
# NOTE(review): assumes the embeddings were built in the same row order as input_df — confirm.
for i, q_id in enumerate(input_df['QID']):
    row_result = {'q_id': q_id}

    for sentence_no, sentence_matrix, gold_column in sentence_inputs:
        # Similarity of this sentence to every entity, as a flat 1-D array.
        similarity_scores = cosine_similarity(
            sentence_matrix[i].reshape(1, -1), entity_matrix
        ).flatten()
        # Entity names ordered from most to least similar.
        ranked_entity_indices = similarity_scores.argsort()[::-1]
        ranked_entity_names = entity_df['Entity_name'].iloc[ranked_entity_indices]

        # Gold entity name for this sentence (may be missing).
        actual_entity_name = input_df[gold_column].iloc[i]

        # The misspelt 'sentnece' key is kept unchanged on purpose: these keys
        # become column names of the results table.
        row_result[f'linked_entity_sentnece_{sentence_no}'] = ranked_entity_names.iloc[0]
        row_result[f'actual_entity_sentence_{sentence_no}'] = actual_entity_name

        # A sentence without a gold entity can never be a hit, so it is left out
        # of the accuracy instead of being counted as a miss.
        if pd.isna(actual_entity_name):
            continue
        scored_sentences += 1

        # Step 4: Check if the actual entity is within the top 1, 5, and 10.
        top10_names = ranked_entity_names.iloc[:10].values
        if actual_entity_name == top10_names[0]:
            top1_correct += 1
        if actual_entity_name in top10_names[:5]:
            top5_correct += 1
        if actual_entity_name in top10_names:
            top10_correct += 1

    res_data.append(row_result)


# Step 5: Calculate and print ranking accuracies
total_sentences = len(input_df)  # number of query rows; each contributes up to two sentences

if scored_sentences:
    # Percentages over the sentences that were actually scored. When every row
    # has two gold entities this equals the former `correct / rows * 50`.
    top1_accuracy = top1_correct / scored_sentences * 100
    top5_accuracy = top5_correct / scored_sentences * 100
    top10_accuracy = top10_correct / scored_sentences * 100
else:
    # Nothing to score: report NaN rather than dividing by zero.
    top1_accuracy = top5_accuracy = top10_accuracy = float('nan')

print(f"Top-1 Accuracy: {top1_accuracy:.2f}%")
print(f"Top-5 Accuracy: {top5_accuracy:.2f}%")
print(f"Top-10 Accuracy: {top10_accuracy:.2f}%")

In [None]:
# Turn the per-query linking records into a table and write it to a CSV file
# in the current working directory.
ner_df = pd.DataFrame(data=res_data)
ner_df.to_csv(path_or_buf="Task2_asr_entity_linked_ImageBIND_partial_knowledge_results.csv")