# In [1]:
import pandas as pd
from gensim.models import KeyedVectors
import gensim.downloader as api
import re

# Location of the simile table to be scored
input_csv_path = 'simile_result.csv'

# Fetch the pretrained 'word2vec-google-news-300' vectors through
# gensim's downloader API; `model` is used later for similarity lookups
model = api.load('word2vec-google-news-300')

# Load the simile table into a DataFrame
df = pd.read_csv(input_csv_path)

# Clean the data
def preprocess_text(text):
    """Normalize a raw cell value into a clean, lowercase lookup string.

    Args:
        text: Raw cell value. May be missing (``None``/NaN) or a non-string
            scalar, in which case it is converted with ``str()``.

    Returns:
        ``''`` for missing values. Otherwise the text with every character
        that is neither a word character nor whitespace removed, lowercased,
        with surrounding whitespace stripped and internal whitespace runs
        collapsed to a single space.
    """
    if pd.isna(text):
        return ''
    # Delete punctuation
    text = re.sub(r'[^\w\s]', '', str(text))
    text = text.lower()
    # Stray or repeated whitespace would otherwise make an otherwise valid
    # word (e.g. ' sun') miss an exact-match vocabulary lookup.
    return ' '.join(text.split())

# Normalize both simile columns in place with the same cleaning routine
for column in ('Tenor', 'Vehicle'):
    df[column] = df[column].apply(preprocess_text)

# Calculate semantic similarity
def calculate_similarity(row):
    """Return the embedding similarity between a row's tenor and vehicle.

    The module-level ``model`` is queried. The whole cell values are first
    tried as single vocabulary keys. If that lookup fails (for example
    because a cell holds a multi-word phrase), each cell is split on
    whitespace and the similarity is computed between the sets of tokens
    that the model knows.

    Args:
        row: A mapping-like row (e.g. a DataFrame row) with ``'Tenor'`` and
            ``'Vehicle'`` entries.

    Returns:
        The similarity reported by the model, or ``0.0`` when either side
        has no token present in the model's vocabulary.
    """
    tenor = row['Tenor']
    vehicle = row['Vehicle']
    try:
        # Fast path: both cells are exact vocabulary keys.
        return model.similarity(tenor, vehicle)
    except KeyError:
        pass

    # Fallback for phrases: keep only tokens the model knows, so a single
    # unknown word no longer forces the whole pair to score 0.0.
    tenor_tokens = [tok for tok in str(tenor).split() if tok in model]
    vehicle_tokens = [tok for tok in str(vehicle).split() if tok in model]
    if not tenor_tokens or not vehicle_tokens:
        # Nothing comparable on at least one side.
        return 0.0
    return model.n_similarity(tenor_tokens, vehicle_tokens)

# Score every row (one calculate_similarity call per row)
df['Creative Score'] = df.apply(calculate_similarity, axis='columns')

# Write the scored table to disk without the index column
output_csv_path = 'final_result.csv'
df.to_csv(path_or_buf=output_csv_path, index=False)
