# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import OneHotEncoder
import numpy as np
from transformers import BertTokenizer, BertModel
import torch

# Read the training data from disk.
df = pd.read_csv('train.csv')

# Lookup table: metaphorID (the position in this list) -> metaphor word.
metaphor_mapping = dict(enumerate([
    'road',
    'candle',
    'light',
    'spice',
    'ride',
    'train',
    'boat',
]))

# Derive the word column from the ID column and store the label as an integer.
df['metaphor_word'] = df['metaphorID'].map(metaphor_mapping)
df['label_boolean'] = df['label_boolean'].astype(int)

# Tokenizer and encoder are both loaded from the same pre-trained checkpoint
# (tokenizer first, then the model).
tokenizer, model = (
    loader.from_pretrained('bert-base-uncased')
    for loader in (BertTokenizer, BertModel)
)

# Function to encode text with BERT
def encode_with_bert(text, tokenizer, model, max_length=12):
    """Encode ``text`` into a vector using the model's pooled output.

    Args:
        text: Input passed straight through to ``tokenizer``.
        tokenizer: Callable invoked with ``return_tensors='pt'``,
            ``truncation=True`` and ``padding='max_length'``; its result is
            unpacked as keyword arguments for ``model``.
        model: Callable whose result exposes a ``pooler_output`` tensor.
        max_length: Token length every input is truncated/padded to.
            Defaults to 12, the value that used to be hard-coded here.

    Returns:
        ``pooler_output`` converted to a NumPy array.
    """
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
        padding='max_length',
    )
    # Inference only: disable autograd so no computation graph is built
    # (and kept alive) for every encoded sentence.
    with torch.no_grad():
        outputs = model(**inputs)
    # Use the pooled output for representing the entire sentence.
    # .cpu() is a no-op for CPU tensors and makes .numpy() safe if the model
    # happens to live on another device.
    return outputs.pooler_output.detach().cpu().numpy()

# Encode each sentence in the dataset. squeeze() drops any singleton axes from
# the per-sentence result before the vectors are stacked into one array.
X_bert = np.array([encode_with_bert(text, tokenizer, model).squeeze() for text in df['text']])

# One-hot encode the 'metaphor_word' column.
# The `sparse=False` keyword was renamed to `sparse_output` in scikit-learn 1.2
# and removed in 1.4, so passing it fails on current releases. Keep the
# encoder's default (sparse) output and densify it explicitly instead, which
# works on both old and new versions.
onehot_encoder = OneHotEncoder()
X_metaphor_word = onehot_encoder.fit_transform(df[['metaphor_word']]).toarray()

# Combine BERT features with one-hot encoded metaphor_word features
# (column-wise, so each row is one sample).
X = np.hstack((X_bert, X_metaphor_word))
y = df['label_boolean'].values

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion (fit returns the
# estimator itself, so the call can be chained).
classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = classifier.predict(X_test)

# Report per-class metrics followed by the overall accuracy.
print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
