In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model
from keras.layers import Input, Dense, concatenate
from keras.utils import to_categorical

# Multimodal (text + image) meme classifier.
#
# Pipeline: load the CSV -> TF-IDF text features -> VGG16-preprocessed image
# tensors -> train/test split -> two-branch Keras model -> accuracy on the
# held-out split.

# Example dataset (replace with your own dataset).
# The columns used below are 'text', 'img' and 'label'.
memes_data = pd.read_csv('/content/hateful_memes_original.csv')

# --- Text preprocessing ------------------------------------------------------
# Missing captions would make TfidfVectorizer raise, so treat them as empty.
text_corpus = memes_data['text'].fillna('').astype(str)
vectorizer = TfidfVectorizer(max_features=1000)
# Keras Dense layers (and validation_split) need a dense float array, not the
# scipy sparse matrix returned by fit_transform.
text_features = vectorizer.fit_transform(text_corpus).toarray().astype('float32')
# max_features is only an upper bound; the real width depends on the corpus.
num_text_features = text_features.shape[1]

# --- Image preprocessing -----------------------------------------------------
image_folder = '/content/drive/MyDrive/archive (1)/hateful_memes/img'  # Replace with your image folder path
# The 'img' column may already carry a directory prefix (e.g. 'img/32674.png').
# Joining that onto a folder that itself ends in 'img' produced '.../img/img/...'
# and a FileNotFoundError, so only the bare file name is appended.
image_paths = [
    os.path.join(image_folder, os.path.basename(str(filename)))
    for filename in memes_data['img']
]

# Fill a pre-allocated batch instead of stacking per-image arrays, which
# avoids holding two full copies of the dataset in memory at once.
image_features = np.empty((len(image_paths), 224, 224, 3), dtype='float32')
for index, img_path in enumerate(image_paths):
    # The context manager closes the underlying file as soon as it is decoded.
    with Image.open(img_path) as img:
        # Force 3 channels: RGBA / greyscale / palette files would otherwise
        # yield arrays whose shape does not match the VGG16 input.
        rgb = img.convert('RGB').resize((224, 224))  # VGG16 input size
        image_features[index] = np.asarray(rgb, dtype='float32')
image_features = preprocess_input(image_features)

# --- Labels ------------------------------------------------------------------
labels = memes_data['label'].astype(int)
# Derive the class count from the data so the one-hot width always matches
# the size of the softmax layer (and is identical for train and test).
num_classes = int(labels.max()) + 1

# Split data into train and test sets
X_text_train, X_text_test, X_image_train, X_image_test, y_train, y_test = train_test_split(
    text_features, image_features, labels, test_size=0.2, random_state=42
)

# --- Model -------------------------------------------------------------------
text_input = Input(shape=(num_text_features,))
image_input = Input(shape=(224, 224, 3))

# Text processing layers
text_dense = Dense(256, activation='relu')(text_input)

# Image processing layers.
# pooling='avg' collapses the final convolutional feature map into a flat
# vector per image; without it Dense would be applied per spatial position
# and the result could not be concatenated with the 2-D text branch.
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
    pooling='avg',
)
# Freeze the pretrained convolutional base; only the new Dense heads train.
for layer in base_model.layers:
    layer.trainable = False
image_output = base_model(image_input)
image_flattened = Dense(256, activation='relu')(image_output)

# Concatenate text and image features
concatenated = concatenate([text_dense, image_flattened])
output = Dense(num_classes, activation='softmax')(concatenated)

model = Model(inputs=[text_input, image_input], outputs=output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Convert labels to categorical (fixed width so both splits agree)
y_train_categorical = to_categorical(y_train, num_classes=num_classes)
y_test_categorical = to_categorical(y_test, num_classes=num_classes)

# Train the model
model.fit(
    [X_text_train, X_image_train],
    y_train_categorical,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
)

# Evaluate the model
y_pred = model.predict([X_text_test, X_image_test])
y_pred_classes = np.argmax(y_pred, axis=1)
accuracy = accuracy_score(np.argmax(y_test_categorical, axis=1), y_pred_classes)
print("Accuracy:", accuracy)


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/archive (1)/hateful_memes/img/img/32674.png'

In [None]:
# Show the column labels of the loaded dataset.
column_index = memes_data.columns
print(column_index)

Index(['text', 'img', 'label'], dtype='object')
