In [None]:
pip install keras-tuner

# try

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import random
from torch.utils.data import Dataset, DataLoader, random_split


In [77]:

video_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Video_Interview_features.csv').drop(columns=['Extraversion', 'Agreeableness', 'Conscientiousness', 'Neuroticism', 'Openness'])
audio_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Extracted_Audio_features.csv').drop(columns=['Extraversion', 'Agreeableness', 'Conscientiousness', 'Neuroticism', 'Openness'])

scaler_video = StandardScaler()
video_features = scaler_video.fit_transform(video_features_df.iloc[:, 1:].values)

scaler_audio = StandardScaler()
audio_features = scaler_audio.fit_transform(audio_features_df.iloc[:, 1:].values)


In [78]:
assert len(video_features) == len(audio_features)

In [79]:
class VideoAudioDataset(Dataset):
    def __init__(self, video_features, audio_features):
        self.video_features = video_features
        self.audio_features = audio_features
        
    def __len__(self):
        return len(self.video_features)
    
    def __getitem__(self, idx):
        video = self.video_features[idx]
        audio = self.audio_features[idx]
        positive_pair = (video, audio)
        
        random_idx = random.choice([i for i in range(len(self.video_features)) if i != idx])
        negative_audio = self.audio_features[random_idx]
        negative_pair = (video, negative_audio)
        
        return positive_pair, negative_pair

dataset = VideoAudioDataset(video_features, audio_features)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

class ComplexCNNEncoder(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, dropout_rate=0.5):
        super(ComplexCNNEncoder, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=hidden_sizes[0], kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm1d(hidden_sizes[0])
        self.dropout1 = nn.Dropout(dropout_rate)
        self.conv2 = nn.Conv1d(in_channels=hidden_sizes[0], out_channels=hidden_sizes[1], kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(hidden_sizes[1])
        self.dropout2 = nn.Dropout(dropout_rate)
        self.conv3 = nn.Conv1d(in_channels=hidden_sizes[1], out_channels=hidden_sizes[2], kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm1d(hidden_sizes[2])
        self.fc = nn.Linear(hidden_sizes[2], output_size)
        
    def forward(self, x):
        x = x.unsqueeze(1)  
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.dropout1(x)
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.dropout2(x)
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.avg_pool1d(x, kernel_size=x.size()[2]) 
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

video_encoder = ComplexCNNEncoder(input_size=1, hidden_sizes=[128, 64, 32], output_size=128, dropout_rate=0.5)
audio_encoder = ComplexCNNEncoder(input_size=1, hidden_sizes=[128, 64, 32], output_size=128, dropout_rate=0.5)


In [None]:
def contrastive_loss(z_i, z_j, temperature=0.1):
    cos_sim = F.cosine_similarity(z_i, z_j) / temperature
    loss = -torch.log(torch.exp(cos_sim) / torch.sum(torch.exp(cos_sim)))
    return torch.mean(loss)

In [None]:
total_dataset = VideoAudioDataset(video_features, audio_features)
train_size = int(0.8 * len(total_dataset))
val_size = len(total_dataset) - train_size
train_dataset, val_dataset = random_split(total_dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [None]:
video_encoder = ComplexCNNEncoder(input_size=1, hidden_sizes=[128, 64, 32], output_size=128, dropout_rate=0.5)
audio_encoder = ComplexCNNEncoder(input_size=1, hidden_sizes=[128, 64, 32], output_size=128, dropout_rate=0.5)

In [85]:
def train_epoch(models, dataloader, optimizer):
    video_encoder, audio_encoder = models
    video_encoder.train()
    audio_encoder.train()
    total_loss = 0
    for positive_pair, negative_pair in dataloader:
        optimizer.zero_grad()
        
        video_positive, audio_positive = positive_pair[0].to(torch.float32), positive_pair[1].to(torch.float32)
        video_negative, audio_negative = negative_pair[0].to(torch.float32), negative_pair[1].to(torch.float32)

        z_i_positive = video_encoder(video_positive)
        z_j_positive = audio_encoder(audio_positive)
        z_i_negative = video_encoder(video_negative)
        z_j_negative = audio_encoder(audio_negative)
        
        loss_positive = contrastive_loss(z_i_positive, z_j_positive)
        loss_negative = contrastive_loss(z_i_negative, z_j_negative)
        
        loss = loss_positive + loss_negative
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    return total_loss / len(dataloader)

def evaluate_epoch(models, dataloader):
    video_encoder, audio_encoder = models
    video_encoder.eval()
    audio_encoder.eval()
    total_loss = 0
    with torch.no_grad():
        for positive_pair, negative_pair in dataloader:
            video_positive, audio_positive = positive_pair[0].to(torch.float32), positive_pair[1].to(torch.float32)
            video_negative, audio_negative = negative_pair[0].to(torch.float32), negative_pair[1].to(torch.float32)

            z_i_positive = video_encoder(video_positive)
            z_j_positive = audio_encoder(audio_positive)
            z_i_negative = video_encoder(video_negative)
            z_j_negative = audio_encoder(audio_negative)
            
            loss_positive = contrastive_loss(z_i_positive, z_j_positive)
            loss_negative = contrastive_loss(z_i_negative, z_j_negative)
            
            loss = loss_positive + loss_negative
            
            total_loss += loss.item()
    return total_loss / len(dataloader)


In [None]:
optimizer = torch.optim.Adam(list(video_encoder.parameters()) + list(audio_encoder.parameters()), lr=0.0001)

# Training 
num_epochs = 100
for epoch in range(num_epochs):
    train_loss = train_epoch((video_encoder, audio_encoder), train_dataloader, optimizer)
    val_loss = evaluate_epoch((video_encoder, audio_encoder), val_dataloader)
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

Epoch 1/100, Train Loss: 6.8025, Val Loss: 4.1591
Epoch 2/100, Train Loss: 6.8025, Val Loss: 4.1592
Epoch 3/100, Train Loss: 6.8025, Val Loss: 4.1592
Epoch 4/100, Train Loss: 6.8025, Val Loss: 4.1592
Epoch 5/100, Train Loss: 6.8025, Val Loss: 4.1592
Epoch 6/100, Train Loss: 6.8025, Val Loss: 4.1591
Epoch 7/100, Train Loss: 6.8025, Val Loss: 4.1592
Epoch 8/100, Train Loss: 6.8025, Val Loss: 4.1591
Epoch 9/100, Train Loss: 6.8025, Val Loss: 4.1594
Epoch 10/100, Train Loss: 6.8025, Val Loss: 4.1592
Epoch 11/100, Train Loss: 6.8025, Val Loss: 4.1592
Epoch 12/100, Train Loss: 6.8025, Val Loss: 4.1590
Epoch 13/100, Train Loss: 6.8025, Val Loss: 4.1590
Epoch 14/100, Train Loss: 6.8025, Val Loss: 4.1590
Epoch 15/100, Train Loss: 6.8025, Val Loss: 4.1590
Epoch 16/100, Train Loss: 6.8025, Val Loss: 4.1590
Epoch 17/100, Train Loss: 6.8025, Val Loss: 4.1590
Epoch 18/100, Train Loss: 6.8025, Val Loss: 4.1591
Epoch 19/100, Train Loss: 6.8025, Val Loss: 4.1591
Epoch 20/100, Train Loss: 6.8025, Val Lo

In [None]:
num_epochs = 100
for epoch in range(num_epochs):
    train_loss = train((video_encoder, audio_encoder), train_dataloader, optimizer)
    val_loss = evaluate((video_encoder, audio_encoder), val_dataloader)
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

  video_positive = torch.tensor(video_positive, dtype=torch.float32)
  audio_positive = torch.tensor(audio_positive, dtype=torch.float32)
  video_negative = torch.tensor(video_negative, dtype=torch.float32)
  audio_negative = torch.tensor(audio_negative, dtype=torch.float32)
  video_positive = torch.tensor(video_positive, dtype=torch.float32)


Epoch 1/100, Train Loss: 6.8055, Val Loss: 4.1604
Epoch 2/100, Train Loss: 6.8036, Val Loss: 4.1594
Epoch 3/100, Train Loss: 6.8028, Val Loss: 4.1596
Epoch 4/100, Train Loss: 6.8029, Val Loss: 4.1607
Epoch 5/100, Train Loss: 6.8034, Val Loss: 4.1624
Epoch 6/100, Train Loss: 6.8040, Val Loss: 4.1618
Epoch 7/100, Train Loss: 6.8049, Val Loss: 4.1626
Epoch 8/100, Train Loss: 6.8050, Val Loss: 4.1615
Epoch 9/100, Train Loss: 6.8056, Val Loss: 4.1624
Epoch 10/100, Train Loss: 6.8046, Val Loss: 4.1615
Epoch 11/100, Train Loss: 6.8040, Val Loss: 4.1604
Epoch 12/100, Train Loss: 6.8036, Val Loss: 4.1594
Epoch 13/100, Train Loss: 6.8029, Val Loss: 4.1593
Epoch 14/100, Train Loss: 6.8026, Val Loss: 4.1590
Epoch 15/100, Train Loss: 6.8026, Val Loss: 4.1590
Epoch 16/100, Train Loss: 6.8027, Val Loss: 4.1593
Epoch 17/100, Train Loss: 6.8030, Val Loss: 4.1598
Epoch 18/100, Train Loss: 6.8032, Val Loss: 4.1599
Epoch 19/100, Train Loss: 6.8035, Val Loss: 4.1597
Epoch 20/100, Train Loss: 6.8032, Val Lo

#  

In [None]:
def create_encoder(input_shape, name):
    input_layer = keras.Input(shape=input_shape)
    x = layers.Conv1D(32, 3, activation='relu', padding='same')(input_layer)
    x = layers.MaxPooling1D()(x)
    x = layers.Conv1D(64, 3, activation='relu', padding='same')(x)
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dense(128, activation='relu')(x)
    return keras.Model(inputs=input_layer, outputs=x, name=name)

class DualEncoder(keras.Model):
    def __init__(self, encoder_a, encoder_v, **kwargs):
        super().__init__(**kwargs)
        self.encoder_a = encoder_a
        self.encoder_v = encoder_v
        self.regression_head = keras.layers.Dense(5, activation=None)  # For predicting personality traits
        
    def call(self, inputs):
        audio_input, video_input = inputs
        audio_embedding = self.encoder_a(audio_input)
        video_embedding = self.encoder_v(video_input)
        combined_embedding = keras.layers.concatenate([audio_embedding, video_embedding], axis=-1)
        personality_scores_pred = self.regression_head(combined_embedding)
        return audio_embedding, video_embedding, personality_scores_pred

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            audio_embedding, video_embedding, personality_scores_pred = self(x, training=True)
            loss = self.compute_loss(y, personality_scores_pred, audio_embedding, video_embedding)
            
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return {"loss": loss}

    def compute_loss(self, true_scores, pred_scores, audio_embedding, video_embedding):
        mse_loss = tf.reduce_mean(keras.losses.mean_squared_error(true_scores, pred_scores))
        cosine_loss = tf.reduce_mean(1 - tf.keras.losses.cosine_similarity(audio_embedding, video_embedding))
        return mse_loss + cosine_loss

encoder_a = create_encoder(input_shape=(audio_features.shape[1], 1), name="audio_encoder")
encoder_v = create_encoder(input_shape=(video_features.shape[1], 1), name="video_encoder")

dual_encoder = DualEncoder(encoder_a, encoder_v)
dual_encoder.compile(optimizer=keras.optimizers.Adam())

batch_size = 32

audio_features_expanded = np.expand_dims(audio_features, axis=-1)
video_features_expanded = np.expand_dims(video_features, axis=-1)

dataset = tf.data.Dataset.from_tensor_slices(((audio_features_expanded, video_features_expanded), personality_scores))
dataset = dataset.batch(batch_size)

num_epochs = 200
dual_encoder.fit(dataset, epochs=num_epochs)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x1cfc6d61ca0>

In [None]:

audio_features_expanded = np.expand_dims(audio_features, axis=-1)
video_features_expanded = np.expand_dims(video_features, axis=-1)

(audio_train, audio_test, video_train, video_test, scores_train, scores_test) = train_test_split(
    audio_features_expanded, video_features_expanded, personality_scores, test_size=0.2, random_state=42
)


In [None]:
train_dataset = tf.data.Dataset.from_tensor_slices(((audio_train, video_train), scores_train)).batch(32)
test_dataset = tf.data.Dataset.from_tensor_slices(((audio_test, video_test), scores_test)).batch(32)

In [None]:
encoder_a = create_encoder(input_shape=(audio_features.shape[1], 1), name="audio_encoder")
encoder_v = create_encoder(input_shape=(video_features.shape[1], 1), name="video_encoder")

In [39]:

dual_encoder = DualEncoder(encoder_a, encoder_v)
dual_encoder.compile(optimizer=keras.optimizers.Adam())


In [None]:
num_epochs = 200
dual_encoder.fit(train_dataset, epochs=num_epochs)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x1cf8a954c40>

In [None]:
test_audio_embeddings, test_video_embeddings, predictions = dual_encoder.predict(test_dataset)

mse = mean_squared_error(scores_test, predictions)
r2 = r2_score(scores_test, predictions)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 0.027039427146574722
R-squared: -0.0390604869446296


# 

In [42]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras import layers, regularizers

In [43]:
video_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Video_Interview_features.csv')
audio_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Extracted_Audio_features.csv')


In [None]:
video_features = video_features_df.filter(regex='Feature_').values
audio_features = audio_features_df.drop(columns=['name']).iloc[:, :-5].values
personality_scores = video_features_df[['Extraversion', 'Agreeableness', 'Conscientiousness', 'Neuroticism', 'Openness']].values


In [None]:
def create_enhanced_encoder(input_shape, name):
    input_layer = keras.Input(shape=input_shape)
    x = layers.Conv1D(64, 3, activation='relu', padding='same')(input_layer)
    x = layers.MaxPooling1D()(x)
    x = layers.Conv1D(128, 3, activation='relu', padding='same')(x)
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    x = layers.Dropout(0.5)(x)  
    x = layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
    return keras.Model(inputs=input_layer, outputs=x, name=name)


class DualEncoder(keras.Model):
    def __init__(self, encoder_a, encoder_v, **kwargs):
        super().__init__(**kwargs)
        self.encoder_a = encoder_a
        self.encoder_v = encoder_v
        self.regression_head = keras.layers.Dense(5, activation=None) 
        
    def call(self, inputs):
        audio_input, video_input = inputs
        audio_embedding = self.encoder_a(audio_input)
        video_embedding = self.encoder_v(video_input)
        combined_embedding = keras.layers.concatenate([audio_embedding, video_embedding], axis=-1)
        personality_scores_pred = self.regression_head(combined_embedding)
        return audio_embedding, video_embedding, personality_scores_pred

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            audio_embedding, video_embedding, personality_scores_pred = self(x, training=True)
            loss = self.compute_loss(y, personality_scores_pred, audio_embedding, video_embedding)
            
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return {"loss": loss}

    def compute_loss(self, true_scores, pred_scores, audio_embedding, video_embedding):
        mse_loss = tf.reduce_mean(keras.losses.mean_squared_error(true_scores, pred_scores))
        cosine_loss = tf.reduce_mean(1 - tf.keras.losses.cosine_similarity(audio_embedding, video_embedding))
        return mse_loss + cosine_loss


In [None]:
def scheduler(epoch, lr):
    if epoch < 5:
        return lr
    else:
        return lr * tf.math.exp(-0.1)

In [None]:
audio_features_expanded = np.expand_dims(audio_features, axis=-1)
video_features_expanded = np.expand_dims(video_features, axis=-1)
(audio_train, audio_test, video_train, video_test, scores_train, scores_test) = train_test_split(
    audio_features_expanded, video_features_expanded, personality_scores, test_size=0.2, random_state=42
)
train_dataset = tf.data.Dataset.from_tensor_slices(((audio_train, video_train), scores_train)).batch(32)
test_dataset = tf.data.Dataset.from_tensor_slices(((audio_test, video_test), scores_test)).batch(32)

encoder_a_enhanced = create_enhanced_encoder(input_shape=(audio_features.shape[1], 1), name="audio_encoder_enhanced")
encoder_v_enhanced = create_enhanced_encoder(input_shape=(video_features.shape[1], 1), name="video_encoder_enhanced")

dual_encoder_enhanced = DualEncoder(encoder_a_enhanced, encoder_v_enhanced)
dual_encoder_enhanced.compile(optimizer=keras.optimizers.Adam())

callback = tf.keras.callbacks.LearningRateScheduler(scheduler)
num_epochs = 2000  

dual_encoder_enhanced.fit(train_dataset, epochs=num_epochs, callbacks=[callback])

_, _, predictions_enhanced = dual_encoder_enhanced.predict(test_dataset)

mse_enhanced = mean_squared_error(scores_test, predictions_enhanced)
r2_enhanced = r2_score(scores_test, predictions_enhanced)

print(f"Enhanced Model - Mean Squared Error: {mse_enhanced}")
print(f"Enhanced Model - R-squared: {r2_enhanced}")

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

In [None]:
# improved code -------------------------------------------------------------------------------------------------------

# CL

In [49]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [50]:

video_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Video_Interview_features.csv')
audio_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Extracted_Audio_features.csv')


In [None]:

video_features = video_features_df.iloc[:, 1:-5].values 
audio_features = audio_features_df.iloc[:, 1:-5].values 

video_features = (video_features - np.mean(video_features, axis=0)) / np.std(video_features, axis=0)
audio_features = (audio_features - np.mean(audio_features, axis=0)) / np.std(audio_features, axis=0)
personality_scores = video_features_df[['Extraversion', 'Agreeableness', 'Conscientiousness', 'Neuroticism', 'Openness']].values

split_index = int(0.8 * len(video_features))  
train_video_features = video_features[:split_index]
test_video_features = video_features[split_index:]

train_audio_features = audio_features[:split_index]
test_audio_features = audio_features[split_index:]

train_personality_scores = personality_scores[:split_index]
test_personality_scores = personality_scores[split_index:]

train_dataset = tf.data.Dataset.from_tensor_slices((train_video_features, train_audio_features, train_personality_scores))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(32)

test_dataset = tf.data.Dataset.from_tensor_slices((test_video_features, test_audio_features, test_personality_scores))
test_dataset = test_dataset.batch(32)


In [None]:
def prepare_features(video_features, audio_features, personality_scores):
    return (video_features, audio_features), personality_scores

train_dataset = tf.data.Dataset.from_tensor_slices((train_video_features, train_audio_features, train_personality_scores))
train_dataset = train_dataset.map(prepare_features)  
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(32)

test_dataset = tf.data.Dataset.from_tensor_slices((test_video_features, test_audio_features, test_personality_scores))
test_dataset = test_dataset.map(prepare_features) 
test_dataset = test_dataset.batch(32)



In [None]:
class ContrastiveModel(keras.Model):
    def __init__(self, encoder_video, encoder_audio, predictor, **kwargs):
        super().__init__(**kwargs)
        self.encoder_video = encoder_video
        self.encoder_audio = encoder_audio
        self.predictor = predictor

    def call(self, inputs):
        video_input, audio_input = inputs
        video_embedding = self.encoder_video(video_input)
        audio_embedding = self.encoder_audio(audio_input)
        combined_embedding = tf.concat([video_embedding, audio_embedding], axis=1)
        personality_pred = self.predictor(combined_embedding)
        return video_embedding, audio_embedding, personality_pred

    def train_step(self, data):
  
        (video_input, audio_input), true_personality = data

        with tf.GradientTape() as tape:
            video_embedding, audio_embedding, personality_pred = self((video_input, audio_input), training=True)
           
            contrastive_loss = -tf.reduce_mean(tf.keras.losses.cosine_similarity(video_embedding, audio_embedding))
            
            regression_loss = tf.reduce_mean(keras.losses.MSE(true_personality, personality_pred))
           
            total_loss = contrastive_loss + regression_loss

        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return {"total_loss": total_loss, "contrastive_loss": contrastive_loss, "regression_loss": regression_loss}

encoder_video = keras.Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
], name='encoder_video')

encoder_audio = keras.Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
], name='encoder_audio')

predictor = keras.Sequential([
    layers.Dense(64, activation='relu'),
    layers.Dense(5, activation=None)  
], name='predictor')

model = ContrastiveModel(encoder_video, encoder_audio, predictor)
model.compile(optimizer=keras.optimizers.Adam(1e-4))

model.fit(train_dataset, epochs=10, validation_data=test_dataset)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1cff4acbf10>

In [58]:
for (inputs, targets) in train_dataset.take(1):
    print(f'Inputs structure: {type(inputs)}, {len(inputs)}')
    print(f'Targets shape: {targets.shape}')
    video_input, audio_input = inputs
    print(f'Video input shape: {video_input.shape}, Audio input shape: {audio_input.shape}')


Inputs structure: <class 'tuple'>, 2
Targets shape: (30, 5)
Video input shape: (30, 768), Audio input shape: (30, 1024)


In [59]:
from sklearn.metrics.pairwise import cosine_similarity

def evaluate_embedding_alignment(model, test_data):
    video_embeddings, audio_embeddings = [], []
    for (video_input, audio_input), _ in test_data.unbatch().batch(1):
        video_emb, audio_emb, _ = model.predict((video_input, audio_input))
        video_embeddings.append(video_emb.flatten())
        audio_embeddings.append(audio_emb.flatten())
    
    video_embeddings = np.array(video_embeddings)
    audio_embeddings = np.array(audio_embeddings)
    similarity_scores = cosine_similarity(video_embeddings, audio_embeddings)
    mean_similarity = np.diag(similarity_scores).mean()
    print(f"Mean cosine similarity between corresponding video and audio embeddings: {mean_similarity:.4f}")

evaluate_embedding_alignment(model, test_dataset)


Mean cosine similarity between corresponding video and audio embeddings: 0.2598


In [60]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

def evaluate_personality_predictions(model, test_data):
    y_true, y_pred = [], []
    for (video_input, audio_input), personality_scores in test_data.unbatch().batch(1):
        _, _, predictions = model.predict((video_input, audio_input))
        y_true.append(personality_scores.numpy().flatten())
        y_pred.append(predictions.flatten())
    
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    
    print(f"MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}")

evaluate_personality_predictions(model, test_dataset)


MSE: 0.4995, RMSE: 0.7068, MAE: 0.5684


# CL

In [None]:
import os
import collections
import json
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tqdm import tqdm
import pandas as pd


tf.get_logger().setLevel("ERROR")

In [None]:

video_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Video_Interview_features.csv')
audio_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Extracted_Audio_features.csv')

In [None]:

video_features = video_features_df.iloc[:, 1:-5].values  
audio_features = audio_features_df.iloc[:, 1:-5].values  

video_features = (video_features - np.mean(video_features, axis=0)) / np.std(video_features, axis=0)
audio_features = (audio_features - np.mean(audio_features, axis=0)) / np.std(audio_features, axis=0)

personality_scores = video_features_df[['Extraversion', 'Agreeableness', 'Conscientiousness', 'Neuroticism', 'Openness']].values

# CL -------------------------- complete code

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd

video_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Video_Interview_features.csv')
audio_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Extracted_Audio_features.csv')

video_features = video_features_df.iloc[:, 1:-5].values  
audio_features = audio_features_df.iloc[:, 1:-5].values 

video_features = (video_features - np.mean(video_features, axis=0)) / np.std(video_features, axis=0)
audio_features = (audio_features - np.mean(audio_features, axis=0)) / np.std(audio_features, axis=0)


split_index = int(0.8 * len(video_features)) 
train_video_features = video_features[:split_index]
test_video_features = video_features[split_index:]

train_audio_features = audio_features[:split_index]
test_audio_features = audio_features[split_index:]

train_dataset = tf.data.Dataset.from_tensor_slices(({"video": train_video_features, "audio": train_audio_features}))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(32)

test_dataset = tf.data.Dataset.from_tensor_slices(({"video": test_video_features, "audio": test_audio_features}))
test_dataset = test_dataset.batch(32)

def create_video_encoder():
    input_video = tf.keras.Input(shape=[video_features.shape[1]]) 
    x1 = tf.keras.layers.Reshape((video_features.shape[1], 1))(input_video)  
    x1 = tf.keras.layers.Conv1D(32, 3, activation='relu', padding='same', name="cnn1")(x1)
    x1 = tf.keras.layers.MaxPooling1D(name="maxpool1")(x1)
    x1 = tf.keras.layers.Flatten()(x1)
    x1 = tf.keras.layers.Dense(120, activation=None)(x1)
    return tf.keras.Model(inputs=input_video, outputs=x1, name="video_encoder")

def create_audio_encoder():
    input_audio = tf.keras.Input(shape=[audio_features.shape[1]])  
    x2 = tf.keras.layers.Reshape((audio_features.shape[1], 1))(input_audio)  
    x2 = tf.keras.layers.Conv1D(32, 3, activation='relu', padding='same')(x2)
    x2 = tf.keras.layers.MaxPooling1D()(x2)
    x2 = tf.keras.layers.Flatten()(x2)
    x2 = tf.keras.layers.Dense(120, activation=None)(x2)
    return tf.keras.Model(inputs=input_audio, outputs=x2, name="audio_encoder")

class TripleEncoder(tf.keras.Model):
    def __init__(self, video_encoder, audio_encoder, temperature=1.0, **kwargs):
        super().__init__(**kwargs)
        self.video_encoder = video_encoder
        self.audio_encoder = audio_encoder
        self.temperature = temperature
        self.loss_tracker = tf.keras.metrics.Mean(name="loss")

    @property
    def metrics(self):
        return [self.loss_tracker]

    def call(self, features, training=False):
        video_embeddings = self.video_encoder(features["video"], training=training)
        audio_embeddings = self.audio_encoder(features["audio"], training=training)
        return video_embeddings, audio_embeddings

    def compute_loss(self, embeddings1, embeddings2):
        logits = tf.matmul(embeddings1, embeddings2, transpose_b=True) / self.temperature
        similarity_matrix = tf.matmul(embeddings2, embeddings2, transpose_b=True)
        targets = tf.keras.activations.softmax(similarity_matrix / (2 * self.temperature))
        loss = tf.keras.losses.categorical_crossentropy(
            y_true=targets, y_pred=logits, from_logits=True
        )
        return loss

    def train_step(self, features):
        with tf.GradientTape() as tape:
            video_embeddings, audio_embeddings = self(features, training=True)
            video_audio_loss = self.compute_loss(video_embeddings, audio_embeddings)
            total_loss = video_audio_loss
        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        self.loss_tracker.update_state(total_loss)
        return {"loss": self.loss_tracker.result()}

    def test_step(self, features):
        video_embeddings, audio_embeddings = self(features, training=False)
        video_audio_loss = self.compute_loss(video_embeddings, audio_embeddings)
        total_loss = video_audio_loss
        self.loss_tracker.update_state(total_loss)
        return {"loss": self.loss_tracker.result()}

video_encoder = create_video_encoder()
audio_encoder = create_audio_encoder()

triple_encoder = TripleEncoder(video_encoder, audio_encoder, temperature=1.0)

triple_encoder.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4))

num_epochs = 50
history = triple_encoder.fit(train_dataset, epochs=num_epochs)

evaluation_result = triple_encoder.evaluate(test_dataset)
print("Evaluation Loss:", evaluation_result)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Evaluation Loss: 3.3642518126517345e-16


In [3]:
# saving the model

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd

video_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Video_Interview_features.csv')
audio_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Extracted_Audio_features.csv')

video_features = video_features_df.iloc[:, 1:-5].values  
audio_features = audio_features_df.iloc[:, 1:-5].values 

video_features = (video_features - np.mean(video_features, axis=0)) / np.std(video_features, axis=0)
audio_features = (audio_features - np.mean(audio_features, axis=0)) / np.std(audio_features, axis=0)

dataset = tf.data.Dataset.from_tensor_slices(({"video": video_features, "audio": audio_features}))
dataset = dataset.shuffle(buffer_size=1024).batch(32)

In [None]:
# TripleEncoder MEANS DUAL ENCODER

In [5]:
def create_video_encoder():
    input_video = tf.keras.Input(shape=[video_features.shape[1]])  # Assuming shape of video features
    x1 = tf.keras.layers.Reshape((video_features.shape[1], 1))(input_video)  # Adding a channel dimension
    x1 = tf.keras.layers.Conv1D(32, 3, activation='relu', padding='same', name="cnn1")(x1)
    x1 = tf.keras.layers.MaxPooling1D(name="maxpool1")(x1)
    x1 = tf.keras.layers.Flatten()(x1)
    x1 = tf.keras.layers.Dense(120, activation=None)(x1)
    return tf.keras.Model(inputs=input_video, outputs=x1, name="video_encoder")

def create_audio_encoder():
    input_audio = tf.keras.Input(shape=[audio_features.shape[1]])  # Assuming shape of audio features
    x2 = tf.keras.layers.Reshape((audio_features.shape[1], 1))(input_audio)  # Adding a channel dimension
    x2 = tf.keras.layers.Conv1D(32, 3, activation='relu', padding='same')(x2)
    x2 = tf.keras.layers.MaxPooling1D()(x2)
    x2 = tf.keras.layers.Flatten()(x2)
    x2 = tf.keras.layers.Dense(120, activation=None)(x2)
    return tf.keras.Model(inputs=input_audio, outputs=x2, name="audio_encoder")

class TripleEncoder(tf.keras.Model):
    def __init__(self, video_encoder, audio_encoder, temperature=1.0, **kwargs):
        super().__init__(**kwargs)
        self.video_encoder = video_encoder
        self.audio_encoder = audio_encoder
        self.temperature = temperature
        self.loss_tracker = tf.keras.metrics.Mean(name="loss")

    @property
    def metrics(self):
        return [self.loss_tracker]

    def call(self, features, training=False):
        video_embeddings = self.video_encoder(features["video"], training=training)
        audio_embeddings = self.audio_encoder(features["audio"], training=training)
        return video_embeddings, audio_embeddings

    def compute_loss(self, embeddings1, embeddings2):
        logits = tf.matmul(embeddings1, embeddings2, transpose_b=True) / self.temperature
        similarity_matrix = tf.matmul(embeddings2, embeddings2, transpose_b=True)
        targets = tf.keras.activations.softmax(similarity_matrix / (2 * self.temperature))
        loss = tf.keras.losses.categorical_crossentropy(
            y_true=targets, y_pred=logits, from_logits=True
        )
        return loss

    def train_step(self, features):
        with tf.GradientTape() as tape:
            video_embeddings, audio_embeddings = self(features, training=True)
            video_audio_loss = self.compute_loss(video_embeddings, audio_embeddings)
            total_loss = video_audio_loss
        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        self.loss_tracker.update_state(total_loss)
        return {"loss": self.loss_tracker.result()}

    def test_step(self, features):
        video_embeddings, audio_embeddings = self(features, training=False)
        video_audio_loss = self.compute_loss(video_embeddings, audio_embeddings)
        total_loss = video_audio_loss
        self.loss_tracker.update_state(total_loss)
        return {"loss": self.loss_tracker.result()}


In [None]:
video_encoder = create_video_encoder()
audio_encoder = create_audio_encoder()


In [None]:

triple_encoder = TripleEncoder(video_encoder, audio_encoder, temperature=1.0)

triple_encoder.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4))


In [None]:
num_epochs = 50
history = triple_encoder.fit(dataset, epochs=num_epochs)

triple_encoder.save("C:/Users/giris/Downloads/RESEARCH/Personality/MODEL/triple_encoder_model")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




INFO:tensorflow:Assets written to: C:/Users/giris/Downloads/RESEARCH/Personality/MODEL/triple_encoder_model\assets


INFO:tensorflow:Assets written to: C:/Users/giris/Downloads/RESEARCH/Personality/MODEL/triple_encoder_model\assets


In [None]:
loaded_model = tf.keras.models.load_model("C:/Users/giris/Downloads/RESEARCH/Personality/MODEL/triple_encoder_model")

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
video_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Video_Interview_features.csv')
audio_features_df = pd.read_csv('C:/Users/giris/Downloads/RESEARCH/Personality/personality_data/Extracted_Audio_features.csv')

video_features = video_features_df.iloc[:, 1:-5].values  
audio_features = audio_features_df.iloc[:, 1:-5].values 

new_data = {"video": video_features, "audio": audio_features}

new_video_embeddings, new_audio_embeddings = loaded_model(new_data)

In [None]:
print("Shape of new video embeddings:", new_video_embeddings.shape)
print("Shape of new audio embeddings:", new_audio_embeddings.shape)


Shape of new video embeddings: (38, 120)
Shape of new audio embeddings: (38, 120)
