In [None]:
import nltk
import pandas as pd
import numpy as np
from gensim.models.keyedvectors import KeyedVectors
nltk.download('stopwords')
from nltk.corpus import stopwords

import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
class MultimodalLSTM(nn.Module):
    """Late-fusion model with one LSTM encoder per modality.

    Text, audio and video sequences are each encoded by their own batch-first
    LSTM. The final hidden state of each encoder is concatenated and passed
    through a linear layer followed by a sigmoid.

    Args:
        text_modal: Feature size of one text time step.
        audio_modal: Feature size of one audio time step.
        video_modal: Feature size of one video time step.
        hidden_size: Hidden size shared by the three LSTMs.
        output_size: Output size of the final linear layer.
    """

    def __init__(self, text_modal, audio_modal, video_modal, hidden_size, output_size):
        super().__init__()
        self.text_layer = nn.LSTM(input_size=text_modal, hidden_size=hidden_size, batch_first=True)
        self.audio_layer = nn.LSTM(input_size=audio_modal, hidden_size=hidden_size, batch_first=True)
        self.video_layer = nn.LSTM(input_size=video_modal, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size * 3, output_size)

    def forward(self, x1, x2, x3):
        """Score a single sample.

        Args:
            x1: Text sequence, shaped (seq_len, text_modal) or
                (1, seq_len, text_modal).
            x2: Audio sequence, shaped (seq_len, audio_modal) or
                (1, seq_len, audio_modal).
            x3: Video sequence, shaped (seq_len, video_modal) or
                (1, seq_len, video_modal).

        Returns:
            float: The sigmoid output. Because the result is converted with
            ``float()``, the output must hold exactly one element (one sample
            and ``output_size == 1``); anything larger raises.
        """
        _, (h1, _) = self.text_layer(x1)
        _, (h2, _) = self.audio_layer(x2)
        _, (h3, _) = self.video_layer(x3)
        # h[-1] is (hidden,) for unbatched input and (batch, hidden) for batched
        # input. Joining on the last axis lines the three modality vectors up
        # side by side in both cases; joining on axis 0 would stack batched
        # states into (3 * batch, hidden), which does not fit `fc`.
        combined = torch.cat((h1[-1], h2[-1], h3[-1]), dim=-1)
        output = torch.sigmoid(self.fc(combined))
        return float(output)

In [None]:
# Define a function to make predictions with the model
def make_predictions(input_data1, input_data2, input_data3,
                     model_path='./models/multimodal_lstm_model_10_epochs.pth'):
    """Run the saved MultimodalLSTM on one text/audio/video sample.

    Args:
        input_data1: Text tensor whose last dimension is 300.
        input_data2: Audio tensor whose last dimension is 74.
        input_data3: Video tensor whose last dimension is 388.
        model_path: Path of the state-dict checkpoint to load. Defaults to the
            10-epoch checkpoint used by this notebook.

    Returns:
        Whatever ``MultimodalLSTM.forward`` returns for the three inputs.
    """
    # Load the model
    model = MultimodalLSTM(300, 74, 388, 64, 1)
    # The model is built on the CPU, so remap checkpoint tensors to the CPU:
    # without map_location a checkpoint saved from a GPU cannot be loaded on a
    # machine without one.
    # NOTE(review): depending on the torch version, torch.load may unpickle
    # arbitrary objects - only load checkpoints from a trusted source.
    state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()

    # Make predictions with the model; no autograd graph is needed for inference.
    with torch.no_grad():
        output = model(input_data1, input_data2, input_data3)

    # Return the predictions
    return output

# Example usage:
# Placeholder inputs shaped (sequence_length, feature_size). The feature sizes
# match the dimensions make_predictions builds the model with (300 / 74 / 388).
input_data1 = torch.randn(1000, 300)  # Replace with your input data
input_data2 = torch.randn(1000, 74)  # Replace with your input data
input_data3 = torch.randn(1000, 388)  # Replace with your input data

output = make_predictions(input_data1, input_data2, input_data3)
print(output)

In [None]:
import pandas as pd

# Imported under an alias: the audio cell imports and the video cell defines
# their own `return_tensor`, so the bare name would refer to whichever of the
# three cells ran last.
from text_processing import return_tensor as text_return_tensor

# The transcript file is tab-separated.
text_return_tensor(pd.read_csv('./daic_woz/dev_data/302/302_TRANSCRIPT.csv', delimiter = '\t', encoding='utf-8', engine='python'))

In [None]:
import pandas as pd

# Imported under an alias: the text cell imports and the video cell defines
# their own `return_tensor`, so the bare name would refer to whichever of the
# three cells ran last.
from audio_processing import return_tensor as audio_return_tensor

# header = None: the first line of the file is read as data, not column names.
audio_return_tensor(pd.read_csv('./daic_woz/dev_data/302/302_COVAREP.csv', header = None))

In [1]:
import pandas as pd
import numpy as np
import torch

def processData(data):
    """Turn a feature DataFrame into a numeric array with flagged rows zeroed.

    The first column is dropped, then the column at index 1 of what remains
    (presumably the frame index and a confidence column of the CLNF export -
    TODO confirm against the file headers). Any row holding a string in
    remaining column 5 or 7 is replaced by zeros.

    Args:
        data: DataFrame with at least two columns; at least eight columns must
            remain after the two deletions whenever string cells are present.

    Returns:
        np.ndarray with one row per input row. Purely numeric input is
        returned with its original dtype. Input that contained strings is
        returned as float64 when every remaining cell is convertible, so the
        result can be fed to NumPy arithmetic and ``torch.tensor``; otherwise
        the object-dtype array is returned as-is.
    """
    X = data.iloc[:, :].values
    X = np.delete(X, 0, 1)
    X = np.delete(X, 1, 1)

    # A non-object array cannot hold str cells, so there is nothing to zero.
    if X.dtype != object:
        return X

    # Flag rows carrying a string in either checked column and blank them.
    bad_rows = np.array(
        [isinstance(row[5], str) or isinstance(row[7], str) for row in X],
        dtype=bool,
    )
    X[bad_rows] = 0.0

    # An object array cannot be turned into a tensor downstream; convert when
    # possible. Falling back keeps the previous return value for inputs that
    # still contain non-numeric cells in other columns.
    try:
        return X.astype(np.float64)
    except (TypeError, ValueError):
        return X

def scale_down(X, size=2):
    """Downsample along axis 0 by averaging consecutive groups of rows.

    Rows ``[0, size)``, ``[size, 2*size)``, ... are each replaced by their
    mean. Trailing rows that do not fill a complete group are dropped. The
    input array is not modified.

    NOTE(review): the previous implementation added ``X[i + j]`` instead of
    ``X[i*size + j]`` and accumulated in place into ``X``, so its output was
    not a group average. If a model was trained on features produced that way,
    regenerate the features - confirm.

    Args:
        X: Array with the rows to average along axis 0.
        size: Number of consecutive rows per group. Defaults to 2.

    Returns:
        np.ndarray with ``X.shape[0] // size`` rows and the same trailing
        shape as ``X``.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be a positive integer")
    n_groups = X.shape[0] // size
    # Drop the incomplete tail, then view each group as its own axis so that a
    # single mean over that axis averages every group at once.
    grouped = X[:n_groups * size].reshape((n_groups, size) + X.shape[1:])
    return grouped.mean(axis=1)

def decrease_size(X, size=1000):
    """Force a 2-D array to exactly ``size`` rows.

    Shorter inputs are padded with zero rows at the end; longer inputs are
    truncated to their first ``size`` rows; inputs that already have ``size``
    rows are returned unchanged.

    Args:
        X: 2-D array.
        size: Target number of rows. Defaults to 1000.

    Returns:
        np.ndarray with ``size`` rows and ``X.shape[1]`` columns.
    """
    n_rows = X.shape[0]
    if n_rows < size:
        padding = np.zeros((size - n_rows, X.shape[1]))
        return np.concatenate((X, padding), axis=0)
    if n_rows > size:
        # Slice with `size` rather than a literal so the two stay in sync.
        return X[:size, :]
    return X

def prcs_video(au, feat, feat3d, gaze, pose):
    """Build the video feature matrix from the five per-file arrays.

    The arrays are joined column-wise, passed through ``scale_down`` and then
    through ``decrease_size``.

    Args:
        au, feat, feat3d, gaze, pose: 2-D arrays with the same number of rows.

    Returns:
        The array returned by ``decrease_size``.
    """
    stacked = np.concatenate((au, feat, feat3d, gaze, pose), axis=1)
    return decrease_size(scale_down(stacked))

def return_tensor(au, feat, feat3d, gaze, pose):
    """Return the processed video features as a float32 torch tensor.

    Args:
        au, feat, feat3d, gaze, pose: Arrays forwarded unchanged to
            ``prcs_video``.

    Returns:
        torch.Tensor of dtype float32 built from the ``prcs_video`` result.
    """
    video_features = prcs_video(au, feat, feat3d, gaze, pose)
    return torch.tensor(video_features).float()

In [2]:
# Load the five 302_CLNF_* files. They share one path pattern and the same
# read_csv options, so they are read in a single pass instead of five
# copy-pasted statements; the order of the suffixes matches the names on the left.
au, feat, feat3d, gaze, pose = (
    processData(pd.read_csv(f'./daic_woz/dev_data/302/302_CLNF_{suffix}.txt', delimiter = ',', engine = 'python'))
    for suffix in ('AUs', 'features', 'features3D', 'gaze', 'pose')
)

In [5]:
# Shape check on the assembled video tensor; the recorded output below is torch.Size([1000, 388]).
return_tensor(au, feat, feat3d, gaze, pose).shape

torch.Size([1000, 388])

In [4]:
import torch