In [1]:
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
import plotly.graph_objects as go
import networkx as nx

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing import image
import nltk
nltk.download('stopwords')
from keras.preprocessing.text import Tokenizer
from gensim.models import KeyedVectors
from keras.layers import Embedding
from keras.models import Sequential
import tensorflow as tf
from mtcnn import MTCNN


from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.models import Model, load_model
import cv2
import os

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam

from spektral.data.loaders import SingleLoader, Loader
from spektral.datasets.citation import Citation
from spektral.layers import GCNConv
from spektral.models.gcn import GCN
from spektral.transforms import LayerPreprocess

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\gsevr\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
df = pd.read_csv('dev_sent_emo.csv',encoding='utf-8')
df['Utterance'] = df.Utterance.str.replace('',"'")
df['gender'] = df.Speaker.apply(lambda x: 'male' if x == 'Ross' or x == 'Joey' or x == 'Chandler' else 'female')
df = df.sample(frac=0.03,ignore_index=True)
df

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime,gender
0,41,"Y'know, you look nothing like I would've thoug...",Woman,surprise,positive,4,13,7,7,"00:15:56,830","00:15:58,311",female
1,160,Toby don't.,Bob,sadness,negative,14,6,8,5,"0:16:35,577","0:16:36,849",female
2,728,Tag?,Ross,neutral,neutral,69,3,7,8,"0:19:46,728","0:19:49,508",male
3,1162,"Well, it's pretty much the gist. Well, except ...",Ursula,neutral,neutral,112,2,4,1,"00:13:45,991","00:13:53,331",female
4,283,I'm sorry.,Phoebe,sadness,negative,23,10,3,6,"00:18:33,529","00:18:34,570",female
5,435,No-o-o!!,Joey,fear,negative,37,5,3,24,"0:22:12,838","0:22:13,295",male
6,815,"So it's kinda like, you're, you know.",Chandler,neutral,neutral,76,12,3,1,"00:17:00,769","00:17:08,860",male
7,779,"And shame on you! You should know better, Joey...",Ross,anger,negative,73,0,5,10,"00:10:28,461","00:10:34,257",male
8,360,"Yeah, you're great! Okay, let's take it from",Joey,joy,positive,30,8,6,20,"00:00:29,446","00:00:35,159",male
9,1067,What?,Rachel,neutral,neutral,102,1,1,2,"0:13:42,477","0:13:43,955",female


In [3]:
def preprocess(text):
    t = text.lower()
    t = re.sub('\d+',r'',t)
    t = re.sub(r'\W+',r' ',t)
    return t

lemmatizer = WordNetLemmatizer()



df['prepro'] = [' '.join([lemmatizer.lemmatize(preprocess(txt))])
                 .strip() for txt in df['Utterance']]


texts = df.prepro.values
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=16, padding='post')

In [4]:
glove_path = 'glove.6B.50d.txt'
word_vectors = KeyedVectors.load_word2vec_format(glove_path, binary=False, encoding='utf8',no_header=True)



In [5]:
embedding_dim = 50  
embedding_matrix = np.zeros((len(tokenizer.word_index) + 1, embedding_dim))

for word, i in tokenizer.word_index.items():
    if word in word_vectors:
        embedding_matrix[i] = word_vectors[word]

embedding_layer = Embedding(
    input_dim=len(tokenizer.word_index) + 1,
    output_dim=embedding_dim,
    weights=[embedding_matrix],
    input_length=16,
    trainable=False
)


model = Sequential()
model.add(embedding_layer)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics='accuracy')


In [6]:
embedding_vectors = model.predict(padded_sequences)
df['text'] = list(embedding_vectors)



In [7]:
# Load MTCNN model for face detection
detector = MTCNN()

# Load FaceNet model for face embedding extraction
facenet_model = tf.keras.applications.ResNet50(weights='imagenet')

def preprocess_image(img):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img = img / 255.0 
    return img

def extract_face_embeddings(frame):
    faces = detector.detect_faces(frame)
    face_embeddings = []
    for face in faces:
        x, y, w, h = face['box']
        x1, y1 = max(x, 0), max(y, 0)
        x2, y2 = min(x + w, frame.shape[1]), min(y + h, frame.shape[0])
        cropped_face = frame[y1:y2, x1:x2]
        
        # Preprocess the face for FaceNet model
        preprocessed_face = preprocess_image(cropped_face)
        preprocessed_face = np.expand_dims(preprocessed_face, axis=0)
        
        # Get face embeddings using FaceNet model
        face_embedding = facenet_model.predict(preprocessed_face)
        face_embeddings.append(face_embedding)
    return face_embeddings

# Read video
folder_path = 'dev_splits_complete/'
one_face_videos = {}

for idx,row in df.iterrows():
    file_name = 'dia' + str(row['Dialogue_ID']) + '_utt' + str(row['Utterance_ID']) + '.mp4'
    video_path = folder_path + file_name
    if os.path.isfile(video_path):
        video_capture = cv2.VideoCapture(video_path)  # Provide the path to your input video

        single_video_embeddings = []  # List to store embeddings for all frames

        while True:
            ret, frame = video_capture.read()
            if not ret:
                break

            #check for more than 1 faces
            if len(detector.detect_faces(frame)) == 1:
            
                # Extract face embeddings from each frame
                extracted_embeddings = extract_face_embeddings(frame)
                
                single_video_embeddings.append(extracted_embeddings)  # Append embeddings for this frame
                
                # Display the frame with bounding boxes around detected faces
                for face in detector.detect_faces(frame):
                    x, y, w, h = face['box']
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                
                cv2.imshow('Video', frame)
                
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

                video_capture.release()
                cv2.destroyAllWindows()

            else:
                break
        if single_video_embeddings != []:            
            one_face_videos[file_name] = single_video_embeddings
        else:
            one_face_videos[file_name] = 'too many faces'

                






In [8]:
df['faces'] = one_face_videos.values()

df = df[df.faces != 'too many faces']

df['faces'] = df.faces.apply(lambda x: x[0][0][0])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['faces'] = df.faces.apply(lambda x: x[0][0][0])


In [9]:
# base_model = InceptionV3(weights='imagenet', include_top=False)

# model = Model(inputs=base_model.input, outputs=base_model.layers[-2].output)

# def get_video_embeddings(video_path):
#     cap = cv2.VideoCapture(video_path)
#     frame_embeddings = []

#     while True:
#         ret, frame = cap.read()
#         if not ret:
#             break

#         frame = cv2.resize(frame, (299, 299))
#         x = np.expand_dims(frame, axis=0)

#         x = preprocess_input(x)

#         frame_embedding = model.predict(x)

#         frame_embeddings.append(frame_embedding)

#     cap.release()
#     return frame_embeddings



# video_embeddings_list = []
# folder_path = 'dev_splits_complete/'

# for idx,row in df.iterrows():
#     file_name = 'dia' + str(row['Dialogue_ID']) + '_utt' + str(row['Utterance_ID']) + '.mp4'
#     video_path = folder_path + file_name
#     if os.path.isfile(video_path):
#         video_embeddings_list.append(get_video_embeddings(video_path))
#     else:
#         print(f'File name {file_name} does not exist')

# df['video'] = video_embeddings_list
        


In [28]:
G = nx.Graph()


In [29]:

emotion_col = {
            'sadness':'teal',
            'surprise':'silver',
            'neutral':'lightgreen',
            'joy':'purple',
            'anger':'black',
            'disgust': 'brown',
            'fear':'yellow'
}

gender_col = {
    'male':'blue',
    'female':'pink'
}

In [10]:
for emotion in df.Emotion.unique():
    G.add_node(emotion, color=emotion_col[emotion],hover=emotion,emotion=emotion, type='emotion')


In [12]:
for gender in df.gender.unique():
    G.add_node(gender,color=gender_col[gender], hover=gender, gender=gender, type='gender')

In [30]:
for idx, row in df.iterrows():
    G.add_node(row['Utterance'], 
    text_embedding=row['text'],
    visual_embedding=row['faces'],
    color=emotion_col[row['Emotion']],
    hover=row['Utterance'],
    emotion=row['Emotion'],
    gender=row['gender'], 
    type='utterance',
    padding='zero')

In [14]:
for idx, row in df.iterrows():
    G.add_edge(row['Utterance'], row['Emotion'],color=emotion_col[row['Emotion']])
    G.add_edge(row['Utterance'], row['gender'], color=gender_col[row['gender']])


In [15]:
pos = nx.spring_layout(G)
# pos = nx.fruchterman_reingold_layout(G)


In [16]:
import textwrap

fig = go.Figure()

node_positions = {}

for node, position in pos.items():
    x, y = position  

    color = G.nodes[node]['color']  
    hover = G.nodes[node]['hover']
    hover = '<br>'.join(textwrap.wrap(hover,width=50))
    try:
        emotion = G.nodes[node]['emotion']
    except KeyError:
        try:
            gender = G.nodes[node]['gender']
        except KeyError:
            continue
    node_type = G.nodes[node]['type']

    if node_type == 'utterance':
        fig.add_trace(go.Scatter(x=[x], y=[y], marker=dict(size=10,color=color), hovertext=hover, name=emotion, hovertemplate=hover))
    else:
        fig.add_trace(go.Scatter(x=[x], y=[y], marker=dict(size=10,color=color), hovertext=hover, name=node_type, hovertemplate=hover))


for edge in G.edges():
    color = G.edges[edge]['color']
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    fig.add_trace(go.Scatter(x=[x0, x1], y=[y0, y1], mode='lines', line=dict(width=1,color=color), name='Edge'))

fig.update_layout(showlegend=False, title='Emotions Graph')

fig.update_layout(xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                  yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))

fig.show()

In [17]:
from tensorflow.keras import layers, models

# Define the Graph Convolutional Layer
class GraphConvLayer(layers.Layer):
    def __init__(self, output_dim, activation=None, **kwargs):
        self.output_dim = output_dim
        self.activation = activation
        super(GraphConvLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create weights for the layer
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[1], self.output_dim),
                                      initializer='glorot_uniform',
                                      trainable=True)
        super(GraphConvLayer, self).build(input_shape)

    def call(self, x, adjacency_matrix):
        # Perform graph convolution
        support = tf.matmul(adjacency_matrix, x)
        output = tf.matmul(support, self.kernel)
        if self.activation is not None:
            output = self.activation(output)
        return output

# Create the GCN model
class GCN(models.Model):
    def __init__(self, hidden_units, num_classes):
        super(GCN, self).__init__()
        self.hidden_units = hidden_units
        self.num_classes = num_classes

        # Graph convolutional layers
        self.gcn_layers = [GraphConvLayer(units, activation='relu') for units in hidden_units]

        # Output layer
        self.output_layer = GraphConvLayer(num_classes)

    def call(self, inputs, adjacency_matrix):
        x = inputs
        for layer in self.gcn_layers:
            x = layer(x, adjacency_matrix)

        # Output layer
        output = self.output_layer(x, adjacency_matrix)
        return output


In [26]:
tf.constant(list(G.nodes["Toby don't."]['visual_embedding']))

<tf.Tensor: shape=(1000,), dtype=float32, numpy=
array([1.69251696e-04, 3.20854364e-04, 6.02129694e-05, 1.07175212e-04,
       5.20066678e-05, 5.11162682e-04, 9.15351848e-06, 3.51809322e-05,
       1.70811873e-05, 1.41600860e-04, 7.32104818e-04, 1.40703531e-04,
       5.85418020e-05, 8.10772763e-05, 1.65304737e-05, 4.90692983e-05,
       1.24995146e-04, 2.90053576e-05, 5.70635129e-05, 4.95783970e-05,
       1.94436216e-04, 1.61237956e-03, 9.71846050e-04, 2.23138006e-04,
       8.71846496e-05, 1.12381982e-04, 3.18417879e-04, 2.31957456e-04,
       1.73175940e-04, 1.90038743e-04, 4.20648830e-05, 3.30791459e-04,
       8.80999505e-05, 1.12275709e-04, 2.77832325e-04, 1.61080479e-05,
       1.23764621e-04, 1.08471859e-05, 5.87009359e-03, 3.75366508e-05,
       4.73528598e-05, 1.98641617e-04, 2.37981629e-04, 1.63575416e-04,
       5.45674266e-05, 1.23726510e-04, 3.95414427e-05, 2.28768142e-04,
       3.63352374e-05, 3.61019484e-05, 1.68537590e-04, 4.19115793e-04,
       2.91162578e-04, 5.190

In [31]:
GCNConv(16, activation='relu')(tf.constant([list(G.nodes[node]['visual_embedding']) for node in G.nodes]))

TypeError: 'int' object is not subscriptable

In [None]:
import numpy as np
from spektral.data import Dataset, Graph
from spektral.layers import GCNConv
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dropout, Dense

# Create a sample dataset
class MyDataset(Dataset):
    def __init__(self):
        super().__init__()

    def read(self):
        # Define your graph data (adjacency matrix and node features)
        adjacency = np.array(nx.adjacency_matrix(G))  # Example adjacency matrix
        features = np.array({node: G.nodes[node]['color'] for node in G.nodes()})  # Example node features

        # Create a graph
        graph = Graph(x=features, a=adjacency)
        self.graphs.append(graph)

data = MyDataset()
data.read()

# Build the model
X_in = Input(shape=(data[0].x.shape[1],))
A_in = Input((None,), sparse=True)
dropout_1 = Dropout(0.5)(X_in)
graph_conv_1 = GCNConv(16, activation='relu')([dropout_1, A_in])
dropout_2 = Dropout(0.5)(graph_conv_1)
output = Dense(2, activation='softmax')(dropout_2)

model = Model(inputs=[X_in, A_in], outputs=output)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model (using a single graph in this example)
model.fit([data[0].x, data[0].a], data[0].y, epochs=50, batch_size=1)


ValueError: x must have shape (n_nodes, n_node_features), got rank 0