In [34]:
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
import plotly.graph_objects as go
import networkx as nx

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import nltk
nltk.download('stopwords')
from keras.preprocessing.text import Tokenizer
from gensim.models import KeyedVectors
from keras.layers import Embedding
from keras.models import Sequential
import tensorflow as tf

from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.models import Model
import cv2
import os

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam

from spektral.data.loaders import SingleLoader, Loader
from spektral.datasets.citation import Citation
from spektral.layers import GCNConv
from spektral.models.gcn import GCN
from spektral.transforms import LayerPreprocess

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\gsevr\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
df = pd.read_csv('MELD.Raw/dev_sent_emo.csv',encoding='utf-8')
df['Utterance'] = df.Utterance.str.replace('',"'")
df['gender'] = df.Speaker.apply(lambda x: 'male' if x == 'Ross' or x == 'Joey' or x == 'Chandler' else 'female')
df = df.sample(frac=0.18,ignore_index=True)
df

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime,gender
0,938,"Well umm, Alexandra has been",Monica,neutral,neutral,88,1,8,14,"0:01:13,360","0:01:15,920",female
1,311,"Really, Rachel, I was thinking of you the whol...",Monica,sadness,negative,26,9,2,2,"00:11:21,972","00:11:24,849",female
2,903,Wh-what did he say?!,Ross,surprise,negative,84,1,5,13,"00:09:39,036","00:09:41,288",male
3,680,"You know, Junior Miss is where I started. Oh, ...",Joanna,neutral,neutral,65,3,4,9,"00:04:25,598","00:04:31,603",female
4,988,Well this doesn't even smell like opium.,Phoebe,disgust,negative,92,10,6,11,"00:12:47,641","00:12:50,018",female
...,...,...,...,...,...,...,...,...,...,...,...,...
195,675,"Nope, got it and I got yours too.",Joey,neutral,neutral,64,4,6,9,"00:22:24,343","00:22:26,635",male
196,31,"What exactly were you looking for, hmm?",Ross,neutral,neutral,4,3,7,7,"00:14:40,754","00:14:43,256",male
197,689,"Yeah but, maybe it's not what we think. Maybe ...",Rachel,neutral,neutral,66,1,7,23,"00:14:38,586","00:14:45,216",female
198,884,There's a limit to how many sandwiches I can e...,Joey,neutral,neutral,82,10,6,21,"00:10:12,612","00:10:15,905",male


In [5]:
def preprocess(text):
    t = text.lower()
    t = re.sub('\d+',r'',t)
    t = re.sub(r'\W+',r' ',t)
    return t

lemmatizer = WordNetLemmatizer()



df['prepro'] = [' '.join([lemmatizer.lemmatize(preprocess(txt))])
                 .strip() for txt in df['Utterance']]


texts = df.prepro.values
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=30, padding='post')

In [6]:
glove_path = 'glove.6B.300d.txt'
word_vectors = KeyedVectors.load_word2vec_format(glove_path, binary=False, encoding='utf8',no_header=True)



In [7]:
embedding_dim = 300  
embedding_matrix = np.zeros((len(tokenizer.word_index) + 1, embedding_dim))

for word, i in tokenizer.word_index.items():
    if word in word_vectors:
        embedding_matrix[i] = word_vectors[word]

embedding_layer = Embedding(
    input_dim=len(tokenizer.word_index) + 1,
    output_dim=embedding_dim,
    weights=[embedding_matrix],
    input_length=30,
    trainable=False
)


model = Sequential()
model.add(embedding_layer)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics='accuracy')


In [8]:
embedding_vectors = model.predict(padded_sequences)
embedding_vectors.shape



(200, 30, 300)

In [60]:
type(embedding_vectors)

numpy.ndarray

In [61]:
df['embeddings'] = list(embedding_vectors)


In [10]:
G = nx.Graph()

topic_col = {
            'sadness':'teal',
            'surprise':'silver',
            'neutral':'lightgreen',
            'joy':'purple',
            'anger':'black',
            'disgust': 'brown',
            'fear':'yellow'
}

for emotion in df.Emotion.unique():
    G.add_node(emotion, color=topic_col[emotion],hover=emotion,emotion=emotion, type='emotion')


In [11]:
gender_col = {
    'male':'blue',
    'female':'pink'
}

for gender in df.gender.unique():
    G.add_node(gender,color=gender_col[gender], hover=gender, gender=gender, type='gender')

In [64]:
for idx, row in df.iterrows():
    G.add_node(row['Utterance'], 
    embedding=row['embeddings'],
    color=topic_col[row['Emotion']],
    hover=row['Utterance'],
    emotion=row['Emotion'],
    gender=row['gender'], 
    type='utterance')

In [57]:
for idx, row in df.iterrows():
    G.add_edge(row['Utterance'], row['Emotion'],color=topic_col[row['Emotion']])
    G.add_edge(row['Utterance'], row['gender'], color=gender_col[row['gender']])


In [14]:
pos = nx.spring_layout(G)
# pos = nx.fruchterman_reingold_layout(G)


In [15]:
import textwrap

fig = go.Figure()

node_positions = {}

for node, position in pos.items():
    x, y = position  

    color = G.nodes[node]['color']  
    hover = G.nodes[node]['hover']
    hover = '<br>'.join(textwrap.wrap(hover,width=50))
    try:
        emotion = G.nodes[node]['emotion']
    except KeyError:
        try:
            gender = G.nodes[node]['gender']
        except KeyError:
            continue
        node_type = G.nodes[node]['type']

    fig.add_trace(go.Scatter(x=[x], y=[y], marker=dict(size=10,color=color), hovertext=hover, name=emotion, hovertemplate=hover))

for edge in G.edges():
    color = G.edges[edge]['color']
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    fig.add_trace(go.Scatter(x=[x0, x1], y=[y0, y1], mode='lines', line=dict(width=1,color=color), name='Edge'))

fig.update_layout(showlegend=False, title='Emotions Graph')

fig.update_layout(xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                  yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))

fig.show()

In [17]:
# model
model = GCN(n_labels=len(df.Emotion.unique()))
model.compile(
    optimizer='adam',
    loss=CategoricalCrossentropy(reduction="sum"),
    weighted_metrics=["acc"],
)

In [50]:
G.nodes['What?']

{'embedding': [[-0.20016999542713165,
   0.14302000403404236,
   0.0520550012588501,
   -0.0008088399772532284,
   0.01700899936258793,
   0.014898999594151974,
   -0.25523999333381653,
   -0.17906999588012695,
   -0.04671299830079079,
   -2.0546998977661133,
   0.22617000341415405,
   0.08284900337457657,
   -0.211899995803833,
   0.19905999302864075,
   0.30946001410484314,
   0.2268799990415573,
   -0.06002600118517876,
   -0.033333998173475266,
   0.03810799866914749,
   0.2262600064277649,
   0.521589994430542,
   0.5987100005149841,
   0.45326998829841614,
   -0.04109799861907959,
   -0.40292999148368835,
   -0.07912799715995789,
   0.002533900085836649,
   -0.36041998863220215,
   0.06582300364971161,
   -0.010745000094175339,
   0.054721999913454056,
   0.5075600147247314,
   -0.646120011806488,
   -0.004589499905705452,
   -1.017300009727478,
   0.3021799921989441,
   -0.2540299892425537,
   0.09564699977636337,
   -0.047533001750707626,
   -0.32479000091552734,
   0.147300004

In [48]:
loader_tr = Loader(G)
model.fit(
    loader_tr.load(),
    steps_per_epoch=loader_tr.steps_per_epoch,
    epochs=10,
    callbacks=[EarlyStopping(patience=5, restore_best_weights=True)],
)


you are shuffling a 'Graph' object which is not a subclass of 'Sequence'; `shuffle` is not guaranteed to behave correctly. E.g., non-numpy array/tensor objects with view semantics may contain duplicates after shuffling.



KeyError: 131

In [19]:
base_model = InceptionV3(weights='imagenet', include_top=False)

model = Model(inputs=base_model.input, outputs=base_model.layers[-2].output)

def get_video_embeddings(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_embeddings = []

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, (299, 299))
        x = np.expand_dims(frame, axis=0)

        x = preprocess_input(x)

        frame_embedding = model.predict(x)

        frame_embeddings.append(frame_embedding)

    cap.release()
    return frame_embeddings


In [20]:
video_embeddings_list = []
folder_path = 'dev_splits_complete/'

for idx,row in df.iterrows():
    file_name = 'dia' + str(row['Dialogue_ID']) + '_utt' + str(row['Utterance_ID']) + '.mp4'
    video_path = folder_path + file_name
    if os.path.isfile(video_path):
        video_embeddings_list.append(get_video_embeddings(video_path))
    else:
        print(f'File name {file_name} does not exist')

df['video_embeddings'] = video_embeddings_list
        



KeyboardInterrupt: 

In [23]:
df['video'] = video_embeddings_list + [None] *(len(df)-len(video_embeddings_list))

In [24]:
df

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime,gender,prepro,embeddings,video
0,105,It's a witness not a perp. And no one talks li...,Gary,anger,negative,10,4,5,20,"0:07:07,593","0:07:10,096",female,it s a witness not a perp and no one talks lik...,"[[0.03328400105237961, -0.04075400158762932, -...",[[[[[0.00558225 0. 0.21521398 ... 0. ...
1,625,Well you all know that I'm a pacifist so I'm n...,Phoebe,neutral,neutral,59,2,5,20,"00:21:51,310","00:21:55,813",female,well you all know that i m a pacifist so i m n...,"[[-0.13508999347686768, 0.3590700030326843, 0....",[[[[[0. 0. 0. ... 0. ...
2,998,How're you doing?,Joey,sadness,negative,93,7,2,1,"00:15:38,479","00:15:40,980",male,how re you doing,"[[-0.28519999980926514, -0.013883000239729881,...",[[[[[0. 0. 0. ... 0. ...
3,605,"No, it's been three nights in a row.",Rachel,neutral,neutral,57,8,3,1,"00:01:45,146","00:01:47,064",female,no it s been three nights in a row,"[[-0.16843000054359436, -0.037650998681783676,...",[[[[[0. 0. 0.21535525 ... 0. ...
4,825,"No, I know.",Phoebe,neutral,neutral,77,5,8,24,"0:17:37,544","0:17:38,170",female,no i know,"[[-0.16843000054359436, -0.037650998681783676,...",[[[[[0. 0.29825893 0. ... 0.51...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,927,Ken Adams!!,Joey,surprise,positive,85,16,8,4,"00:19:18,115","00:19:19,532",male,ken adams,"[[0.5064600110054016, 0.24647000432014465, 0.2...",
196,237,This is so great.,Phoebe,joy,positive,19,16,1,23,"00:14:58,063","00:14:59,814",female,this is so great,"[[-0.20437000691890717, 0.16430999338626862, 0...",
197,160,Toby don't.,Bob,sadness,negative,14,6,8,5,"0:16:35,577","0:16:36,849",female,toby don t,"[[0.044863998889923096, 0.42858999967575073, 0...",
198,543,"Okay, maybe ask this guy.",Phoebe,neutral,neutral,50,3,6,22,"00:17:07,317","00:17:09,527",female,okay maybe ask this guy,"[[0.24993999302387238, -0.016952000558376312, ...",
