In [743]:
import sys
print("Python version:", sys.version)

import matplotlib.pyplot as plt

import pandas as pd
print("pandas version:", pd.__version__)

import matplotlib
print("matplotlib version:", matplotlib.__version__)

import numpy as np
print("NumPy version:", np.__version__)

import scipy as sp
print("SciPy version:", sp.__version__)

import IPython
print("IPython version:", IPython.__version__)

from deepface import DeepFace

import vaderSentiment
import math 
import json
import os
import cv2
import tqdm.notebook as tqdm
import subprocess
from moviepy.editor import *
import speech_recognition as sr



Python version: 3.9.7 (default, Sep 16 2021, 16:59:28) [MSC v.1916 64 bit (AMD64)]
pandas version: 1.3.4
matplotlib version: 3.4.3
NumPy version: 1.20.3
SciPy version: 1.7.1
IPython version: 7.29.0


# Chat sentiment analysis


In [744]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [745]:
Analyser = SentimentIntensityAnalyzer() ##create analyser

## Merge Top 100 Twitch emotes with sentiment into lexicon

In [746]:
emotelex = pd.read_csv('emote_average.tsv',sep='\t') ##read twitch emote tsv into dataframe
emotelex['word'] = emotelex['word'].str.lower() ##lower case as it has to be to be inserted into lexicon
emotedict = dict(zip(emotelex.word, emotelex.sentiment)) ##convert dataframe to dictionary 
emojilex = pd.read_csv('emoji_average.tsv',sep='\t') ##read twitch emoji tsv into dataframe
emojidict = dict(zip(emojilex.word, emojilex.sentiment)) ##convert dataframe to dictionary 
Analyser.lexicon.update(emotedict) ##merge lexicons together
Analyser.lexicon.update(emojidict) ##merge lexicons together

In [747]:
def pullClipChat(jsonFile):
    data = json.load(open(jsonFile,encoding='utf-8'))
    df = pd.DataFrame(data["comments"])
    d = {'message': pd.json_normalize(df.message).body, 'time':df.content_offset_seconds}
    df = pd.DataFrame(data=d)
    return df,data

In [748]:
def getDuration(data,completedClipDict):
    duration = data["video"]["end"] -  data["video"]["start"]
    completedClipDict.update({"duration":duration})
    return completedClipDict
    

In [749]:
def chatSentiment(df):
    msgsentiment = []
    neg = []
    neu = []
    pos = []
    comp = []
    for index, row in df.iterrows(): 
        msgsentiment.append(Analyser.polarity_scores(row['message']))    
        neg.append(msgsentiment[index]["neg"])
        neu.append(msgsentiment[index]["neu"])
        pos.append(msgsentiment[index]["pos"])
        comp.append(msgsentiment[index]["compound"])
    df["neg"] = neg
    df["neu"] = neu
    df["pos"] = pos
    df["comp"] = comp
    return df

In [750]:
def fillEmptyChat(df,data):
    start = int(data["video"]["start"]) ##fill rows where no messages have been sent in that time
    end = int(data["video"]["end"] + 1)
    timecounter = 0
    for i in range(start, end):
        if df["time"][timecounter] ==  i:
            timecounter = timecounter + 1
        else:
            df = df.append({"message":"","time":i,"neg":0,"neu":0,"pos":0,"comp":0}, ignore_index=True)
    return df

In [751]:
def addResults(row,cumResults):
    cumResults[0] = cumResults[0] + row[2]
    cumResults[1] = cumResults[1] + row[3]
    cumResults[2] = cumResults[2] + row[4]
    cumResults[3] = cumResults[3] + row[5]
    

In [752]:
def cumulativeGenerate(df):
    cumulativedf = pd.DataFrame(columns=["time","count","neg","neu","pos","comp"])
    cumResults = [0,0,0,0]
    currentTime = df["time"][0]
    count=0
    df = df.sort_values(by=["time"])
    for index, row in df.iterrows(): 
        if currentTime == row["time"]:
            count = count + 1
            addResults(row,cumResults)
        else:
            cumulativedf = cumulativedf.append({"time":currentTime,"count":count,"neg":cumResults[0],"neu":cumResults[1],"pos":cumResults[2],"comp":cumResults[3]}, ignore_index=True)
            currentTime = row["time"]
            cumResults = [0,0,0,0]
            if row["message"] == '':
                count = 0
            else:
                 count = 1
            addResults(row,cumResults)
    cumulativedf = cumulativedf.drop(0) 
    return cumulativedf

In [753]:
def percentageCount(buffer):
    buffer["count"] = buffer["count"]/(buffer["count"].sum())
    return buffer

In [754]:
def cumulativeDivision(cumulativedf):
    dflen = len(cumulativedf.index)
    division = math.ceil(dflen/10)
    dividedChatSent = cumulativedf.drop(columns = 'time')
    completedClipDict = {}
    for i in range(1, 11):
        newkey = {'count':'count'+str(i),'neg':'neg'+str(i),'neu':'neu'+str(i),'pos':'pos'+str(i),'comp':'comp'+str(i)}
        x = (i-1)*division
        y = i*division
        if y>dflen:
            division = dflen % division 
            print(y,' split ', division,' split e',dflen)
        total = dividedChatSent.iloc[x:y].sum()
        buffer = total["count"]
        total = total.divide(division)
        total['count'] = buffer
        totaldict = total.to_dict()
        completedClipDict.update(dict((newkey[key], value) for (key, value) in totaldict.items()))
    print(completedClipDict)
    return completedClipDict

In [755]:
def emotionDetection(clipPath):
    faceEmotionDf = pd.DataFrame(columns=["angry","disgust","fear","happy","sad","surprise","neutral"])
    capClip = cv2.VideoCapture(clipPath)
    totalFrames = int(capClip.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(capClip.get(cv2.CAP_PROP_FPS))
    for frame in range(totalFrames):
        success, image = capClip.read()
        if success == False:
            break
        if frame % fps == 0:
            emotionStat = DeepFace.analyze(img_path = image, actions = ['emotion'],enforce_detection = False)
            faceEmotionDf = faceEmotionDf.append(emotionStat[0]["emotion"],ignore_index=True)
    return faceEmotionDf

In [756]:
def emotionDivision(faceEmotionDf,completedClipDict):
    dflen = len(faceEmotionDf.index)
    division = math.ceil(dflen/10)
    count=0
    for i in range(1, 11):
        newkey = {'angry':'angry'+str(i),'disgust':'disgust'+str(i),'fear':'fear'+str(i),'happy':'happy'+str(i),'sad':'sad'+str(i),'surprise':'surprise'+str(i),'neutral':'neutral'+str(i)}
        x = (i-1)*division
        y = i*division
        if y>dflen:
            division = dflen % division 
            print(y,' split ', division,' split ',dflen)
        total = faceEmotionDf.iloc[x:y].sum()
        total = total.divide(division)
        totaldict = total.to_dict()
        completedClipDict.update(dict((newkey[key], value) for (key, value) in totaldict.items()))
    return completedClipDict
    


In [757]:
def audioSentiment(completedClipDict,clipPath):
    audioclip = AudioFileClip(clipPath)
    r = sr.Recognizer()
    audioclip.write_audiofile("testclip.wav")
    with sr.AudioFile("testclip.wav") as source:
        audioRecogonizer = r.record(source)
    speechTranscript = r.recognize_google(audioRecogonizer)
    speechPolarity=(Analyser.polarity_scores(speechTranscript))
    completedClipDict.update(speechPolarity)
    return completedClipDict

In [758]:
# assign directory
directory = './Clips'
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    # checking if it is a file
    if os.path.isfile(f):
        if f.endswith('.mp4'):
            clipPath = f
            df,data = pullClipChat(f[:-3]+'json')
            df = chatSentiment(df)
            df = fillEmptyChat(df,data)
            df = cumulativeGenerate(df)
            df = percentageCount(df)
            completedClipDict = cumulativeDivision(df)
            faceEmotionDf = emotionDetection(clipPath)
            completedClipDict = emotionDivision(faceEmotionDf,completedClipDict)
            completedClipDict = audioSentiment(completedClipDict,clipPath)
            duration = getDuration(data,completedClipDict)

                                               message     time    neg    neu  \
0                                             ????????  18005.0  0.000  1.000   
1                                               no way  18005.0  0.688  0.312   
2                                                  omg  18006.0  0.000  1.000   
3                                        ?????????????  18006.0  0.000  1.000   
4    @TRIPS_ENDING nah this guy said he is new to p...  18006.0  0.135  0.865   
..                                                 ...      ...    ...    ...   
158                                  l000000000000000l  18037.0  0.000  1.000   
159                                       CLIP IT KEKW  18037.0  0.000  1.000   
160                                           WAITTTTT  18037.0  0.000  1.000   
161                                          ?????????  18037.0  0.000  1.000   
162        omg! omg! no way! taht huge! thats massive!  18038.0  0.189  0.514   

       pos    comp  
0    0

Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 15.62it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 26.32it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.03it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 26.31it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 28.57it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.78it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.78it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 25.00it/s]
Action: emotion: 100%|██████████████████

40  split  3  split  39
MoviePy - Writing audio in testclip.wav


                                                                                                                       

MoviePy - Done.
result2:
{   'alternative': [   {   'confidence': 0.9406718,
                           'transcript': 'how has he just survived that I I '
                                         "don't know I really don't know I'm "
                                         'not sure spec when I mean yeah '
                                         'obviously looking at hindsight I '
                                         "should that respect it's all good "
                                         "though cos you're like in this "
                                         'world'},
                       {   'transcript': 'how has he just survive that I I '
                                         "don't know I really don't know I'm "
                                         'not sure spec when I mean yeah '
                                         'obviously looking at hindsight I '
                                         "should that respect it's all good "
                        

In [759]:
duration

{'count1': 0.0,
 'neg1': 0.0,
 'neu1': 0.0,
 'pos1': 0.0,
 'comp1': 0.0,
 'count2': 0.06179775280898876,
 'neg2': 0.20575,
 'neu2': 1.62225,
 'pos2': 0.422,
 'comp2': 0.04832499999999999,
 'count3': 0.06741573033707865,
 'neg3': 0.0,
 'neu3': 1.61025,
 'pos3': 0.38975000000000004,
 'comp3': 0.22047499999999998,
 'count4': 0.05056179775280899,
 'neg4': 0.28225,
 'neu4': 1.39275,
 'pos4': 0.325,
 'comp4': -0.06755,
 'count5': 0.056179775280898875,
 'neg5': 0.25,
 'neu5': 1.75,
 'pos5': 0.0,
 'comp5': -0.146475,
 'count6': 0.016853932584269662,
 'neg6': 0.29825,
 'neu6': 0.20174999999999998,
 'pos6': 0.0,
 'comp6': -0.253075,
 'count7': 0.056179775280898875,
 'neg7': 0.25,
 'neu7': 1.75,
 'pos7': 0.5,
 'comp7': -0.07504999999999999,
 'count8': 0.2640449438202247,
 'neg8': 1.0159999999999998,
 'neu8': 7.984,
 'pos8': 2.25,
 'comp8': 0.49655000000000005,
 'count9': 0.2808988764044944,
 'neg9': 1.831,
 'neu9': 9.918999999999999,
 'pos9': 0.0,
 'comp9': -0.8657,
 'count10': 0.1460674157303370

In [760]:
print(completedClipDict)

{'count1': 0.0, 'neg1': 0.0, 'neu1': 0.0, 'pos1': 0.0, 'comp1': 0.0, 'count2': 0.06179775280898876, 'neg2': 0.20575, 'neu2': 1.62225, 'pos2': 0.422, 'comp2': 0.04832499999999999, 'count3': 0.06741573033707865, 'neg3': 0.0, 'neu3': 1.61025, 'pos3': 0.38975000000000004, 'comp3': 0.22047499999999998, 'count4': 0.05056179775280899, 'neg4': 0.28225, 'neu4': 1.39275, 'pos4': 0.325, 'comp4': -0.06755, 'count5': 0.056179775280898875, 'neg5': 0.25, 'neu5': 1.75, 'pos5': 0.0, 'comp5': -0.146475, 'count6': 0.016853932584269662, 'neg6': 0.29825, 'neu6': 0.20174999999999998, 'pos6': 0.0, 'comp6': -0.253075, 'count7': 0.056179775280898875, 'neg7': 0.25, 'neu7': 1.75, 'pos7': 0.5, 'comp7': -0.07504999999999999, 'count8': 0.2640449438202247, 'neg8': 1.0159999999999998, 'neu8': 7.984, 'pos8': 2.25, 'comp8': 0.49655000000000005, 'count9': 0.2808988764044944, 'neg9': 1.831, 'neu9': 9.918999999999999, 'pos9': 0.0, 'comp9': -0.8657, 'count10': 0.14606741573033707, 'neg10': 2.565, 'neu10': 9.222999999999999

In [761]:
fDf = pd.DataFrame(completedClipDict,index=[0])

In [762]:
fDf

Unnamed: 0,count1,neg1,neu1,pos1,comp1,count2,neg2,neu2,pos2,comp2,...,fear10,happy10,sad10,surprise10,neutral10,neg,neu,pos,compound,duration
0,0.0,0.0,0.0,0.0,0.0,0.061798,0.20575,1.62225,0.422,0.048325,...,0.789489,0.150202,78.621107,0.002295,20.423871,0.039,0.681,0.28,0.9009,38.0


In [763]:

f[:-3]+'json'

'./Clips\\Clip1_3081.json'