In [1]:
import sys
print("Python version:", sys.version)

import matplotlib.pyplot as plt

import pandas as pd
print("pandas version:", pd.__version__)

import matplotlib
print("matplotlib version:", matplotlib.__version__)

import numpy as np
print("NumPy version:", np.__version__)

import scipy as sp
print("SciPy version:", sp.__version__)

import IPython
print("IPython version:", IPython.__version__)

from deepface import DeepFace

import soundfile as sf
import pyloudnorm as pyln
import seaborn as sns
import vaderSentiment
import math 
import json
import os
import cv2
import tqdm.notebook as tqdm
import subprocess
from moviepy.editor import *
import speech_recognition as sr



Python version: 3.9.7 (default, Sep 16 2021, 16:59:28) [MSC v.1916 64 bit (AMD64)]
pandas version: 1.3.4
matplotlib version: 3.4.3
NumPy version: 1.20.3
SciPy version: 1.7.1
IPython version: 7.29.0


# Chat sentiment analysis


In [2]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [3]:
Analyser = SentimentIntensityAnalyzer() ##create analyser

## Merge Top 100 Twitch emotes with sentiment into lexicon

In [4]:
emotelex = pd.read_csv('emote_average.tsv',sep='\t') ##read twitch emote tsv into dataframe
emotelex['word'] = emotelex['word'].str.lower() ##lower case as it has to be to be inserted into lexicon
emotedict = dict(zip(emotelex.word, emotelex.sentiment)) ##convert dataframe to dictionary 
emojilex = pd.read_csv('emoji_average.tsv',sep='\t') ##read twitch emoji tsv into dataframe
emojidict = dict(zip(emojilex.word, emojilex.sentiment)) ##convert dataframe to dictionary 
Analyser.lexicon.update(emotedict) ##merge lexicons together
Analyser.lexicon.update(emojidict) ##merge lexicons together

In [5]:
def pullClipChat(jsonFile):
    data = json.load(open(jsonFile,encoding='utf-8'))
    df = pd.DataFrame(data["comments"])
    d = {'message': pd.json_normalize(df.message).body, 'time':df.content_offset_seconds}
    df = pd.DataFrame(data=d)
    return df,data

In [6]:
def getStreamData(data,completedClipDict,f):
    duration = data["video"]["end"] -  data["video"]["start"]
    views = int(f.split("_")[1][:-4])
    streamer = data["streamer"]["name"]
    completedClipDict.update({"duration":duration,"streamer":streamer,"views":views})
    return completedClipDict
    

In [7]:
def chatSentiment(df):
    msgsentiment = []
    neg = []
    neu = []
    pos = []
    comp = []
    for index, row in df.iterrows(): 
        msgsentiment.append(Analyser.polarity_scores(row['message']))    
        neg.append(msgsentiment[index]["neg"])
        neu.append(msgsentiment[index]["neu"])
        pos.append(msgsentiment[index]["pos"])
        comp.append(msgsentiment[index]["compound"])
    df["neg"] = neg
    df["neu"] = neu
    df["pos"] = pos
    df["comp"] = comp
    return df

In [8]:
def fillEmptyChat(df,data):
    start = int(data["video"]["start"]) ##fill rows where no messages have been sent in that time
    end = int(data["video"]["end"] + 1)
    timecounter = 0
    for i in range(start, end):
        if df["time"][timecounter] ==  i:
            timecounter = timecounter + 1
        else:
            df = df.append({"message":"","time":i,"neg":0,"neu":0,"pos":0,"comp":0}, ignore_index=True)
    return df

In [9]:
def addResults(row,cumResults):
    cumResults[0] = cumResults[0] + row[2]
    cumResults[1] = cumResults[1] + row[3]
    cumResults[2] = cumResults[2] + row[4]
    cumResults[3] = cumResults[3] + row[5]
    

In [10]:
def cumulativeGenerate(df):
    cumulativedf = pd.DataFrame(columns=["time","count","neg","neu","pos","comp"])
    cumResults = [0,0,0,0]
    currentTime = df["time"][0]
    count=0
    df = df.sort_values(by=["time"])
    for index, row in df.iterrows(): 
        if currentTime == row["time"]:
            count = count + 1
            addResults(row,cumResults)
        else:
            cumulativedf = cumulativedf.append({"time":currentTime,"count":count,"neg":cumResults[0],"neu":cumResults[1],"pos":cumResults[2],"comp":cumResults[3]}, ignore_index=True)
            currentTime = row["time"]
            cumResults = [0,0,0,0]
            if row["message"] == '':
                count = 0
            else:
                 count = 1
            addResults(row,cumResults)
    cumulativedf = cumulativedf.drop(0) 
    return cumulativedf

In [11]:
def percentageCount(buffer):
    buffer["count"] = buffer["count"]/(buffer["count"].sum())
    return buffer

In [12]:
def cumulativeDivision(cumulativedf):
    dflen = len(cumulativedf.index)
    division = math.ceil(dflen/10)
    dividedChatSent = cumulativedf.drop(columns = 'time')
    completedClipDict = {}
    for i in range(1, 11):
        newkey = {'count':'count'+str(i),'neg':'neg'+str(i),'neu':'neu'+str(i),'pos':'pos'+str(i),'comp':'comp'+str(i)}
        x = int((i-1)*division)
        y = int(i*division)
        total = dividedChatSent.iloc[x:y].sum()
        buffer = total["count"]
        total = total.divide(division)
        total['count'] = buffer
        totaldict = total.to_dict()
        completedClipDict.update(dict((newkey[key], value) for (key, value) in totaldict.items()))
    print(completedClipDict)
    return completedClipDict

In [13]:
def emotionDetection(clipPath):
    faceEmotionDf = pd.DataFrame(columns=["angry","disgust","fear","happy","sad","surprise","neutral"])
    capClip = cv2.VideoCapture(clipPath)
    totalFrames = int(capClip.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(capClip.get(cv2.CAP_PROP_FPS))
    for frame in range(totalFrames):
        success, image = capClip.read()
        if success == False:
            break
        if frame % fps == 0:
            emotionStat = DeepFace.analyze(img_path = image, actions = ['emotion'],enforce_detection = False)
            faceEmotionDf = faceEmotionDf.append(emotionStat[0]["emotion"],ignore_index=True)
    return faceEmotionDf

In [14]:
def emotionDivision(faceEmotionDf,completedClipDict):
    dflen = len(faceEmotionDf.index)
    division = dflen/10
    count=0
    for i in range(1, 11):
        newkey = {'angry':'angry'+str(i),'disgust':'disgust'+str(i),'fear':'fear'+str(i),'happy':'happy'+str(i),'sad':'sad'+str(i),'surprise':'surprise'+str(i),'neutral':'neutral'+str(i)}
        x = int((i-1)*division)
        y = int(i*division)
        total = faceEmotionDf.iloc[x:y].sum()
        total = total.divide(division)
        totaldict = total.to_dict()
        completedClipDict.update(dict((newkey[key], value) for (key, value) in totaldict.items()))
    return completedClipDict
    


In [15]:
def audioSentiment(completedClipDict,clipPath):
    audioclip = AudioFileClip(clipPath)
    r = sr.Recognizer()
    audioclip.write_audiofile("testclip.wav")
    with sr.AudioFile("testclip.wav") as source:
        audioRecogonizer = r.record(source)
    speechTranscript = r.recognize_google(audioRecogonizer)
    speechPolarity=(Analyser.polarity_scores(speechTranscript))
    completedClipDict.update(speechPolarity)
    return completedClipDict

In [16]:
def getLoudness(completedClipDict):
    loudData, rate = sf.read("testclip.wav") # load audio (with shape (samples, channels))
    meter = pyln.Meter(rate) # create meter
    loudness = meter.integrated_loudness(loudData) # measure loudness
    completedClipDict.update({"loudness":loudness})
    return completedClipDict

In [None]:
# assign directory
directory = './Clips'
clipIndex = 0
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    # checking if it is a file
    if os.path.isfile(f):
        if f.endswith('.mp4'):
            clipPath = f
            df,data = pullClipChat(f[:-3]+'json')
            df = chatSentiment(df)
            print(df)
            df = fillEmptyChat(df,data)
            df = cumulativeGenerate(df)
            df = percentageCount(df)
            completedClipDict = cumulativeDivision(df)
            faceEmotionDf = emotionDetection(clipPath)
            completedClipDict = emotionDivision(faceEmotionDf,completedClipDict)
            completedClipDict = audioSentiment(completedClipDict,clipPath)
            duration = getStreamData(data,completedClipDict,f)
            completedClipDict = getLoudness(completedClipDict)
            if clipIndex ==0:
                fDf = pd.DataFrame(completedClipDict,index=[clipIndex])
            else:
                fDf = fDf.append(completedClipDict,ignore_index=True)
            clipIndex = clipIndex+1

                  message     time    neg    neu    pos    comp
0               Huge heal  19522.0  0.000  0.303  0.697  0.3182
1             died again?  19522.0  0.783  0.217  0.000 -0.5574
2                you suck  19523.0  0.744  0.256  0.000 -0.4404
3                skillRip  19524.0  0.000  1.000  0.000  0.0000
4                   LMFAO  19524.0  0.000  0.000  1.000  0.5423
..                    ...      ...    ...    ...    ...     ...
251              SIT KEKW  19570.0  0.000  1.000  0.000  0.0000
252  looooooooooooooooool  19570.0  0.000  1.000  0.000  0.0000
253                  sike  19571.0  0.000  1.000  0.000  0.0000
254                   SIT  19571.0  0.000  1.000  0.000  0.0000
255           LOOOOOOOOOL  19572.0  0.000  1.000  0.000  0.0000

[256 rows x 6 columns]
{'count1': 0.04255319148936171, 'neg1': 0.4211666666666667, 'neu1': 1.1293333333333333, 'pos1': 0.2828333333333333, 'comp1': -0.04421666666666666, 'count2': 0.1773049645390071, 'neg2': 0.7196666666666666, 'ne

Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.97it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 31.25it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 31.25it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 32.26it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|██████████████████

MoviePy - Writing audio in testclip.wav


                                                                                                                       

MoviePy - Done.
result2:
{   'alternative': [   {   'confidence': 0.94850993,
                           'transcript': 'did I just take protect item off The '
                                         "Tick I died but there's no sure he's "
                                         "dreaming is Navan 4:00 no there's "
                                         "actually no shop I'm"},
                       {   'transcript': 'did I just take protect item off The '
                                         "Tick I died there's no sure he's "
                                         "dreaming is Navan 4:00 no there's "
                                         "actually no shop I'm"},
                       {   'transcript': 'did I just take protect item off The '
                                         "Tick I died there's no shot he's "
                                         "dreaming is Navan 4:00 no there's "
                                         "actually no shop I'm"},
            

Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.78it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 23.81it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 26.32it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|██████████████████

MoviePy - Writing audio in testclip.wav


                                                                                                                       

MoviePy - Done.
result2:
{   'alternative': [{'confidence': 0.95467669, 'transcript': 'humble abode'}],
    'final': True}
                            message     time    neg    neu    pos    comp
0             oh uh 1 sec on my way  10217.0  0.000  1.000  0.000  0.0000
1                               lol  10218.0  0.000  0.000  1.000  0.4215
2                              lmao  10223.0  0.000  0.000  1.000  0.5994
3        that guy was bait for sure  10227.0  0.000  0.685  0.315  0.3182
4  G0T DAMN!!!!!!!!!!!!!!!!!!!!!!!!  10230.0  0.795  0.205  0.000 -0.5951
5          bdsm scouted it for sure  10237.0  0.000  0.635  0.365  0.3182
6                              lmao  10238.0  0.000  0.000  1.000  0.5994
{'count1': 0.0, 'neg1': 0.0, 'neu1': 0.0, 'pos1': 0.0, 'comp1': 0.0, 'count2': 0.0, 'neg2': 0.0, 'neu2': 0.0, 'pos2': 0.0, 'comp2': 0.0, 'count3': 0.3333333333333333, 'neg3': 0.0, 'neu3': 0.3333333333333333, 'pos3': 0.3333333333333333, 'comp3': 0.14049999999999999, 'count4': 0.0, 'neg

Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.03it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|██████████████████

MoviePy - Writing audio in testclip.wav


                                                                                                                       

MoviePy - Done.
result2:
{   'alternative': [   {   'confidence': 0.93712038,
                           'transcript': 'careful speakers and kill them all '
                                         "she used to sit ok guys there's a "
                                         'huge cricket team so get the funk '
                                         "out just get out of here there's a "
                                         'huge clan will when I get better '
                                         'fighting back there is no way there '
                                         'is no way we can fight them back'},
                       {   'transcript': 'Iceland careful speakers and kill '
                                         'them all she used to sit ok guys '
                                         "there's a huge cricket team so get "
                                         'the funk out just get out of here '
                                         "there's a huge plan

Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.78it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.03it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 28.57it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|██████████████████

Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.81it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.77it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 31.25it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 31.25it/s]
Action: emotion: 100%|██████████████████

Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.31it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 31.25it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.03it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 28.57it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 28.57it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|██████████████████

MoviePy - Writing audio in testclip.wav


                                                                                                                       

MoviePy - Done.
result2:
{   'alternative': [   {   'confidence': 0.91296339,
                           'transcript': "nowadays it's huge Benson finishing "
                                         'that I watch this bro morning I '
                                         'tried I tried CrossFit now I can '
                                         "pick it up by the way it's someone "
                                         'someone gets it ok is yours oh no'},
                       {   'transcript': "today I'm your grants huge add some "
                                         'finishing that I watch this bro '
                                         'morning I tried I tried dragon '
                                         'crossbow know I can pick it up by '
                                         "the way it's someone someone gets it "
                                         'ok is yours oh no'},
                       {   'transcript': 'damn your grants huge Benson '
     

Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.30it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 28.57it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 26.32it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 28.57it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|██████████████████

MoviePy - Writing audio in testclip.wav


                                                                                                                       

MoviePy - Done.
result2:
{   'alternative': [   {   'confidence': 0.9215126,
                           'transcript': 'YouTube club morning I will be on '
                                         'YouTube eventually enough and that '
                                         'somebody could there on a video of '
                                         'somebody wants to go and clip it for '
                                         'me the current streaming just going '
                                         'to become'},
                       {   'transcript': 'YouTube club morning I will be on '
                                         'YouTube eventually enough and let '
                                         'somebody could there on a video of '
                                         'somebody wants to go in clip it for '
                                         'me the current streaming just going '
                                         'to become'},
                     

Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 28.57it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 28.57it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 31.25it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.78it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|███████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 29.41it/s]
Action: emotion: 100%|██████████████████

MoviePy - Writing audio in testclip.wav


                                                                                                                       

MoviePy - Done.


In [None]:
duration

In [None]:
print(completedClipDict)

In [None]:
fDf

In [None]:
tester = fDf.streamer.unique()

In [None]:
fDf = fDf.sort_values(by='streamer')

In [None]:
for i in range(0, len(tester)):
    percentageViews = []
    percentageLoudness = []
    tempDf = fDf.loc[fDf["streamer"]==tester[i]]
    maxLoud = tempDf["loudness"].min()
    maxViews = tempDf["views"].max()
    for index, row in tempDf.iterrows(): 
        percentageViews.append(tempDf["views"][index]/maxViews)
        percentageLoudness.append(tempDf["loudness"][index]/maxLoud)
    tempDf["percentageViews"] = percentageViews
    tempDf["percentageLoud"] = percentageLoudness
    tempDf = tempDf.drop(columns = ["loudness","views","streamer"])
    if i == 0:
        completedDf = tempDf
    else:
        completedDf = completedDf.append(tempDf)

In [None]:
completedDf

In [None]:
completedDf.reset_index(drop=True, inplace=True)

In [None]:
print(completedDf.iloc[5])

In [None]:

fDf.sort_values(by='streamer')

In [None]:
pd. set_option('display.max_columns', 500)