# Video Analytics

In [1]:
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# Load the emotion data and parse its 'datetime' column.
video_df = pd.read_csv('../sample_output/emotion_data.csv')
video_df['datetime'] = pd.to_datetime(
    video_df['datetime'],
    format="%Y-%m-%d %H:%M:%S:%f",
)

# Number of rows for every (question, name) pair.
agg_df = (
    video_df
    .groupby(['question', 'name'])
    .size()
    .reset_index(name='count')
)

# Share of each name within its question, rounded to two decimals.
agg_df['percentage'] = agg_df['count'].div(
    agg_df.groupby('question')['count'].transform('sum')
).round(2)

In [3]:
# Bars per question, coloured by the 'name' column, showing each share as a percentage.
(
    alt.Chart(agg_df, title="Facial Expression Percentage Breakdown by Question")
    .mark_bar(cornerRadiusTopLeft=3, cornerRadiusTopRight=3)
    .encode(
        x=alt.X('question:O'),
        y=alt.Y('percentage:Q', axis=alt.Axis(format='%')),
        color=alt.Color('name:N'),
        tooltip=['name', alt.Tooltip('percentage:Q', format='.1%')],
    )
    .properties(width=500, height=300)
    .configure_axis(labelAngle=0, labelFontSize=14, titleFontSize=20)
    .configure_title(fontSize=20)
)

In [4]:
# Number of rows for every (question, eye_contact) pair.
eye_agg_df = (
    video_df
    .groupby(['question', 'eye_contact'])
    .size()
    .reset_index(name='count')
)

# Share of each eye_contact value within its question, rounded to two decimals.
eye_agg_df['percentage'] = eye_agg_df['count'].div(
    eye_agg_df.groupby('question')['count'].transform('sum')
).round(2)

In [5]:
# Bars per question, coloured by the 'eye_contact' column, showing each share as a percentage.
(
    alt.Chart(eye_agg_df, title="Eye Contact Percentage Breakdown by Question")
    .mark_bar(cornerRadiusTopLeft=3, cornerRadiusTopRight=3)
    .encode(
        x=alt.X('question:O'),
        y=alt.Y('percentage:Q', axis=alt.Axis(format='%')),
        color=alt.Color('eye_contact:N'),
        tooltip=['eye_contact', alt.Tooltip('percentage:Q', format='.1%')],
    )
    .properties(width=500, height=300)
    .configure_axis(labelAngle=0, labelFontSize=14, titleFontSize=20)
    .configure_title(fontSize=20)
)

## Ordinal Scoring

* Assign each emotion (happy, sad, etc.) an ordinal score
* Combine it with an eye-contact score (the cells below average the two)

In [6]:
# Ordinal emotion scale: labels are ranked 1 (fear) to 7 (happy) and `factor`
# rescales the ranks so that the top rank is worth 10.
factor = 10/7

emotion_map = {
    'happy': factor*7,
    'neutral': factor*6,
    'surprise': factor*5,
    'sad': factor*4,
    'angry': factor*3,
    'disgust': factor*2,
    'fear': factor*1,
}

emotion_calc = agg_df[['question', 'name', 'percentage']].copy()

# Series.map yields a numeric weight series directly. This avoids relying on
# Series.replace silently downcasting an object column to float, and it keeps
# the original labels in 'name' instead of overwriting them with numbers.
_emotion_weight = emotion_calc['name'].map(emotion_map)

# Fail loudly on labels that have no score rather than dropping them as NaN.
_unmapped_emotions = emotion_calc.loc[_emotion_weight.isna(), 'name'].unique()
if len(_unmapped_emotions):
    raise ValueError(f"No score defined for emotion label(s): {list(_unmapped_emotions)}")

# Weighted score per row, then summed per question.
emotion_calc['emotion_score'] = _emotion_weight * emotion_calc['percentage']

emotion_calc = emotion_calc.groupby('question')['emotion_score'].sum().reset_index()

In [7]:
# One bar per question showing the summed emotion score.
(
    alt.Chart(emotion_calc, title="Emotion Score by Question")
    .mark_bar(cornerRadiusTopLeft=3, cornerRadiusTopRight=3)
    .encode(
        x=alt.X('question:O'),
        y=alt.Y('emotion_score:Q', axis=alt.Axis(format='.1f')),
        tooltip=['question', alt.Tooltip('emotion_score:Q', format='.1f')],
    )
    .properties(width=500, height=300)
    .configure_axis(labelAngle=0, labelFontSize=14, titleFontSize=20)
    .configure_title(fontSize=20)
)

In [8]:
emotion_calc

Unnamed: 0,question,emotion_score
0,1,9.0
1,2,6.757143
2,3,9.671429
3,4,9.085714
4,5,7.614286


In [9]:
# Ordinal eye-contact scale: 'center' ranks highest (3), 'blink' is 2, and
# 'left'/'right' are both 1; `eye_factor` rescales so the top rank is worth 10.
eye_factor = 10/3

eye_map = {
    'center': eye_factor*3,
    'blink': eye_factor*2,
    'left': eye_factor*1,
    'right': eye_factor*1,
}

eye_calc = eye_agg_df[['question', 'eye_contact', 'percentage']].copy()

# Series.map yields a numeric weight series directly. This avoids relying on
# Series.replace silently downcasting an object column to float, and it keeps
# the original labels in 'eye_contact' instead of overwriting them.
_eye_weight = eye_calc['eye_contact'].map(eye_map)

# Fail loudly on labels that have no score rather than dropping them as NaN.
_unmapped_eye = eye_calc.loc[_eye_weight.isna(), 'eye_contact'].unique()
if len(_unmapped_eye):
    raise ValueError(f"No score defined for eye_contact label(s): {list(_unmapped_eye)}")

# Weighted score per row, then summed per question.
eye_calc['eye_contact_score'] = _eye_weight * eye_calc['percentage']

eye_calc = eye_calc.groupby('question')['eye_contact_score'].sum().reset_index()

In [10]:
eye_calc

Unnamed: 0,question,eye_contact_score
0,1,8.666667
1,2,7.866667
2,3,7.633333
3,4,7.566667
4,5,6.566667


In [11]:
# One bar per question showing the summed eye-contact score.
(
    alt.Chart(eye_calc, title="Eye Contact Score by Question")
    .mark_bar(cornerRadiusTopLeft=3, cornerRadiusTopRight=3)
    .encode(
        x=alt.X('question:O'),
        y=alt.Y('eye_contact_score:Q', axis=alt.Axis(format='.1f')),
        tooltip=['question', alt.Tooltip('eye_contact_score:Q', format='.1f')],
    )
    .properties(width=500, height=300)
    .configure_axis(labelAngle=0, labelFontSize=14, titleFontSize=20)
    .configure_title(fontSize=20)
)

In [181]:
# NOTE(review): this rebinds `video_df`, which earlier held the raw emotion
# data, to the per-question score table; cells that group the raw data cannot
# be re-run after this one without reloading the CSV.
video_df = (
    emotion_calc
    .merge(eye_calc, on='question', how='inner')
    .set_index('question')
    # Per-question score = mean of the emotion and eye-contact scores.
    .assign(score=lambda per_question: per_question.mean(axis=1))
    .reset_index()
)

# Overall video score: mean of the per-question scores, to one decimal.
video_score = round(video_df["score"].mean(), 1)

In [182]:
video_score

8.0

In [183]:
video_df

Unnamed: 0,question,emotion_score,eye_contact_score,score
0,1,9.0,8.666667,8.833333
1,2,6.757143,7.866667,7.311905
2,3,9.671429,7.633333,8.652381
3,4,9.085714,7.566667,8.32619
4,5,7.614286,6.566667,7.090476


In [185]:
# Bars: combined score per question on a fixed 0-10 axis.
video_bars = (
    alt.Chart(video_df, title="Score by Question")
    .mark_bar(cornerRadiusTopLeft=3, cornerRadiusTopRight=3)
    .encode(
        x=alt.X('question:O'),
        y=alt.Y('score:Q', axis=alt.Axis(format='.1f'), scale=alt.Scale(domain=[0, 10])),
        tooltip=['question', alt.Tooltip('score:Q', format='.1f')],
    )
)

# Value labels reuse the bars' encodings; only a vertical offset (dy) is
# applied, the horizontal offset (dx) is zero.
video_text = video_bars.mark_text(baseline='middle', dx=0, dy=-8, size=15).encode(
    text=alt.Text('score:Q', format='.1f')
)

# Layer labels over bars and apply the shared styling.
(
    (video_bars + video_text)
    .configure_title(fontSize=14)
    .configure_axis(labelAngle=0, labelFontSize=11, titleFontSize=11)
)

# Writing Assessment

In [15]:
# Load the writing-assessment data; later cells read its 'question' and 'answer' columns.
write_df = pd.read_csv('../sample_output/writing_data.csv')


## Sklearn


In [16]:
# Use the answer in the row labelled 0 as the worked example for the cells below.
sample = write_df['answer'].loc[0]

In [17]:
sample

'I will group them according to their level of priority and prioritize them. If the task is urgent and critical, I will finish it first. For the complicated task, I will break down the task into several pieces and finish it step by step. If I know I cannot finish it within a day, I will communicate with my supervisor to see if the deadline can be extended or if some colleagues can give help.'

## SpaCy

In [18]:
import spacy
from spacy import displacy

In [19]:
# Load the small English spaCy pipeline and run it over the sample answer.
nlp = spacy.load("en_core_web_sm")
text = sample
doc = nlp(text)

# Syntax summary: noun-chunk texts, plus the lemmas of all verbs and nouns.
noun = [np_chunk.text for np_chunk in doc.noun_chunks]
verbs = [tok.lemma_ for tok in doc if tok.pos_ == "VERB"]
nouns = [tok.lemma_ for tok in doc if tok.pos_ == "NOUN"]


In [42]:
def get_keywords(kw_lemma, sample):
    """Highlight expected keywords and their matches in a candidate's answer.

    Parameters
    ----------
    kw_lemma : list of str
        Expected keywords. They are joined with spaces and run through the
        notebook-level ``nlp`` pipeline; only tokens tagged NOUN or VERB are
        kept, in lemmatized form.
    sample : str
        The candidate's answer text.

    Returns
    -------
    html_exp
        Whatever ``displacy.render`` returns for the expected keywords.
    html_ans
        Whatever ``displacy.render`` returns for the answer with matching
        tokens labelled ``MATCH``.
    hit : int
        Number of tokens in ``sample`` whose lemma is an expected keyword.

    Notes
    -----
    NOTE(review): inside a notebook ``displacy.render`` may display the markup
    and return None instead of the HTML string; pass ``jupyter=False`` if the
    returned HTML is what callers need. Confirm against the spaCy docs.
    """
    # --- expected keywords -------------------------------------------------
    ans = nlp(' '.join(kw_lemma))
    kw_lemma_tup = [
        (token.lemma_, token.pos_)
        for token in ans
        if token.pos_ in ("NOUN", "VERB")
    ]
    kw_ans = [lemma for lemma, _ in kw_lemma_tup]

    # Track each lemma's position with a running offset. str.find() would
    # return the first occurrence, which is wrong when a lemma repeats or is a
    # substring of an earlier one.
    expected_ents = []
    cursor = 0
    for lemma, pos in kw_lemma_tup:
        expected_ents.append({'start': cursor, 'end': cursor + len(lemma), 'label': pos})
        cursor += len(lemma) + 1  # +1 for the joining space

    d1 = {
        'text': ' '.join(kw_ans),
        'ents': expected_ents,
        'title': "Expected Keywords (Lemmatized)",
    }
    expected = [d1]

    colors = {"VERB": '#e36262', "NOUN": '#a8f280'}
    options = {"ents": ["VERB", "NOUN"], "colors": colors}

    html_exp = displacy.render(expected, style='ent', manual=True, options=options)

    # --- candidate's answer ------------------------------------------------
    doc = nlp(sample)
    kw_lookup = set(kw_ans)

    # token.idx is the token's character offset in the parsed text, so every
    # occurrence of a repeated word is highlighted at its own position.
    d2 = {
        'text': sample,
        'ents': [
            {'start': token.idx, 'end': token.idx + len(token.text), 'label': "MATCH"}
            for token in doc
            if token.lemma_ in kw_lookup
        ],
        'title': "Candidate's Answer",
    }
    answer = [d2]

    colors_ans = {"MATCH": "linear-gradient(90deg, #aa9cfc, #fc9ce7)"}
    options_ans = {"ents": ["MATCH"], "colors": colors_ans}

    html_ans = displacy.render(answer, style='ent', manual=True, options=options_ans)

    hit = len(d2['ents'])

    return html_exp, html_ans, hit
    
    
    
    

In [40]:
# Expected keywords for the sample answer (the same list appears as key_1 in a later cell).
kw_lemma = ['priority', 'supervisor', 'prioritizing', 'communicate', 'helping', 'grouping', 'extending', 'deadline', 'report', 'liaise', 'continue', 'persist']

In [41]:
# get_keywords returns three values (expected markup, answer markup, hit count);
# unpacking only two raises ValueError.
html_exp, html_ans, hit = get_keywords(kw_lemma, sample)

6


In [23]:
# NOTE(review): in the visible cells `kw_ans` is only assigned inside
# get_keywords(), so this lookup presumably relies on leftover kernel state and
# would fail after Restart & Run All. Confirm, then remove or recompute it.
kw_ans

['priority', 'supervisor', 'prioritize', 'help', 'group', 'extend', 'deadline']

In [24]:
doc.text

'I will group them according to their level of priority and prioritize them. If the task is urgent and critical, I will finish it first. For the complicated task, I will break down the task into several pieces and finish it step by step. If I know I cannot finish it within a day, I will communicate with my supervisor to see if the deadline can be extended or if some colleagues can give help.'

[{'start': 7, 'end': 12, 'label': 'MATCH'},
 {'start': 46, 'end': 54, 'label': 'MATCH'},
 {'start': 59, 'end': 69, 'label': 'MATCH'},
 {'start': 307, 'end': 317, 'label': 'MATCH'},
 {'start': 332, 'end': 340, 'label': 'MATCH'},
 {'start': 348, 'end': 356, 'label': 'MATCH'},
 {'start': 388, 'end': 392, 'label': 'MATCH'}]

In [26]:
sample.find("will")

2

In [99]:
# Hand-written displaCy input: one text with two character spans, the first
# with an empty label and the second labelled KEYWORD.
sentence = [
    {
        'text': 'The cat jumped quickly over the wall and ran.',
        'ents': [
            {'start': 8, 'end': 14, 'label': ''},
            {'start': 41, 'end': 44, 'label': 'KEYWORD'},
        ],
        'title': "Answer 1",
    }
]

colors = {"KEYWORD": "linear-gradient(90deg, #aa9cfc, #fc9ce7)"}
options = {"ents": ["KEYWORD", ''], "colors": colors}

displacy.render(sentence, style='ent', manual=True, options=options)

In [26]:
len(sentence[0]['text'])

45

In [67]:
# Expected keywords, one list per question (key_1 .. key_5).
# Entries are given in inflected form; get_keywords() lemmatizes them before matching.
key_1 = ['priority', 'supervisor', 'prioritizing', 'communicate', 'helping', 'grouping', 'extending', 'deadline', 'report', 'liaise', 'continue', 'persist']
key_2 = ['admit', 'rectify', 'prevent', 'source', 'reflect', 'acknowledge', 'assisting', 'honest', 'discussion', 'learn']
key_3 = ['helpful', 'friendly', 'teaching', 'help', 'contribute', 'sharing', 'knowledge', 'communicate', 'training', 'learn']
key_4 = ['justify', 'calm', 'opinions', 'understand', 'facts', 'explain', 'suggest', 'honest', 'discussing', 'communicating']
# NOTE(review): 'advicing' is not an English word ('advising'?), so it may not
# lemmatize to anything that appears in an answer — confirm the intended keyword.
key_5 = ['honest', 'initiate', 'understanding', 'advicing', 'discussion', 'cause', 'assessing', 'listening', 'chance', 'communicating']

# Keyword lists collected in question order.
kw_lemmas = [key_1, key_2, key_3, key_4, key_5]

In [64]:
write_df["question"].iloc[4]

'Assume you are the boss and one of your employees have been severely underperforming, what will you do?'

In [65]:
write_df["answer"].iloc[4]

'I will sit down with him or her and have an honest conversation. A will initiate a discussion on what is causing the underperformance and why is it happening. I will not judge a book by its cover as they may be some very sensitive things are he or she is facing personally. After understanding the situation, I will assess and provide advice to that employee on how to get back on track.'

In [55]:
# All answers as a plain Python list, in row order.
answers = write_df['answer'].tolist()

In [69]:
# NOTE(review): SpacyTextBlob is not referenced by name below; the import is
# presumably needed so the 'spacytextblob' pipe can be added — confirm.
from spacytextblob.spacytextblob import SpacyTextBlob

# Rebind `nlp` to a freshly loaded pipeline and append the 'spacytextblob'
# component; a later cell reads `._.polarity` from documents it produces.
nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('spacytextblob')


[(['really', 'horrible'], -1.0, 1.0, None),
 (['worst', '!'], -1.0, 1.0, None),
 (['really', 'good'], 0.7, 0.6000000000000001, None),
 (['happy'], 0.8, 1.0, None)]

In [98]:
# Parse the second answer and print one polarity value per sentence.
text = answers[1]
doc = nlp(text)

for sent in doc.sents:
    # Each sentence is re-parsed as its own document so that `._.polarity`
    # is read from a document containing only that sentence.
    sub = nlp(sent.text)
    print(sub._.polarity)

-0.6999999999999998
0.0
0.0


In [147]:
# Load the per-question writing results (no visible cell writes this file —
# presumably produced outside this notebook).
writing_results_df = pd.read_csv("../sample_output/writing_results.csv")

In [148]:
# Keep only the question id and the two metrics that are charted and scored.
writing_results_df = writing_results_df[['question', 'hit_rate', 'polarity']]

In [149]:
writing_results_df

Unnamed: 0,question,hit_rate,polarity
0,1,0.5,-0.081111
1,2,0.6,-0.233333
2,3,0.4,0.152841
3,4,0.6,0.036111
4,5,0.4,0.114889


In [150]:
# Bar chart of hit_rate per question on a fixed 0-1 axis.
hit_rate = (
    alt.Chart(writing_results_df, title="Hit Rate by Question")
    .mark_bar(cornerRadiusTopLeft=3, cornerRadiusTopRight=3)
    .encode(
        x=alt.X('question:O'),
        y=alt.Y('hit_rate:Q', axis=alt.Axis(format='.1f'), scale=alt.Scale(domain=[0, 1])),
        tooltip=['question', alt.Tooltip('hit_rate:Q', format='.1f')],
    )
    .configure_axis(labelAngle=0, labelFontSize=11, titleFontSize=11)
    .configure_title(fontSize=14)
)

In [151]:
# Bar chart of polarity per question; bars are green when polarity >= 0 and
# red otherwise.
sentiment = (
    alt.Chart(writing_results_df, title="Sentiment by Question")
    .mark_bar(cornerRadiusTopLeft=3, cornerRadiusTopRight=3)
    .encode(
        x=alt.X('question:O'),
        y=alt.Y('polarity:Q', axis=alt.Axis(format='.1f')),
        color=alt.condition(
            alt.datum.polarity >= 0,  # non-negative polarity ...
            alt.value('green'),       # ... is drawn in green,
            alt.value('red'),         # negative polarity in red.
        ),
        tooltip=['question', alt.Tooltip('polarity:Q', format='.1f')],
    )
    .configure_axis(labelAngle=0, labelFontSize=11, titleFontSize=11)
    .configure_title(fontSize=14)
)

In [152]:
# Writing score per question: a base of 5, plus 4 x hit_rate, plus polarity.
# The polarity term must come from writing_results_df; the previous
# `writing_results` name is not defined anywhere in the notebook and raised
# NameError.
writing_results_df["score"] = (
    5
    + writing_results_df['hit_rate'] * 4
    + writing_results_df['polarity']
)

In [174]:
# Bars: writing score per question on a fixed 0-10 axis.
write_score = (
    alt.Chart(writing_results_df, title="Score by Question")
    .mark_bar(cornerRadiusTopLeft=3, cornerRadiusTopRight=3)
    .encode(
        x=alt.X('question:O'),
        y=alt.Y('score:Q', axis=alt.Axis(format='.1f'), scale=alt.Scale(domain=[0, 10])),
        tooltip=['question', alt.Tooltip('score:Q', format='.1f')],
    )
)

# Value labels reuse the bars' encodings; only a vertical offset (dy) is
# applied, the horizontal offset (dx) is zero.
write_text = write_score.mark_text(baseline='middle', dx=0, dy=-5, size=15).encode(
    text=alt.Text('score:Q', format='.1f')
)

# Layer labels over bars and apply the shared styling.
(
    (write_score + write_text)
    .configure_title(fontSize=14)
    .configure_axis(labelAngle=0, labelFontSize=11, titleFontSize=11)
)