# Analysis

In [None]:
import datetime
import random
import matplotlib.pyplot as plt
%matplotlib inline

## Load the Data

In [None]:
from faces import FaceShard
from emotions import EmotionShard
from behavior import WindowShard, ScreenShard

from mirror import Mirror

from config import EMOTIONLOG, WINDOWLOG, SCREENSHOT_DIR, MIRRORLOG, FACE_DIR


# One shard per data source; the Mirror is constructed over all of them.
# The order matches the original construction order.
shards = [
    EmotionShard(logfile=EMOTIONLOG),
    FaceShard(FACE_DIR),
    WindowShard(logfile=WINDOWLOG),
    ScreenShard(logdir=SCREENSHOT_DIR),
]
mirror = Mirror(shards=shards, lens=None, logfile=MIRRORLOG)

In [None]:
# Recall the logged states, bounded by `from_date` = 1 November 2020.
states = mirror.remember(from_date=datetime.datetime(2020, 11, 1))

## Have a Look at States with Specific Emotions

In [None]:
# Let's consider the more interesting ones by filtering

# Group state IDs by the first-listed detected emotion of each state.
# This is done in a single pass over `states` (instead of rescanning the whole
# list once per emotion); keys appear in first-seen order, which is stable
# between runs. States without any detected emotion are skipped.
ids_by_emotion = {}
for state in states:
    detected = state['emotions']
    if len(detected) > 0:
        ids_by_emotion.setdefault(detected[0]['emotion'], []).append(state['ID'])

# Distinct emotion labels that occur as a first-listed detection
emotions = set(ids_by_emotion)

ids_by_emotion.keys()

In [None]:
# Emotion label to inspect; must match the labels used as keys of `ids_by_emotion`
emotion = 'anger'

# Use .get() so that an emotion which was never detected gives an empty
# selection ("0 relevant logs") instead of aborting the cell with a KeyError.
ids = sorted(ids_by_emotion.get(emotion, []))
print(f"{len(ids)} relevant logs")

# Lookup table from state ID to the full state record
state_by_id = {state['ID']: state for state in states}

# Find for which IDs we have a capture available
ids = [i for i in ids
       if 'faces' in state_by_id[i] and len(state_by_id[i]['faces']) > 0]
print(f"{len(ids)} relevant logs with captures")

# Find for which IDs we also have a screenshot
ids = [i for i in ids
       if 'screenshot' in state_by_id[i] and state_by_id[i]['screenshot'] is not None]
print(f"{len(ids)} relevant logs with screenshots")

In [None]:
# Pick one of the filtered state IDs at random and show its details.
id_ = random.choice(ids)
print(id_)

state = state_by_id[id_]
print(f"Detected emotion: {state['emotions'][0]['emotion']}")
print(f"Behavior at the time: {state['active_window']}")

# Uncomment to display the screenshot taken for this state. The slice reverses
# the last axis (presumably a BGR -> RGB channel swap -- confirm).
#plt.figure(figsize=(15,15))
#plt.imshow(state['screenshot'][:, :, ::-1])

## Display Emotions over Time

In [None]:
# x: parsed timestamps, y: first-listed emotion label, one point per state
# that has at least one detected emotion.
x, y = [], []

for state in states:
    detections = state['emotions']
    if len(detections) == 0:
        # Nothing detected for this state, so there is nothing to plot
        continue
    x.append(datetime.datetime.fromisoformat(state['timestamp']))
    y.append(detections[0]['emotion'])

# Blue dots, one per detection
plt.plot(x, y, 'b.')

## Correlations

Let's have a look at the behavior information and see if any terms correlate with any emotions.

In [None]:
import numpy as np

# Target emotion: Y is 1 where a state's first-listed emotion equals this label
emotion = 'neutral'

vocab = []         # vocab[j] is the token counted in column j of X
vocab_index = {}   # token -> column j; O(1) lookup instead of vocab.index()
row_counts = []    # one {column: count} dict per usable state
Y = []

for state in states:
    # Only states with a detected emotion and a window title are usable
    if len(state['emotions']) < 1 or 'title' not in state['active_window']:
        continue

    info = state['active_window']['title']

    # Simple tokenization: whitespace split, lower-cased
    tokens = [t.lower() for t in info.split()]

    # Sparse bag-of-words counts for this title; unseen tokens get the next
    # free column, so column order is the order of first appearance.
    counts = {}
    for token in tokens:
        col = vocab_index.get(token)
        if col is None:
            col = vocab_index[token] = len(vocab)
            vocab.append(token)
        counts[col] = counts.get(col, 0) + 1
    row_counts.append(counts)

    Y.append(1 if state['emotions'][0]['emotion'] == emotion else 0)

# Kept for compatibility with code that expects the set of known tokens
vocab_set = set(vocab)

# Dense (n_states, n_tokens) count matrix. Allocating it once at the final
# width removes the need to pad earlier rows, and it stays 2-D even when no
# state qualified.
X = np.zeros((len(row_counts), len(vocab)), dtype=int)
for row, counts in enumerate(row_counts):
    for col, count in counts.items():
        X[row, col] = count
Y = np.array(Y)

In [None]:
# Pearson correlation between each token's count column and the binary target Y.
# If a column (or Y itself) has zero variance the coefficient is undefined and
# NumPy returns NaN; the accompanying floating-point warnings are silenced
# because those entries are handled explicitly below.
with np.errstate(divide='ignore', invalid='ignore'):
    correlations = [np.corrcoef(X[:, ix], Y)[0, 1] for ix in range(len(vocab))]

# argsort places NaN last, so without filtering the undefined correlations
# would be listed as the "most positively correlating" tokens. Drop them
# from the ranking.
ranked = np.argsort(correlations)
args = ranked[~np.isnan(np.asarray(correlations, dtype=float)[ranked])]

print("Most negatively correlating:")
for pos in args[:10]:
    print("-", vocab[pos], correlations[pos])

print("\nMost positively correlating:")
for pos in args[::-1][:10]:
    print("-", vocab[pos], correlations[pos])