# Dotin EDA : Mouse Track Heatmaps

In [1]:
import numpy as np
import pandas as pd
import os
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.ndimage.filters import gaussian_filter
import seaborn as sns

In [3]:
import os 

In [16]:
# List the files currently available in the cleaned-data folder.
clean_data_dir = '../Data/Clean Data'
os.listdir(clean_data_dir)

['.ipynb_checkpoints',
 'merged_data_user_level.csv',
 'merged_data_user_level_V2.csv',
 'mouse_flat_v2.csv',
 'mouse_flat_V3.csv',
 'mouse_user_direction.csv',
 'votes_v2.csv']

In [17]:
# Read the two input tables: the mouse-tracking file (mt) and the votes file (v).
mouse_events_csv = '../Data/Clean Data/mouse_flat_V3.csv'
votes_csv = '../Data/Raw/votes.csv'

mt = pd.read_csv(mouse_events_csv)
v = pd.read_csv(votes_csv)

In [None]:
# Express each cursor position as a percentage of the window size, so that
# recordings made with different window sizes share a common scale.

# Horizontal position: cord_x as a percentage of the window width.
mt['percent_cord_x'] = mt['cord_x'].div(mt['window_x']).mul(100)

# Vertical position: cord_y as a percentage of the window height.
mt['percent_cord_y'] = mt['cord_y'].div(mt['window_y']).mul(100)

**Overall Mouse Activity Heatmap Per User**

In [None]:
def overall_heatmap(userid):
    """Plot a 2-D histogram heatmap of every mouse event recorded for one user.

    Selects the rows of the module-level ``mt`` DataFrame whose ``user_id``
    equals ``userid`` and bins their ``percent_cord_x`` / ``percent_cord_y``
    values into a 100 x 100 grid.

    Parameters
    ----------
    userid
        Value compared against ``mt.user_id`` to select the user's rows.

    Returns
    -------
    None
        The user id is printed and the figure is shown as a side effect.

    Notes
    -----
    NOTE(review): the image is drawn with ``origin='lower'`` (y grows upward).
    If ``cord_y`` is measured from the top of the window, the plot is
    vertically flipped relative to the screen - confirm the convention.
    """
    user_events = mt[mt.user_id == userid]

    x = user_events['percent_cord_x'].to_numpy()
    y = user_events['percent_cord_y'].to_numpy()

    # np.histogram2d raises ValueError when it has to auto-detect its range
    # from NaN/inf values (missing coordinates, or a zero window size in the
    # percentage computation), so drop those points rather than abort the
    # whole per-user loop.
    finite = np.isfinite(x) & np.isfinite(y)
    x, y = x[finite], y[finite]

    print(userid)
    if x.size == 0:
        # Nothing to bin: an all-zero image would carry no information.
        print('no plottable mouse events for this user')
        return

    heatmap, xedges, yedges = np.histogram2d(x, y, bins=100)
    extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]

    fig, ax = plt.subplots()
    # histogram2d indexes the counts as [x, y]; imshow expects rows to be y,
    # hence the transpose.
    ax.imshow(heatmap.T, extent=extent, origin='lower')
    ax.set(title='All mouse activity - user {}'.format(userid),
           xlabel='percent_cord_x', ylabel='percent_cord_y')
    plt.show()
    # Release the figure so a loop over many users does not accumulate them.
    plt.close(fig)

In [None]:
# Draw the overall-activity heatmap once for each distinct user id.
for userid in mt['user_id'].unique():
    overall_heatmap(userid)

**Mouse Movements Heatmap Per User**

In [None]:
def movements_heatmap(userid):
    """Plot a 2-D histogram heatmap of one user's mouse-movement events.

    Selects the rows of the module-level ``mt`` DataFrame whose ``user_id``
    equals ``userid`` and whose ``action`` is ``'m'``, then bins their
    ``percent_cord_x`` / ``percent_cord_y`` values into a 100 x 100 grid.

    Parameters
    ----------
    userid
        Value compared against ``mt.user_id`` to select the user's rows.

    Returns
    -------
    None
        The user id is printed and the figure is shown as a side effect.

    Notes
    -----
    NOTE(review): the image is drawn with ``origin='lower'`` (y grows upward).
    If ``cord_y`` is measured from the top of the window, the plot is
    vertically flipped relative to the screen - confirm the convention.
    """
    user_events = mt[mt.user_id == userid]
    movements = user_events[user_events.action == 'm']

    x = movements['percent_cord_x'].to_numpy()
    y = movements['percent_cord_y'].to_numpy()

    # np.histogram2d raises ValueError when it has to auto-detect its range
    # from NaN/inf values (missing coordinates, or a zero window size in the
    # percentage computation), so drop those points rather than abort the
    # whole per-user loop.
    finite = np.isfinite(x) & np.isfinite(y)
    x, y = x[finite], y[finite]

    print(userid)
    if x.size == 0:
        # Nothing to bin: an all-zero image would carry no information.
        print('no plottable movement events for this user')
        return

    heatmap, xedges, yedges = np.histogram2d(x, y, bins=100)
    extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]

    fig, ax = plt.subplots()
    # histogram2d indexes the counts as [x, y]; imshow expects rows to be y,
    # hence the transpose.
    ax.imshow(heatmap.T, extent=extent, origin='lower')
    ax.set(title='Mouse movements - user {}'.format(userid),
           xlabel='percent_cord_x', ylabel='percent_cord_y')
    plt.show()
    # Release the figure so a loop over many users does not accumulate them.
    plt.close(fig)

In [None]:
# Draw the movement-only heatmap once for each distinct user id.
for userid in mt['user_id'].unique():
    movements_heatmap(userid)

**Clicks Heatmap Per User**

In [None]:
def clicks_heatmap(userid):
    """Plot a 2-D histogram heatmap of one user's click events.

    Selects the rows of the module-level ``mt`` DataFrame whose ``user_id``
    equals ``userid`` and whose ``action`` is ``'c'``, then bins their
    ``percent_cord_x`` / ``percent_cord_y`` values into a 100 x 100 grid.

    Parameters
    ----------
    userid
        Value compared against ``mt.user_id`` to select the user's rows.

    Returns
    -------
    None
        The user id is printed and the figure is shown as a side effect.

    Notes
    -----
    NOTE(review): the image is drawn with ``origin='lower'`` (y grows upward).
    If ``cord_y`` is measured from the top of the window, the plot is
    vertically flipped relative to the screen - confirm the convention.
    """
    user_events = mt[mt.user_id == userid]
    clicks = user_events[user_events.action == 'c']

    x = clicks['percent_cord_x'].to_numpy()
    y = clicks['percent_cord_y'].to_numpy()

    # np.histogram2d raises ValueError when it has to auto-detect its range
    # from NaN/inf values (missing coordinates, or a zero window size in the
    # percentage computation), so drop those points rather than abort the
    # whole per-user loop.
    finite = np.isfinite(x) & np.isfinite(y)
    x, y = x[finite], y[finite]

    print(userid)
    if x.size == 0:
        # A user may have no click rows at all; skip the empty plot.
        print('no plottable click events for this user')
        return

    heatmap, xedges, yedges = np.histogram2d(x, y, bins=100)
    extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]

    fig, ax = plt.subplots()
    # histogram2d indexes the counts as [x, y]; imshow expects rows to be y,
    # hence the transpose.
    ax.imshow(heatmap.T, extent=extent, origin='lower')
    ax.set(title='Clicks - user {}'.format(userid),
           xlabel='percent_cord_x', ylabel='percent_cord_y')
    plt.show()
    # Release the figure so a loop over many users does not accumulate them.
    plt.close(fig)

In [None]:
# Draw the click-only heatmap once for each distinct user id.
for userid in mt['user_id'].unique():
    clicks_heatmap(userid)

**Insights:**<br>
1. Both movement and click patterns vary from user to user.
2. On average, mouse movements are heavily concentrated on the left side of the screen.
3. Most of the mouse activity happens between x coordinates 400 and 1200, although the exact range varies with screen size.
4. Many of the mouse movements and clicks form two clear vertical lines at the following x coordinates: 500-700 and 1000.

**Next steps:**<br>
1. It will be more insightful to analyze the movements separately for the different parts of the survey, since the layout and location of response fields vary.
2. Analyze mouse movements and clicks by device, i.e. by screen size.

### Scatterplots of Mouse Paths with Color Codes for Action Type

In [None]:
def action_type_scatter(userid):
    """Scatter-plot one user's mouse events, coloured by action type.

    Selects the rows of the module-level ``mt`` DataFrame whose ``user_id``
    equals ``userid`` and plots ``percent_cord_y`` against ``percent_cord_x``
    with one colour per value of the ``action`` column.

    Parameters
    ----------
    userid
        Value compared against ``mt.user_id`` to select the user's rows.

    Returns
    -------
    None
        The user id is printed and the figure is shown as a side effect.
    """
    user_events = mt[mt.user_id == userid]

    # Colours are defined for the action codes 'c', 'm' and 's' only.
    # NOTE(review): presumably click / move / scroll; any other code in the
    # `action` column has no colour assigned here - confirm the value set.
    action_colours = dict(c="#CD5C5C", m="#E9967A", s="#FFA07A")

    # fit_reg=False reduces lmplot to a hue-coloured scatter; its built-in
    # legend is disabled so the legend can be placed explicitly below.
    sns.lmplot(x="percent_cord_x", y="percent_cord_y", data=user_events,
               fit_reg=False, hue='action', legend=False,
               palette=action_colours)

    # Title the figure so it still identifies the user when read on its own.
    plt.title('Mouse events by action type - user {}'.format(userid))
    plt.legend(loc='lower right')
    print(userid)
    plt.show()

In [None]:
# Draw the action-coloured scatterplot once for each distinct user id.
for userid in mt['user_id'].unique():
    action_type_scatter(userid)