# COGS 189: Typing & Attention

## Sources:

* https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.make_interp_spline.html

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from os.path import join as pjoin
import scipy
import json

import seaborn as sns
# Apply seaborn's default theme to every matplotlib figure drawn below.
sns.set()
# Switch to seaborn's 'talk' plotting context for the rest of the notebook.
sns.set_context('talk')

## Reading the Saved Dataframes

In [2]:
# Directory that holds the saved CSV files, resolved relative to the
# notebook's current working directory. Change it here if the data lives
# somewhere else.
DATA_DIR = 'dataframes'

if not os.path.isdir(DATA_DIR):
    # Fail early with an actionable message instead of pandas' bare
    # "No such file or directory" for the first CSV.
    raise FileNotFoundError(
        "Data directory '{}' not found under '{}'; start the notebook from "
        "the folder that contains it or update DATA_DIR.".format(
            DATA_DIR, os.getcwd()
        )
    )


def _read_norm(name):
    """Read ``<DATA_DIR>/<name>.csv`` and return it as a DataFrame."""
    return pd.read_csv(pjoin(DATA_DIR, '{}.csv'.format(name)))


# Frames used by the attention bar plot and the attention-vs-time line plots.
norm_control = _read_norm('norm_control')
norm_IV1 = _read_norm('norm_IV1')
norm_IV2 = _read_norm('norm_IV2')
norm_IVboth = _read_norm('norm_IVboth')

# Frames whose 'WPM' and 'accuracy' columns feed the typing bar plots.
norm_IV1_typing = _read_norm('norm_IV1_typing')
norm_IV2_typing = _read_norm('norm_IV2_typing')
norm_IVboth_typing = _read_norm('norm_IVboth_typing')

FileNotFoundError: [Errno 2] No such file or directory: 'dataframes/norm_control.csv'

# Data Analysis

## Bar Plots

### Loudness, Familiarity, Both (IVs) VS. WPM (DV)

In [None]:
# Column-wise averages of the IV1 typing results.
avg_normTyping1 = norm_IV1_typing.mean(axis='index')
print(avg_normTyping1)

# Show the two metrics that the bar plots below read from this Series.
for metric in ('WPM', 'accuracy'):
    print(avg_normTyping1[metric])

In [None]:
# Column-wise averages of the IV2 typing results.
avg_normTyping2 = norm_IV2_typing.mean(axis='index')
print(avg_normTyping2)

# Show the two metrics that the bar plots below read from this Series.
for metric in ('WPM', 'accuracy'):
    print(avg_normTyping2[metric])

In [None]:
# Column-wise averages of the combined-IV typing results.
avg_normTyping3 = norm_IVboth_typing.mean(axis='index')
print(avg_normTyping3)

# Show the two metrics that the bar plots below read from this Series.
for metric in ('WPM', 'accuracy'):
    print(avg_normTyping3[metric])

In [None]:
# One row per typing condition: its label and its mean WPM.
WPM_df = pd.DataFrame({
    'Typing': ['norm_IV1_typing', 'norm_IV2_typing', 'norm_IVboth_typing'],
    'WPM': [
        avg_normTyping1['WPM'],
        avg_normTyping2['WPM'],
        avg_normTyping3['WPM'],
    ],
})

WPM_df

In [None]:
# Bar chart of the mean WPM for each typing condition.
WPM_df.plot(
    kind='bar',
    x='Typing',
    y='WPM',
    rot=0,
    figsize=(10, 5),
    title='Loudness, Familiarity, Both VS. WPM',
    xlabel="IV: Loudness, Familiarity, Both",
    ylabel="DV: WPM",
)


### Loudness, Familiarity, Both (IVs) VS. Accuracy (DV)

In [None]:
# One row per typing condition: its label and its mean accuracy.
accuracy_df = pd.DataFrame({
    'Typing': ['norm_IV1_typing', 'norm_IV2_typing', 'norm_IVboth_typing'],
    'Accuracy': [
        avg_normTyping1['accuracy'],
        avg_normTyping2['accuracy'],
        avg_normTyping3['accuracy'],
    ],
})

accuracy_df

In [None]:
# Bar chart of the mean accuracy for each typing condition.
accuracy_df.plot(
    kind='bar',
    x='Typing',
    y='Accuracy',
    rot=0,
    figsize=(10, 5),
    title='Loudness, Familiarity, Both VS. Accuracy',
    xlabel="IV: Loudness, Familiarity, Both",
    ylabel="DV: Accuracy",
)

### Loudness, Familiarity, Both (IVs) VS. Attention (DV)

In [None]:
# Column-wise mean of every attention frame, in the order
# control, IV1, IV2, both.
avg_norm_control, avg_norm_IV1, avg_norm_IV2, avg_norm_IVboth = (
    frame.mean(axis=0)
    for frame in (norm_control, norm_IV1, norm_IV2, norm_IVboth)
)

for column_means in (avg_norm_control, avg_norm_IV1, avg_norm_IV2, avg_norm_IVboth):
    print(column_means)

In [None]:
# Collapse each Series of column means into one scalar per condition.
avg_norm_control_avg, avg_norm_IV1_avg, avg_norm_IV2_avg, avg_norm_IVboth_avg = (
    column_means.mean(axis=0)
    for column_means in (avg_norm_control, avg_norm_IV1, avg_norm_IV2, avg_norm_IVboth)
)

# One value per line, in the order control, IV1, IV2, both.
print(
    avg_norm_control_avg,
    avg_norm_IV1_avg,
    avg_norm_IV2_avg,
    avg_norm_IVboth_avg,
    sep='\n',
)

In [None]:
# Labels and overall means, kept in matching order.
trial_labels = ["Control", "IV1", "IV2", "Both"]
trial_means = [
    avg_norm_control_avg,
    avg_norm_IV1_avg,
    avg_norm_IV2_avg,
    avg_norm_IVboth_avg,
]

attention_data = {"Trial": trial_labels, "Attention": trial_means}
attention_df = pd.DataFrame(attention_data)
attention_df

In [None]:
# Bar chart of the overall mean attention per trial. The figure size and the
# IV/DV axis labels mirror the WPM and accuracy bar plots so the three
# figures read consistently; the trailing ';' hides the Axes repr.
attention_df.plot.bar(x="Trial", y="Attention", rot=0,
                      figsize=(10,5),
                      title="Loudness, Familiarity, Both VS. Attention",
                      xlabel="IV: Loudness, Familiarity, Both",
                      ylabel="DV: Attention");

## Line Plot: Chart of Attention vs. Time

In [None]:
# Number of entries on the time axis. It has to equal the row count of the
# frames this list is later attached to with set_index, otherwise pandas
# raises a length-mismatch error.
N_SAMPLES = 30824
# Divisor that turns a row number into a position on the time axis.
# NOTE(review): 30824 / 513.7 is roughly 60, so this looks like samples per
# second of a 60-second recording -- confirm against the recording setup.
SAMPLES_PER_TIME_UNIT = 513.7

# Time-axis value for every row: row number divided by the sampling divisor.
arr_index = [sample / SAMPLES_PER_TIME_UNIT for sample in range(N_SAMPLES)]

In [None]:
def _frame_for_column(column):
    """Gather one column from all four condition frames into a single frame.

    The result has the columns 'Control', 'IV1', 'IV2' and 'IV_Both', and
    every column is aligned to ``norm_control``'s index.
    """
    sources = {
        'Control': norm_control,
        'IV1': norm_IV1,
        'IV2': norm_IV2,
        'IV_Both': norm_IVboth,
    }
    return pd.DataFrame(
        {label: frame[column] for label, frame in sources.items()},
        index=norm_control.index,
    )


# One frame per source column; '0', '1' and '2' are the three series that
# are averaged together further down in the notebook.
attention_vs_time_0 = _frame_for_column('0')

attention_vs_time_1 = _frame_for_column('1')

attention_vs_time_2 = _frame_for_column('2')

In [None]:
# Re-index all three frames by the time values held in arr_index.
time_axis = pd.Index(arr_index)
attention_vs_time_0, attention_vs_time_1, attention_vs_time_2 = (
    frame.set_index([time_axis])
    for frame in (attention_vs_time_0, attention_vs_time_1, attention_vs_time_2)
)

In [None]:
# One subplot per condition for column '0'. The title and x-label let this
# figure be told apart from the otherwise identical plots for '1' and '2'.
attention_vs_time_0.plot.line(figsize=(20,10), subplots=True,
                              title='Attention vs. Time: Participant 0',
                              xlabel='Time')

In [None]:
# One subplot per condition for column '1'. The title and x-label let this
# figure be told apart from the otherwise identical plots for '0' and '2'.
attention_vs_time_1.plot.line(figsize=(20,10), subplots=True,
                              title='Attention vs. Time: Participant 1',
                              xlabel='Time')

In [None]:
# One subplot per condition for column '2'. The title and x-label let this
# figure be told apart from the otherwise identical plots for '0' and '1'.
attention_vs_time_2.plot.line(figsize=(20,10), subplots=True,
                              title='Attention vs. Time: Participant 2',
                              xlabel='Time')

In [None]:
# Take average of attention over all 3 participants and then compare the 4 plots

In [None]:
# Element-wise average of the three per-participant frames, one condition
# column at a time.
participant_frames = (attention_vs_time_0, attention_vs_time_1, attention_vs_time_2)

attention_vs_time_mean = pd.DataFrame()
for condition in ('Control', 'IV1', 'IV2', 'IV_Both'):
    first, second, third = (frame[condition] for frame in participant_frames)
    attention_vs_time_mean[condition] = (first + second + third) / 3


In [None]:
# One subplot per condition for the participant-averaged signal. The title
# and x-label distinguish this figure from the per-participant plots.
attention_vs_time_mean.plot.line(figsize=(20,10), subplots=True,
                                 title='Attention vs. Time: Mean Across Participants',
                                 xlabel='Time')

## Hypothesis Testing

In [None]:
# Mean over time of the participant-averaged attention, per condition.
attention_vs_time_mean.mean(axis='index')

In [None]:
# Standard deviation over time of the participant-averaged attention, per condition.
attention_vs_time_mean.std(axis='index')

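We observe that the means of IV1 and IV2 are each more than 2 standard deviations away from the mean of Control, which we take as evidence of a significant difference (p-value < 0.05). Note that this is an informal heuristic: the standard deviations above describe variation over time rather than the standard error of the mean, so a formal p-value requires a statistical test (e.g., a paired t-test across participants).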