# Summary of DAS

This notebook contains a list of Python methods that were introduced in the DAS programming exercises.

### Import the libraries

In [None]:
import numpy as np  # import auxiliary library, typical idiom
import pandas as pd  # import the Pandas library, typical idiom

from statsmodels.distributions.empirical_distribution import ECDF

from scipy.interpolate import interp1d
# Import from the public scipy.ndimage namespace: the scipy.ndimage.filters
# submodule is deprecated and emits a DeprecationWarning in recent SciPy.
from scipy.ndimage import gaussian_filter1d
from scipy import stats

# next command ensures that plots appear inside the notebook
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
# Set Seaborn defaults first; the rcParams below are applied afterwards.
sns.set()
# Default horizontal/vertical size of plots, in inches.
plt.rcParams['figure.figsize'] = (10, 5)
# Fixes an issue with Seaborn box plots; needed after importing seaborn.
plt.rcParams['lines.markeredgewidth'] = 1

# <span class="section">1.</span> Filtering


In [None]:
# Read the NASDAQ data (first column parsed as dates), index it by 'Date'
# and keep only the 'Close' column as a one-column DataFrame.
df_nasdaq = (
    pd.read_csv('../datasets/NASDAQ.csv', parse_dates=[0])
    .set_index('Date')
    [['Close']]
)
# Use a lower-case column name for the rest of the notebook.
df_nasdaq.columns = ['close']

In [None]:
# Line plot of the closing index against the 'Date' index.
ax = df_nasdaq['close'].plot()
ax.set_title('Closing NASDAQ Composite index', fontsize=16)
ax.set_ylabel('Index');

## Gaussian filter


In [None]:
# Smooth the closing index with a 1-D Gaussian filter (sigma of 30 samples);
# mode='nearest' extends the series at both ends with its edge values.
df_nasdaq['close_filtered'] = gaussian_filter1d(
    df_nasdaq['close'],
    sigma=30,
    mode='nearest',
)

In [None]:
# Overlay the Gaussian-filtered series (black) on the raw closing index (orange).
ax_nasdaq = df_nasdaq['close'].plot(color='orange')
df_nasdaq['close_filtered'].plot(ax=ax_nasdaq, color='black')
ax_nasdaq.set_title('Closing NASDAQ Composite index', fontsize=16)
# The trailing ';' keeps the Text(...) repr of the last call out of the cell output.
ax_nasdaq.set_ylabel('Index');

## Rolling windows


In [None]:
# Median over a rolling window of 5 consecutive rows.
df_nasdaq['median_filtered'] = (
    df_nasdaq['close']
    .rolling(5)
    .median()
)

In [None]:
# Raw closing index (orange) with the rolling-median version (black) on top.
ax_nasdaq = df_nasdaq['close'].plot(color='orange')
df_nasdaq['median_filtered'].plot(color='black', ax=ax_nasdaq)
ax_nasdaq.set_ylabel('Index')
ax_nasdaq.set_title('Closing NASDAQ Composite index', fontsize=16);

## Filtering mouse trajectories


In [None]:
# Load the paths data set; the first column is parsed as dates.
df_paths = pd.read_csv(
    '../datasets/paths.csv',
    parse_dates=[0],
)

## Computing changes and finding maxima


In [None]:
# Change of the closing index relative to the previous row
# (the first row has no predecessor and becomes NaN).
df_nasdaq['close_diff'] = (
    df_nasdaq['close']
    .diff()
)

In [None]:
# Index label (a 'Date' value) at which the row-to-row change is smallest.
date_min = (
    df_nasdaq['close_diff']
    .idxmin()
)

In [None]:
# Display the change recorded at that label.
df_nasdaq['close_diff'].loc[date_min]

In [None]:
# Index label (a 'Date' value) at which the row-to-row change is largest.
date_max = (
    df_nasdaq['close_diff']
    .idxmax()
)

In [None]:
# Running sum of the row-to-row changes.
df_nasdaq['close_diff_cumulative'] = (
    df_nasdaq['close_diff']
    .cumsum()
)
# The same quantity computed directly: each closing value minus the first one.
df_nasdaq['change_since_beginning'] = (
    df_nasdaq['close'] - df_nasdaq['close'].iloc[0]
)

In [None]:
# Magnitude of each row-to-row change. Series.abs() states the intent directly
# and avoids the square / square-root round trip of (x**2)**(1/2).
df_nasdaq['abs_change'] = df_nasdaq['close_diff'].abs()
# Running total of those magnitudes.
df_nasdaq['total_abs_change'] = df_nasdaq['abs_change'].cumsum()

## Approximating derivatives and finding trends


In [None]:
# Plot the row-to-row changes of the closing index.
ax_marg = df_nasdaq['close_diff'].plot()
ax_marg.set_title('Change in index between consecutive days')
ax_marg.set_ylabel('Change in index');

In [None]:
# Same Gaussian filter as used for smoothing, but with order=1: the series is
# convolved with the first derivative of the Gaussian kernel.
df_nasdaq['close_filtered_deriv'] = gaussian_filter1d(
    df_nasdaq['close'],
    sigma=30,
    order=1,
    mode='nearest',
)

In [None]:
# Plot the filtered derivative of the closing index.
ax_deriv = df_nasdaq['close_filtered_deriv'].plot()
ax_deriv.set_ylabel('Change in index per day')
ax_deriv.set_title('Approximate derivative of NASDAQ index', fontsize=16);

# <span class="section">2.</span> Data aggregation


### Data: mouse trajectories


In [None]:
# Load the Fitts data set with default parsing.
df_fitts = pd.read_csv(
    '../datasets/fitts.csv',
)

### Multi-level indexing


In [None]:
# Build a three-level index (user, trial, t). Rebinding the result instead of
# using inplace=True keeps the step explicit and chainable.
df_fitts = df_fitts.set_index(['user', 'trial', 't'])

### Data: user properties


In [None]:
# Load the per-user properties with default parsing.
df_user_props = pd.read_csv(
    '../datasets/user_props.csv',
)

In [None]:
# Index the user properties by 'user'. Rebinding the result instead of using
# inplace=True keeps the step explicit and chainable.
df_user_props = df_user_props.set_index('user')

### Conversion to readable table entries


In [None]:
# Lookup tables that translate numeric codes into readable table entries.
dict_use_tue_laptop = {0: False, 1: True}
dict_right_handed = {0: False, 1: True}
dict_gender = {
    0: 'Male',
    1: 'Female',
    2: 'Other',
}

In [None]:
# Cast the column to integers (presumably so that its values match the integer
# keys of the lookup dictionary -- confirm against the raw data).
df_user_props['use_tue_laptop'] = (
    df_user_props['use_tue_laptop']
    .astype(int)
)

#### The _Pandas_ function **`map()`**


In [None]:
# Replace every value in the column by its entry in the lookup dictionary.
df_user_props['use_tue_laptop'] = (
    df_user_props['use_tue_laptop']
    .map(dict_use_tue_laptop)
)

### Data: user trial properties


In [None]:
# Lookup table translating the numeric input-method code into a readable name.
dict_input_method = {
    0: 'trackpad',
    1: 'mouse',
}

In [None]:
# Load the per-user, per-trial properties with default parsing.
df_user_trial_props = pd.read_csv(
    '../datasets/user_trial_props.csv',
)

In [None]:
# Mean approximate speed per (user, trial); the double brackets keep the
# result a one-column DataFrame rather than a Series.
df_speeds = (
    df_fitts
    .groupby(['user', 'trial'])[['approximate_speed']]
    .mean()
)

### Computing several aggregated quantities at once


In [None]:
# Several aggregates per (user, trial) in one pass: mean and median of the
# approximate speed, and the maximum of x.
df_features = (
    df_fitts
    .groupby(['user', 'trial'])
    .agg({
        'approximate_speed': ['mean', 'median'],
        'x': ['max'],
    })
)

In [None]:
# Replace the column labels by flat, readable names (same order as the columns).
df_features.columns = [
    'appr_speed_mean',
    'appr_speed_median',
    'x_max',
]

### Joining two dataframes


In [None]:
# Reload the Fitts data and index it by (user, trial) only. The chained
# set_index replaces the inplace=True mutation of the freshly loaded frame.
df_fitts = pd.read_csv('../datasets/fitts.csv').set_index(['user', 'trial'])
# Attach the per-(user, trial) features to every row; join() aligns on the index.
df_results = df_fitts.join(df_features)

# <span class="section">3.</span> Empirical cumulative distribution functions


In [None]:
# Small example data set: five observed outcomes in a single column.
df_experiment = pd.DataFrame({'outcome': [5, 3, 5, 7, 1]})

In [None]:
# Display how often each distinct outcome occurs.
(
    df_experiment['outcome']
    .value_counts()
)

In [None]:
# Occurrence counts per outcome, stored as a one-column DataFrame named 'counts'.
df_counts = pd.DataFrame(
    df_experiment['outcome']
    .value_counts()
    .rename('counts')
)

In [None]:
# Order the rows by outcome value (the index). Rebinding the result instead of
# using inplace=True keeps the step explicit and chainable.
df_counts = df_counts.sort_index()

In [None]:
# ECDF value per outcome: running count divided by the total count.
df_counts['ecdf'] = (
    df_counts['counts'].cumsum()
    / df_counts['counts'].sum()
)

In [None]:
# Step plot of the ECDF; 'steps-post' holds each value until the next index.
ax = df_counts[['ecdf']].plot(drawstyle='steps-post')
# The trailing ';' keeps the (0.0, 10.0) return value out of the cell output.
ax.set_xlim(0, 10);

In [None]:
# Work on a copy of the ECDF column, then add two far-away rows so that the
# step function is 0 far to the left and 1 far to the right of the data.
df_ecdf = df_counts.loc[:, ['ecdf']].copy()
df_ecdf.loc[-2000, 'ecdf'] = 0
df_ecdf.loc[2000, 'ecdf'] = 1
# Display the extended table.
df_ecdf

In [None]:
# Put the rows in index order. Rebinding the result instead of using
# inplace=True keeps the step explicit and chainable.
df_ecdf = df_ecdf.sort_index()

In [None]:
# Step plot of the extended ECDF, restricted to the range 0..10 on the x-axis.
ax = df_ecdf['ecdf'].plot(drawstyle='steps-post')
ax.set_xlabel('outcome')
ax.set_xlim(0, 10)
ax.set_title('ECDF', fontsize=14);