**Data manipulation, visualization and analysis**

In [None]:
%load_ext autoreload
%autoreload 2
# imports for config
import yaml

# imports for data manipulation
import pandas as pd
import numpy as np

# imports for making figures
import plotly.graph_objects as go

# imports for statistical analysis
import statistical_analysis
import scipy



In [None]:
def get_config():
    """Read ``config.yaml`` from the working directory and return its parsed content."""
    with open("config.yaml", 'r') as config_file:
        # safe_load only builds plain Python objects from the YAML document
        return yaml.safe_load(config_file)

# Parse the notebook configuration once so later cells can look settings up by key
config = get_config()

# The 'fingerpad' entry of the config is handed to read_excel as the workbook location
fingerpad_df = pd.read_excel(config['fingerpad'])

**Data manipulation**

In [None]:
# Discard the unnamed column holding the overall experiment no., then
# index the rows by experiment ID for better sorting
fingerpad_df = (
    fingerpad_df
    .drop(columns='Unnamed: 0')
    .set_index('experiment ID')
)

In [None]:
# Swap rows and columns so every experiment ID becomes a column
fingerpad_df = fingerpad_df.transpose()

In [None]:
# Show the transposed frame (experiment IDs are now the column labels)
fingerpad_df

In [None]:
# Average every run of three consecutive columns into a single column.
# Integer-dividing each column position by 3 gives the group key (0, 0, 0, 1, 1, 1, ...).
# DataFrame.groupby(axis=1) is deprecated since pandas 2.1, so the grouping is done
# on the rows of the transposed frame and the result is transposed back.
replicate_group = np.arange(len(fingerpad_df.columns)) // 3
fingerpad_df = fingerpad_df.T.groupby(replicate_group).mean().T

In [None]:
# Show the frame after averaging each group of three columns (columns are now 0, 1, 2, ...)
fingerpad_df

In [None]:
# Give the numbered group columns their labels; position in the list is the group number
group_labels = ['HS1', 'C_HS1', 'HS2', 'C_HS2', 'HS3', 'C_HS3', 'HS4', 'C_HS4']
fingerpad_df = fingerpad_df.rename(columns=dict(enumerate(group_labels)))

In [None]:
# Show the labelled frame transposed, i.e. one row per group label
fingerpad_df.T

In [None]:
# Build a plotly histogram figure with one trace per compared column
fig = go.Figure()
for column in ('HS1', 'C_HS1'):
    series = fingerpad_df[column]
    fig.add_trace(go.Histogram(x=series.values, name=series.name))

# Stack the two traces' bars on top of each other
fig.update_layout(barmode='stack')
fig.show()

**Statistical analysis**

To check whether the data follow a normal distribution, an Anderson-Darling test is performed (obtained from Emile Apol).

In [None]:
# Run the Anderson-Darling normality check on the HS1 values and on the C_HS1 values.
# NOTE(review): a notebook cell only auto-displays the value of its last expression,
# so the first call's result is visible only if the helper prints it itself - confirm.
statistical_analysis.DS_AndersonDarling_test_normal(fingerpad_df['HS1'].values)
statistical_analysis.DS_AndersonDarling_test_normal(fingerpad_df['C_HS1'].values)

**Data follows a normal distribution**:

2-sample (Welch) t-test (2 datasets, control and hand-sanitizers)

In [None]:
# Compare the HS1 values against the C_HS1 values with the project's independent t-test helper.
# NOTE(review): the heading calls this a Welch test - confirm ind_t_test does not assume equal variances.
statistical_analysis.ind_t_test(fingerpad_df['HS1'].values, fingerpad_df['C_HS1'].values)

**Data does not follow a normal distribution**:

Mann-Whitney U test (2 datasets, control and hand-sanitizers)

In [None]:
# Compare the HS1 values against the C_HS1 values with the project's Mann-Whitney helper
# (the helper's name is spelled "mann_withney" in the statistical_analysis module).
statistical_analysis.mann_withney(fingerpad_df['HS1'].values, fingerpad_df['C_HS1'].values)

**Visual analysis**

In [None]:
# Split the per-column means into sanitizer and control groups.
# Columns alternate sanitizer, control, sanitizer, control, ... so even positions
# are sanitizers and odd positions are their controls.
# The mean is computed once (not once per loop iteration) and the names are taken
# from the same Series as the values, so the two can never get out of step; slicing
# also removes the hard-coded column count.
column_means = fingerpad_df.mean()

sanitizer_means = column_means.iloc[::2]
control_means = column_means.iloc[1::2]

sanitizer_list = sanitizer_means.index.tolist()
sanitizer_values = sanitizer_means.tolist()
control_list = control_means.index.tolist()
control_values = control_means.tolist()

In [None]:
from scipy.stats import sem

# One standard error of the mean per bar, computed from that bar's own column.
# A single 'constant' error taken from HS1 / C_HS1 would put the same (wrong)
# error bar on every other sanitizer and control.
sanitizer_sem = [sem(fingerpad_df[column].values) for column in sanitizer_list]
control_sem = [sem(fingerpad_df[column].values) for column in control_list]

fig = go.Figure()
fig.add_trace(go.Bar(
    x=sanitizer_list,
    y=sanitizer_values,
    name='Hand Sanitizer',
    marker_color='rgb(55, 83, 109)',
    error_y=dict(type='data', array=sanitizer_sem),
))
# The control bars reuse the sanitizer names on the x axis so that each control
# is drawn next to the sanitizer it belongs to.
fig.add_trace(go.Bar(
    x=sanitizer_list,
    y=control_values,
    name='Control',
    marker_color='rgb(26, 118, 255)',
    error_y=dict(type='data', array=control_sem),
))

fig.update_layout(
    title='Evaluation of hand sanitizers vs. control',
    xaxis_tickfont_size=14,
    yaxis=dict(
        # title.font replaces the deprecated `titlefont` attribute
        title=dict(text='Number of colonies', font=dict(size=16)),
        tickfont_size=14,
    ),
    legend=dict(
        # paper coordinates: just right of the plotting area (20 was far outside
        # the documented -2..3 range)
        x=1.02,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()