**Data manipulation, vizualization and analysis**

In [2]:
%load_ext autoreload
%autoreload 2
# imports for config
import yaml

# imports for data manipulation
import pandas as pd
import numpy as np

# imports for making figures
import plotly.graph_objects as go

# imports for statistical analysis
import statistical_analysis
import scipy



In [3]:
def get_config():
    with open("config.yaml", 'r') as stream:
        config = yaml.safe_load(stream)
    return config

config = get_config()

fingerpad_df = pd.read_excel(config['fingerpad'])

**Data manipulation**

In [4]:
# drop the unnamed column containing the overall experiment no.
fingerpad_df.drop(columns='Unnamed: 0', inplace=True)

# set the index to the experiment ID for better sorting
fingerpad_df.set_index('experiment ID', inplace=True)

In [5]:
fingerpad_df = fingerpad_df.T

In [6]:
fingerpad_df

experiment ID,HS1-1,HS1-2,HS1-3,C_HS1-1,C_HS1-2,C_HS1-3,HS2-1,HS2-2,HS2-3,C_HS2-1,...,HS3-3,C_HS3-1,C_HS3-2,C_HS3-3,HS4-1,HS4-2,HS4-3,C_HS4-1,C_HS4-2,C_HS4-2.1
1,42.0,120.0,,26.0,28.0,,,,,,...,,,,,,,,,,
2,79.0,90.0,90.0,6.0,30.0,90.0,,,,,...,,,,,,,,,,
3,3.0,11.0,3.0,7.0,30.0,3.0,,,,,...,,,,,,,,,,
4,6.0,76.0,1.0,12.0,81.0,1.0,,,,,...,,,,,,,,,,
5,17.0,22.0,9.0,40.0,5.0,9.0,,,,,...,,,,,,,,,,
6,57.0,43.0,100.0,62.0,27.0,47.0,,,,,...,,,,,,,,,,


In [7]:
fingerpad_df = fingerpad_df.groupby(np.arange(len(fingerpad_df.columns))//3, axis=1).mean()

  fingerpad_df = fingerpad_df.groupby(np.arange(len(fingerpad_df.columns))//3, axis=1).mean()


In [8]:
fingerpad_df

Unnamed: 0,0,1,2,3,4,5,6,7
1,81.0,27.0,,,,,,
2,86.333333,42.0,,,,,,
3,5.666667,13.333333,,,,,,
4,27.666667,31.333333,,,,,,
5,16.0,18.0,,,,,,
6,66.666667,45.333333,,,,,,


In [9]:
fingerpad_df.rename({0:'HS1', 1:'C_HS1', 2:'HS2', 3:'C_HS2', 4:'HS3', 5:'C_HS3', 6:'HS4', 7:'C_HS4'}, axis=1, inplace=True)

In [10]:
fingerpad_df.T

Unnamed: 0,1,2,3,4,5,6
HS1,81.0,86.333333,5.666667,27.666667,16.0,66.666667
C_HS1,27.0,42.0,13.333333,31.333333,18.0,45.333333
HS2,,,,,,
C_HS2,,,,,,
HS3,,,,,,
C_HS3,,,,,,
HS4,,,,,,
C_HS4,,,,,,


In [11]:
# initialize a plotly histogram
fig = go.Figure()
fig.add_trace(go.Histogram(x=fingerpad_df['HS1'].values, name = fingerpad_df['HS1'].name))
fig.add_trace(go.Histogram(x=fingerpad_df['C_HS1'].values, name = fingerpad_df['C_HS1'].name))

fig.update_layout(barmode='stack')
fig.show()

**Statistical analysis**

To check whether the data form a normal distribution, an anderson-darling test is performed (obtained from Emile Apol)

In [12]:
statistical_analysis.DS_AndersonDarling_test_normal(fingerpad_df['HS1'].values)
statistical_analysis.DS_AndersonDarling_test_normal(fingerpad_df['C_HS1'].values)

The data is normally distributed! p=0.2997886276168647
The data is normally distributed! p=0.7724820231411117


**Data follows a normal distribution**:

2-sample (Welch) t-test (2 datasets, control and hand-sanitizers)

In [13]:
statistical_analysis.ind_t_test(fingerpad_df['HS1'].values, fingerpad_df['C_HS1'].values)

There is not a significant different between control and hand-sanitizers H0 is true p=0.27107765283903995


**Data does not follow a normal distribution**:

Mann-whitney U test (2 datasets, control and hand-sanitizers)

In [14]:
statistical_analysis.mann_withney(fingerpad_df['HS1'].values, fingerpad_df['C_HS1'].values)

There is not a significant different between control and hand-sanitizers H0 is true p=0.5887445887445888


**Visual analysis**

In [15]:
sanitizer_list = []
sanitizer_values = []
control_list = []
control_values = []


for sanitizer in range(0, 8):
    if sanitizer % 2 == 0:
        sanitizer_list.append(fingerpad_df.keys()[sanitizer])
        sanitizer_values.append(fingerpad_df.mean().values[sanitizer])
    else:
        control_list.append(fingerpad_df.keys()[sanitizer])
        control_values.append(fingerpad_df.mean().values[sanitizer])

In [17]:
from scipy.stats import sem
fig = go.Figure()
fig.add_trace(go.Bar(x=sanitizer_list,
                y=sanitizer_values,
                name='Hand Sanitizer',
                marker_color='rgb(55, 83, 109)',error_y=dict(type='constant', value=sem(fingerpad_df['HS1'].values)) 
                ))
fig.add_trace(go.Bar(x=sanitizer_list,
                y=control_values,
                name='Control',
                marker_color='rgb(26, 118, 255)', error_y=dict(type='constant', value=sem(fingerpad_df['C_HS1'].values))
                ))

fig.update_layout(
    title='Evaluation of hand sanitizers vs. control',
    xaxis_tickfont_size=14,
    yaxis=dict(
        title='Number of colonies',
        titlefont_size=16,
        tickfont_size=14,
    ),
    legend=dict(
        x=20,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()
fig.write_image("images/fig1.jpeg")

ValueError: 
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido
