**Data manipulation, visualization and analysis**

In [48]:
%load_ext autoreload
%autoreload 2
# imports for config
import yaml

# imports for data manipulation
import pandas as pd
import numpy as np

# imports for making figures
import plotly.graph_objects as go

# imports for statistical analysis
import statistical_analysis



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [49]:
def get_config(path="config.yaml"):
    """Load the notebook configuration from a YAML file.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the YAML file. Defaults to ``"config.yaml"``, which is
        resolved relative to the current working directory.

    Returns
    -------
    object
        Whatever ``yaml.safe_load`` produces for the file contents.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    # Explicit encoding so the file is decoded the same way on every platform.
    with open(path, 'r', encoding='utf-8') as stream:
        config = yaml.safe_load(stream)
    return config

# Read the settings once, then load the Excel file stored under the
# 'fingerpad' key of the configuration.
config = get_config()

fingerpad_df = pd.read_excel(io=config['fingerpad'])

**Data manipulation**

In [50]:
# Discard the unnamed column (overall experiment no.) and use the
# experiment ID as the index for better sorting.
fingerpad_df = (
    fingerpad_df
    .drop(columns='Unnamed: 0')
    .set_index('experiment ID')
)

In [54]:
# Swap rows and columns of the table.
fingerpad_df = fingerpad_df.transpose()

In [57]:
# Average every run of three consecutive columns into a single column.
# The frame is transposed before grouping and transposed back afterwards
# because DataFrame.groupby(axis=1) is deprecated in pandas.
fingerpad_df = (
    fingerpad_df.T
    .groupby(np.arange(len(fingerpad_df.columns)) // 3)
    .mean()
    .T
)


DataFrame.groupby with axis=1 is deprecated. Do `frame.T.groupby(...)` without axis instead.



In [61]:
# Replace the numeric group labels with readable column names.
fingerpad_df = fingerpad_df.rename(
    columns={
        0: 'HS1', 1: 'C_HS1',
        2: 'HS2', 3: 'C_HS2',
        4: 'HS3', 5: 'C_HS3',
        6: 'HS4', 7: 'C_HS4',
    }
)

In [71]:
# Display the table with one row per column label.
fingerpad_df.transpose()

Unnamed: 0,1,2,3,4,5,6
HS1,42.0,79.0,3.0,6.0,19.5,57.0
C_HS1,26.0,6.0,7.0,12.0,22.5,62.0
HS2,,,,,,
C_HS2,,,,,,
HS3,,,,,,
C_HS3,,,,,,
HS4,,,,,,
C_HS4,,,,,,


In [64]:
# Stacked histogram of the HS1 column and the C_HS1 column.
fig = go.Figure(
    data=[
        go.Histogram(
            x=fingerpad_df['HS1'].values,
            name=fingerpad_df['HS1'].name,
            nbinsx=10,
        ),
        go.Histogram(
            x=fingerpad_df['C_HS1'].values,
            name=fingerpad_df['C_HS1'].name,
        ),
    ]
)

fig.update_layout(barmode='stack')
fig.show()

**Statistical analysis**

To check whether the data are normally distributed, an Anderson-Darling test is performed (obtained from Emile Apol).

In [66]:
# Run the Anderson-Darling normality check on HS1 and then on C_HS1.
for column in ('HS1', 'C_HS1'):
    statistical_analysis.DS_AndersonDarling_test_normal(fingerpad_df[column].values)

The data is normally distributed! p=0.6058959534565813
The data is normally distributed! p=0.09506097868193401


**If the data follow a normal distribution**:

Two-sample (Welch) t-test (2 datasets: control and hand sanitizer)

In [120]:
# Compare the HS1 column with the C_HS1 column using the t-test helper.
statistical_analysis.ind_t_test(
    fingerpad_df['HS1'].to_numpy(),
    fingerpad_df['C_HS1'].to_numpy(),
)

There is not a significant different between control and hand-sanitizers H0 is true p=0.4490432430527298


**If the data do not follow a normal distribution**:

Mann-Whitney U test (2 datasets: control and hand sanitizer)

In [68]:
# Compare the HS1 column with the C_HS1 column using the Mann-Whitney helper.
statistical_analysis.mann_withney(
    fingerpad_df['HS1'].to_numpy(),
    fingerpad_df['C_HS1'].to_numpy(),
)

There is not a significant different between control and hand-sanitizers H0 is true p=0.8725590308923818


**Visual analysis**

In [114]:
# Mean of every column, computed once rather than on each loop iteration.
# Labels are taken from the same Series so each label stays paired with
# its value.
column_means = fingerpad_df.mean()

# Even positions are plotted as 'Hand Sanitizer' and odd positions as
# 'Control' in the bar chart. Slicing works for any number of columns.
sanitizer_list = list(column_means.index[::2])
sanitizer_values = list(column_means.values[::2])
control_list = list(column_means.index[1::2])
control_values = list(column_means.values[1::2])

In [127]:
# Error bars: sample standard deviation of each column of fingerpad_df,
# one value per bar. These replace hard-coded placeholder values that were
# not derived from the data.
# NOTE(review): standard deviation is assumed to be the wanted measure;
# switch to .sem() if the standard error is preferred.
sanitizer_errors = fingerpad_df[sanitizer_list].std().values
control_errors = fingerpad_df[control_list].std().values

fig = go.Figure()
fig.add_trace(go.Bar(
    x=sanitizer_list,
    y=sanitizer_values,
    name='Hand Sanitizer',
    marker_color='rgb(55, 83, 109)',
    error_y=dict(type='data', array=sanitizer_errors),
))
# The control bars reuse the sanitizer labels on the x axis so that, with
# barmode='group', each control bar is drawn beside its sanitizer bar.
fig.add_trace(go.Bar(
    x=sanitizer_list,
    y=control_values,
    name='Control',
    marker_color='rgb(26, 118, 255)',
    error_y=dict(type='data', array=control_errors),
))

fig.update_layout(
    title='Evaluation of hand sanitizers vs. control',
    xaxis_tickfont_size=14,
    yaxis=dict(
        # title.font is the supported replacement for the deprecated
        # `titlefont` attribute.
        title=dict(text='Number of colonies', font=dict(size=16)),
        tickfont_size=14,
    ),
    legend=dict(
        # Paper coordinates: 1.02 puts the legend just right of the plot area.
        x=1.02,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15, # gap between bars of adjacent location coordinates.
    bargroupgap=0.1 # gap between bars of the same location coordinate.
)
fig.show()

In [102]:
# Quick check: mean of the first column.
fingerpad_df.mean().iloc[0]

34.416666666666664

In [100]:
# Quick check: label of the second column.
fingerpad_df.columns[1]

'C_HS1'