In [81]:
import pandas as pd
import numpy as np
import plotly.express as px
from tqdm.auto import tqdm
from scipy import stats

import jupyter_black

jupyter_black.load()

In [82]:
df = pd.read_csv(
    r"/Users/yotamhadari/Desktop/SOLIO/Group_1_SOLIO_0408.csv", index_col=[0, 1]
)
group_df = pd.read_csv(
    r"/Users/yotamhadari/Desktop/SOLIO/Device_type_group_1.csv", index_col=[0]
)

In [83]:
group_dict = {}
for group in group_df.iloc[:, 0].drop_duplicates():
    group_dict[group] = list(group_df.loc[group_df.iloc[:, 0] == group].index)

group_dict
# group_dict = {"real": [1, 5, 7, 9, 16, 17, 13, 14, 21], "sham": [2, 3, 4, 18, 20]}

{'real': [1, 5, 7, 9, 16, 17, 21, 13, 14], 'sham': [2, 3, 4, 18, 20]}

In [84]:
df["group"] = [group_df.loc[i, "Device type"] for i in df.index.get_level_values(0)]
df_reals = df.loc[df["group"] == "real"].drop("group", axis=1)
df_shams = df.loc[df["group"] == "sham"].drop("group", axis=1)

In [85]:
a = df_reals["Pain VAS"]
b = df_shams["Pain VAS"]

In [86]:
t, pt = stats.ttest_ind(df_reals, df_shams, nan_policy="omit", random_state=42)
pt

masked_array(data=[0.05028433, 0.27484498, 0.34964966, 0.00821671,
                   0.21707306, 0.20064142, 0.8631002 , 0.01972261,
                   0.22622036, 0.86442278, 0.15173047],
             mask=False,
       fill_value=1e+20)

In [87]:
u, pu = stats.mannwhitneyu(df_reals, df_shams, nan_policy="omit")
pu

array([0.04286173, 0.37377577, 0.41251719, 0.03420416, 0.16433368,
       0.23721627, 0.79777562, 0.02860011, 0.73204854, 0.49188224,
       0.36529668])

## Creating difference dataframes and running analysis

In [88]:
df = pd.read_csv(
    r"/Users/yotamhadari/Desktop/SOLIO/Group_1_SOLIO_0408.csv", index_col=[0, 1]
)


def create_indiviudal_user_dfs(df):
    """Create a dictionary where keys are user_n and values and individual answer df for that user."""
    users_dfs = {}
    for user in df.index.get_level_values(0).drop_duplicates():  # Used later
        users_dfs[user] = df.loc[df.index.get_level_values(0) == user]
    return users_dfs


users_dfs = create_indiviudal_user_dfs(df)

In [89]:
diff_users = {}
for user in users_dfs.keys():
    user_df = users_dfs[user]
    user_df_vas = user_df["Pain VAS"].diff(periods=1)
    user_df_rest = user_df.iloc[:, 1:].diff(5)
    user_df = pd.concat((user_df_vas, user_df_rest), axis=1)
    diff_users[user] = user_df
diff_df = pd.concat(diff_users).droplevel(0)
diff_df.dropna(how="all", inplace=True)

In [90]:
diff_df["group"] = [
    group_df.loc[i, "Device type"] for i in diff_df.index.get_level_values(0)
]
diff_df_reals = diff_df.loc[diff_df["group"] == "real"].drop("group", axis=1)
diff_df_shams = diff_df.loc[diff_df["group"] == "sham"].drop("group", axis=1)

In [91]:
t, pt = stats.ttest_ind(
    diff_df_reals, diff_df_shams, nan_policy="omit", random_state=42
)
pt

masked_array(data=[0.89811369, 0.80572892, 0.83634946, 0.61523497,
                   0.47796129, 0.18243975, 0.6458223 , 0.47039149,
                   0.20241567, 0.40444223, 0.90253256],
             mask=False,
       fill_value=1e+20)

In [92]:
u, pu = stats.mannwhitneyu(diff_df_reals, diff_df_shams, nan_policy="omit")
pu

array([0.43656435, 0.92273998, 0.59690568, 0.79710649, 0.62388602,
       0.23182971, 0.60088819, 0.53669614, 0.5911679 , 0.43297155,
       0.82024611])

In [93]:
pct_change_users = {}
for user in users_dfs.keys():
    user_df = users_dfs[user]
    user_df_vas = user_df["Pain VAS"].pct_change(periods=1)
    user_df_rest = user_df.iloc[:, 1:].pct_change(5)
    user_df = pd.concat((user_df_vas, user_df_rest), axis=1)
    pct_change_users[user] = user_df
pct_change_df = pd.concat(diff_users).droplevel(0)
pct_change_df.dropna(how="all", inplace=True)

In [94]:
pct_change_df["group"] = [
    group_df.loc[i, "Device type"] for i in pct_change_df.index.get_level_values(0)
]
pct_change_df_reals = diff_df.loc[diff_df["group"] == "real"].drop("group", axis=1)
pct_change_df_shams = diff_df.loc[diff_df["group"] == "sham"].drop("group", axis=1)

In [95]:
t, pct_tp = stats.ttest_ind(
    pct_change_df_reals, pct_change_df_shams, nan_policy="omit", random_state=42
)
pct_tp

masked_array(data=[0.89811369, 0.80572892, 0.83634946, 0.61523497,
                   0.47796129, 0.18243975, 0.6458223 , 0.47039149,
                   0.20241567, 0.40444223, 0.90253256],
             mask=False,
       fill_value=1e+20)

In [96]:
u, pct_pu = stats.mannwhitneyu(
    pct_change_df_reals, pct_change_df_shams, nan_policy="omit"
)
pct_pu


array([0.43656435, 0.92273998, 0.59690568, 0.79710649, 0.62388602,
       0.23182971, 0.60088819, 0.53669614, 0.5911679 , 0.43297155,
       0.82024611])

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True])