In [83]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import scipy.stats as stats
import plotly.figure_factory as ff

import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import f_oneway, ttest_ind
import itertools

In [42]:
df = pd.read_csv("received_messages.csv")
df = df.sort_values(by=["Material", "timeStamp"])

In [43]:
df

Unnamed: 0,timeStamp,Material,TxPower,RTT,RSSI
0,30125,<unknown>,50,2,-32
1,30217,<unknown>,32,3,-34
2,30460,<unknown>,26,7,-33
3,30662,<unknown>,18,8,-33
4,30847,<unknown>,6,2,-46
...,...,...,...,...,...
819,613877,Water90Gelatin10,76,1,-49
820,614125,Water90Gelatin10,70,1,-32
821,614330,Water90Gelatin10,58,1,-35
822,614536,Water90Gelatin10,50,1,-37


In [44]:
df.drop(df.loc[df.Material == "<unknown>"].index, axis=0, inplace=True)
df.reset_index(inplace=True,drop=True)

In [45]:
df.Material.unique()

array(['Empty box with air', 'W60G40-500gm', 'WATER', 'Water25Gelatin75',
       'Water60Gelatin40', 'Water90Gelatin10'], dtype=object)

In [46]:
df.TxPower.sort_values().unique()

array([ 6,  8, 18, 20, 26, 28, 32, 34, 44, 50, 58, 60, 66, 70, 76, 78],
      dtype=int64)

In [47]:
df.sort_values(by="TxPower").groupby(by="Material")["TxPower"].unique()

Material
Empty box with air    [6, 18, 26, 32, 50, 58, 70, 76]
W60G40-500gm          [8, 20, 28, 34, 44, 60, 66, 78]
WATER                 [6, 18, 26, 32, 50, 58, 70, 76]
Water25Gelatin75      [6, 18, 26, 32, 50, 58, 70, 76]
Water60Gelatin40      [8, 20, 28, 34, 44, 60, 66, 78]
Water90Gelatin10      [6, 18, 26, 32, 50, 58, 70, 76]
Name: TxPower, dtype: object

In [48]:
df["powerGroup"] = None
for material in df.Material.unique():
    for i, power in enumerate(df.loc[df.Material == material].TxPower.sort_values().unique()):
        df.loc[(df.Material == material) & (df.TxPower == power), "powerGroup"] = i
df = df.sort_values(by=["Material", "powerGroup", "timeStamp"]).reset_index(drop=True)

In [49]:
df.sort_values(by="powerGroup").groupby(by="Material")["powerGroup"].unique()

Material
Empty box with air    [0, 1, 2, 3, 4, 5, 6, 7]
W60G40-500gm          [0, 1, 2, 3, 4, 5, 6, 7]
WATER                 [0, 1, 2, 3, 4, 5, 6, 7]
Water25Gelatin75      [0, 1, 2, 3, 4, 5, 6, 7]
Water60Gelatin40      [0, 1, 2, 3, 4, 5, 6, 7]
Water90Gelatin10      [0, 1, 2, 3, 4, 5, 6, 7]
Name: powerGroup, dtype: object

In [50]:
df[["Material", "powerGroup", "RSSI", "RTT"]].describe(include='all')

Unnamed: 0,Material,powerGroup,RSSI,RTT
count,1440,1440.0,1440.0,1440.0
unique,6,8.0,,
top,Empty box with air,0.0,,
freq,240,180.0,,
mean,,,-32.886806,1.423611
std,,,6.501967,0.995332
min,,,-49.0,1.0
25%,,,-37.0,1.0
50%,,,-31.0,1.0
75%,,,-28.0,2.0


In [93]:
# Function to check assumptions within each power group
def check_assumptions(data, group_col, value_col, power_col):
    power_groups = data[power_col].unique()

    for power in power_groups:
        subset = data[data[power_col] == power]
        groups = subset[group_col].unique()

        print(f"\nChecking assumptions for power group {power}:")

        # Normality check using Shapiro-Wilk test
        print("\nNormality Check (Shapiro-Wilk Test):")
        for group in groups:
            group_data = subset[subset[group_col] == group][value_col]
            stat, p_value = stats.shapiro(group_data)
            print(f'{group}: W={stat:.4f}, p-value={p_value:.4f}')

            # Q-Q Plot for each group using Plotly
            qq_fig = go.Figure()
            qq_fig.add_trace(go.Scatter(
                x=stats.probplot(group_data, dist="norm")[0][0], 
                y=stats.probplot(group_data, dist="norm")[0][1],
                mode='markers',
                name=f'{group} Q-Q Plot'
            ))
            qq_fig.add_trace(go.Scatter(
                x=stats.probplot(group_data, dist="norm")[0][0], 
                y=stats.probplot(group_data, dist="norm")[0][0],
                mode='lines',
                name='Ideal Line'
            ))
            qq_fig.update_layout(
                title=f'Q-Q Plot for {group} (Power Group {power})',
                xaxis_title='Theoretical Quantiles',
                yaxis_title='Sample Quantiles',
                template='plotly_dark'
            )
            qq_fig.show()

        # Homogeneity of variances check using Levene's test
        print("\nHomogeneity of Variances Check (Levene's Test):")
        group_data = [subset[subset[group_col] == group][value_col] for group in groups]
        stat, p_value = stats.levene(*group_data)
        print(f'Levene\'s Test: W={stat:.4f}, p-value={p_value:.4f}')

# Check assumptions for RSSI within each power group
print("Checking assumptions for RSSI within each power group:")
check_assumptions(df, 'Material', 'RSSI', 'powerGroup')

# Check assumptions for RTT within each power group
print("\nChecking assumptions for RTT within each power group:")
check_assumptions(df, 'Material', 'RTT', 'powerGroup')

Checking assumptions for RSSI within each power group:

Checking assumptions for power group 0:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.7010, p-value=0.0000


W60G40-500gm: W=0.6464, p-value=0.0000


WATER: W=0.3266, p-value=0.0000


Water25Gelatin75: W=0.2845, p-value=0.0000


Water60Gelatin40: W=0.5856, p-value=0.0000


Water90Gelatin10: W=0.4922, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=1.3193, p-value=0.2580

Checking assumptions for power group 1:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.6190, p-value=0.0000


W60G40-500gm: W=0.4044, p-value=0.0000


WATER: W=0.6366, p-value=0.0000


Water25Gelatin75: W=0.6382, p-value=0.0000


Water60Gelatin40: W=0.6366, p-value=0.0000


Water90Gelatin10: W=0.6160, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=4.7916, p-value=0.0004

Checking assumptions for power group 2:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.7200, p-value=0.0000


W60G40-500gm: W=0.5258, p-value=0.0000


WATER: W=0.5024, p-value=0.0000


Water25Gelatin75: W=0.6236, p-value=0.0000


Water60Gelatin40: W=0.6318, p-value=0.0000


Water90Gelatin10: W=0.7403, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=1.3929, p-value=0.2292

Checking assumptions for power group 3:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.7879, p-value=0.0000


W60G40-500gm: W=0.5558, p-value=0.0000


WATER: W=0.7914, p-value=0.0000


Water25Gelatin75: W=0.5587, p-value=0.0000


Water60Gelatin40: W=0.6160, p-value=0.0000


Water90Gelatin10: W=0.7034, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=1.6338, p-value=0.1534

Checking assumptions for power group 4:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.7539, p-value=0.0000


W60G40-500gm: W=0.5540, p-value=0.0000


WATER: W=0.6625, p-value=0.0000


Water25Gelatin75: W=0.6236, p-value=0.0000


Water60Gelatin40: W=0.6318, p-value=0.0000


Water90Gelatin10: W=0.8380, p-value=0.0004



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=1.0701, p-value=0.3786

Checking assumptions for power group 5:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.7053, p-value=0.0000


W60G40-500gm: W=0.6318, p-value=0.0000


WATER: W=0.6382, p-value=0.0000


Water25Gelatin75: W=0.6366, p-value=0.0000


Water60Gelatin40: W=0.6625, p-value=0.0000


Water90Gelatin10: W=0.6316, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=0.1347, p-value=0.9842

Checking assumptions for power group 6:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.6804, p-value=0.0000


W60G40-500gm: W=0.2025, p-value=0.0000


WATER: W=0.6366, p-value=0.0000


Water25Gelatin75: W=0.5140, p-value=0.0000


Water60Gelatin40: W=0.4955, p-value=0.0000


Water90Gelatin10: W=0.6318, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=0.6476, p-value=0.6637

Checking assumptions for power group 7:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.6756, p-value=0.0000


W60G40-500gm: W=0.6381, p-value=0.0000


WATER: W=0.6733, p-value=0.0000


Water25Gelatin75: W=0.6276, p-value=0.0000


Water60Gelatin40: W=0.6572, p-value=0.0000


Water90Gelatin10: W=0.6500, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=1.2760, p-value=0.2764

Checking assumptions for RTT within each power group:

Checking assumptions for power group 0:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.4735, p-value=0.0000


W60G40-500gm: W=0.5258, p-value=0.0000


WATER: W=0.5258, p-value=0.0000


Water25Gelatin75: W=0.4922, p-value=0.0000


Water60Gelatin40: W=0.4922, p-value=0.0000


Water90Gelatin10: W=0.5540, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=0.1219, p-value=0.9874

Checking assumptions for power group 1:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.6572, p-value=0.0000


W60G40-500gm: W=0.4922, p-value=0.0000


WATER: W=0.5181, p-value=0.0000


Water25Gelatin75: W=0.4976, p-value=0.0000


Water60Gelatin40: W=0.5540, p-value=0.0000


Water90Gelatin10: W=0.4735, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=7.3984, p-value=0.0000

Checking assumptions for power group 2:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.7407, p-value=0.0000


W60G40-500gm: W=0.6625, p-value=0.0000


WATER: W=0.3293, p-value=0.0000


Water25Gelatin75: W=0.5258, p-value=0.0000


Water60Gelatin40: W=0.6236, p-value=0.0000


Water90Gelatin10: W=0.5935, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=5.6787, p-value=0.0001

Checking assumptions for power group 3:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.4539, p-value=0.0000


W60G40-500gm: W=0.4522, p-value=0.0000


WATER: W=0.5258, p-value=0.0000


Water25Gelatin75: W=0.4281, p-value=0.0000


Water60Gelatin40: W=0.5540, p-value=0.0000


Water90Gelatin10: W=0.5181, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=1.4219, p-value=0.2186

Checking assumptions for power group 4:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.7879, p-value=0.0000


W60G40-500gm: W=0.5587, p-value=0.0000


WATER: W=0.5258, p-value=0.0000


Water25Gelatin75: W=0.6119, p-value=0.0000


Water60Gelatin40: W=0.5258, p-value=0.0000


Water90Gelatin10: W=0.5774, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=7.3814, p-value=0.0000

Checking assumptions for power group 5:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.8329, p-value=0.0003


W60G40-500gm: W=0.4044, p-value=0.0000


WATER: W=0.4044, p-value=0.0000


Water25Gelatin75: W=0.6232, p-value=0.0000


Water60Gelatin40: W=0.5966, p-value=0.0000


Water90Gelatin10: W=0.4044, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=11.6455, p-value=0.0000

Checking assumptions for power group 6:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.8097, p-value=0.0001


W60G40-500gm: W=0.4922, p-value=0.0000


WATER: W=0.4922, p-value=0.0000


Water25Gelatin75: W=0.4708, p-value=0.0000


Water60Gelatin40: W=0.5540, p-value=0.0000


Water90Gelatin10: W=0.5774, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=14.5591, p-value=0.0000

Checking assumptions for power group 7:

Normality Check (Shapiro-Wilk Test):
Empty box with air: W=0.6021, p-value=0.0000


W60G40-500gm: W=0.5540, p-value=0.0000


WATER: W=0.5181, p-value=0.0000


Water25Gelatin75: W=0.5905, p-value=0.0000


Water60Gelatin40: W=0.5587, p-value=0.0000


Water90Gelatin10: W=0.4922, p-value=0.0000



Homogeneity of Variances Check (Levene's Test):
Levene's Test: W=6.1422, p-value=0.0000


In [85]:
# List of unique power groups
power_groups = df['powerGroup'].unique()

# Perform ANOVA for each power group
anova_results = {}
for power in power_groups:
    subset = df[df['powerGroup'] == power]
    anova_model = ols('RSSI ~ C(Material)', data=subset).fit()
    anova_table = sm.stats.anova_lm(anova_model, typ=2)
    anova_results[power] = anova_table

# Calculate effect sizes (Cohen's d) between materials at each power level
def cohen_d(group1, group2):
    diff = group1.mean() - group2.mean()
    pooled_std = ((group1.std()**2 + group2.std()**2) / 2)**0.5
    return diff / pooled_std

effect_sizes = {}
for power in power_groups:
    subset = df[df['powerGroup'] == power]
    materials = subset['Material'].unique()
    combinations = list(itertools.combinations(materials, 2))
    for (mat1, mat2) in combinations:
        group1 = subset[subset['Material'] == mat1]['RSSI']
        group2 = subset[subset['Material'] == mat2]['RSSI']
        effect_sizes[(power, mat1, mat2)] = cohen_d(group1, group2)

# Visualize the results using Plotly (dark theme)
fig = make_subplots(rows=1, cols=2, subplot_titles=("ANOVA Results", "Effect Sizes"))

# ANOVA plot
anova_data = []
for power, table in anova_results.items():
    anova_data.append(go.Bar(name=f'Power Group {power}', x=table.index, y=table['PR(>F)']))

fig.add_traces(anova_data, rows=1, cols=1)

# Effect sizes plot
effect_data = []
for (power, mat1, mat2), effect_size in effect_sizes.items():
    effect_data.append(go.Bar(name=f'{power} ({mat1} vs {mat2})', x=[f'{mat1} vs {mat2}'], y=[effect_size]))

fig.add_traces(effect_data, rows=1, cols=2)

# Update layout
fig.update_layout(title_text="Statistical Analysis of Materials' Interaction with Signal Strength")
fig.show()

In [96]:
# Function to perform Kruskal-Wallis test and visualize results
def kruskal_wallis_test(data, group_col, value_col, power_col):
    power_groups = data[power_col].unique()
    results = {}

    for power in power_groups:
        subset = data[data[power_col] == power]
        groups = subset[group_col].unique()
        group_data = [subset[subset[group_col] == group][value_col] for group in groups]
        
        # Kruskal-Wallis test
        stat, p_value = stats.kruskal(*group_data)
        results[power] = (stat, p_value)

        print(f"Kruskal-Wallis Test for Power Group {power}: H-statistic={stat:.4f}, p-value={p_value:.4f}")

    return results

# Function to calculate rank-biserial correlation
def rank_biserial_correlation(group1, group2):
    u_stat, p_value = stats.mannwhitneyu(group1, group2)
    n1 = len(group1)
    n2 = len(group2)
    rbc = 1 - 2 * u_stat / (n1 * n2)
    return rbc

# Perform Kruskal-Wallis test for RSSI and RTT within each power group
print("Performing Kruskal-Wallis Test for RSSI within each power group:")
kruskal_results_rssi = kruskal_wallis_test(df, 'Material', 'RSSI', 'powerGroup')

print("\nPerforming Kruskal-Wallis Test for RTT within each power group:")
kruskal_results_rtt = kruskal_wallis_test(df, 'Material', 'RTT', 'powerGroup')

# Calculate rank-biserial correlation for effect size
print("\nCalculating Rank-Biserial Correlation for Effect Size (RSSI):")
effect_sizes_rssi = {}
for power in df['powerGroup'].unique():
    subset = df[df['powerGroup'] == power]
    materials = subset['Material'].unique()
    for mat1, mat2 in itertools.combinations(materials, 2):
        group1 = subset[subset['Material'] == mat1]['RSSI']
        group2 = subset[subset['Material'] == mat2]['RSSI']
        rbc = rank_biserial_correlation(group1, group2)
        effect_sizes_rssi[(power, mat1, mat2)] = rbc

# Visualize results using Plotly (dark theme)
fig = make_subplots(rows=1, cols=2, subplot_titles=("Kruskal-Wallis Results", "Effect Sizes"))

# Kruskal-Wallis results plot
kw_data = [go.Bar(name=f'Power Group {power}', x=['RSSI', 'RTT'], y=[results[0] for power, results in kruskal_results_rssi.items()])]
fig.add_traces(kw_data, rows=1, cols=1)

# Effect sizes plot
effect_data = []
for (power, mat1, mat2), effect_size in effect_sizes_rssi.items():
    effect_data.append(go.Bar(name=f'{power} ({mat1} vs {mat2})', x=[f'{mat1} vs {mat2}'], y=[effect_size]))

fig.add_traces(effect_data, rows=1, cols=2)

# Update layout
fig.update_layout(title_text="Non-Parametric Analysis of Materials' Interaction with Signal Strength")
fig.show()


Performing Kruskal-Wallis Test for RSSI within each power group:
Kruskal-Wallis Test for Power Group 0: H-statistic=166.5265, p-value=0.0000
Kruskal-Wallis Test for Power Group 1: H-statistic=169.3044, p-value=0.0000
Kruskal-Wallis Test for Power Group 2: H-statistic=165.6234, p-value=0.0000
Kruskal-Wallis Test for Power Group 3: H-statistic=162.9903, p-value=0.0000
Kruskal-Wallis Test for Power Group 4: H-statistic=172.1372, p-value=0.0000
Kruskal-Wallis Test for Power Group 5: H-statistic=164.8360, p-value=0.0000
Kruskal-Wallis Test for Power Group 6: H-statistic=165.6475, p-value=0.0000
Kruskal-Wallis Test for Power Group 7: H-statistic=60.5095, p-value=0.0000

Performing Kruskal-Wallis Test for RTT within each power group:
Kruskal-Wallis Test for Power Group 0: H-statistic=0.5827, p-value=0.9888
Kruskal-Wallis Test for Power Group 1: H-statistic=29.1523, p-value=0.0000
Kruskal-Wallis Test for Power Group 2: H-statistic=11.1032, p-value=0.0494
Kruskal-Wallis Test for Power Group 3: 

In [51]:
df.Material.unique()

array(['Empty box with air', 'W60G40-500gm', 'WATER', 'Water25Gelatin75',
       'Water60Gelatin40', 'Water90Gelatin10'], dtype=object)

In [52]:
tdf = df.loc[(df.Material == 'Water90Gelatin10') & (df.powerGroup == 3), "RSSI"]
stat, p_value = stats.shapiro(tdf)
stat, p_value

(0.7034026384353638, 1.7492644701633253e-06)

In [71]:
df["recordIndex"] = list(np.arange(30)) * df.Material.nunique() * df.powerGroup.nunique()

In [77]:
tdf = df.loc[df.powerGroup == 3, ["Material", "RSSI"]]

In [80]:
tdf.pivot(index=None, columns="Material", values="RSSI")

Material,Empty box with air,W60G40-500gm,WATER,Water25Gelatin75,Water60Gelatin40,Water90Gelatin10
90,-38.0,,,,,
91,-37.0,,,,,
92,-37.0,,,,,
93,-38.0,,,,,
94,-37.0,,,,,
...,...,...,...,...,...,...
1315,,,,,,-42.0
1316,,,,,,-42.0
1317,,,,,,-41.0
1318,,,,,,-41.0


In [82]:
xlist = []
for mat in df.Material.unique():
    ls = df.loc[]

array(['Empty box with air', 'W60G40-500gm', 'WATER', 'Water25Gelatin75',
       'Water60Gelatin40', 'Water90Gelatin10'], dtype=object)

In [29]:
df["graphDisplayRow"] = 2
df.loc[df.TxPower.isin([6, 18, 26, 32]), "graphDisplayRow"] = 1

In [35]:
[(pow, col, row) for pow, col, row in zip(df.TxPower.sort_values().unique(),['a', 'b', 'c', 'd'] * 2, [1,1,1,1,2,2,2,2])]

[(6, 'a', 1),
 (18, 'b', 1),
 (26, 'c', 1),
 (32, 'd', 1),
 (50, 'a', 2),
 (58, 'b', 2),
 (70, 'c', 2),
 (76, 'd', 2)]

In [None]:
df["graphDisplayCol"] = None
for pow, col in zip(df.TxPower.sort_values().unique(),['a', 'b', 'c', 'd'] * 2):
    df.loc[df.TxPower == pow, "graphDisplayCol"] = col

In [44]:
fig = make_subplots(rows=2, cols=4)
fig.append_trace(go.Hi)

In [45]:
import plotly.figure_factory as ff

In [None]:
ff.create_distplot()

In [None]:
go.Histogram()

In [41]:
fig = px.histogram(df, x="RSSI",facet_col="graphDisplayCol", facet_row="graphDisplayRow", color="Material")
fig.show()