# Analysis of compression ratio

22 July 2023 Guido Cattani

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
from scipy.stats import shapiro as shapiro

In [2]:
def read_CR(f_in='/media/guido/LACIE/Cingle_Guido/Master/Implant/Calculated_data/Compression_ratio.csv'):
    """Read the compression ratio (CR) table from a CSV file.

    Parameters
    ----------
    f_in : str or path-like, optional
        Location of the CSV file. The default is the path this notebook
        has always used, so ``read_CR()`` without arguments is unchanged.

    Returns
    -------
    pandas.DataFrame
        The table indexed by its ``Study_ID`` column, after
        ``fillna(pd.NA)`` has been applied to it.
    """
    p_in = Path(f_in)
    df = pd.read_csv(p_in, index_col='Study_ID')
    df = df.fillna(pd.NA)
    return df

In [3]:
def adjust_columns_f(df):
    """Rename frequency columns to their integer frequency, in place.

    Labels such as ``'f_250_Hz'`` or ``'BCdimp_250_Hz'`` become ``250``.

    The previous implementation used ``str.lstrip('BCdimp_')``, which strips
    a *set of characters* rather than a prefix; any prefix containing other
    characters (for example ``'f_'``) was left in place and ``int()`` failed.
    The frequency is now taken as the last run of digits in the label,
    optionally followed by the unit characters ``_``, ``H`` and ``z``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels each end in a frequency, with or without
        a trailing ``_Hz``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with renamed columns.

    Raises
    ------
    ValueError
        If a column label contains no trailing frequency.
    """
    import re

    # last digit run, followed only by the characters of the '_Hz' unit suffix
    freq_pattern = re.compile(r'(\d+)[_Hz]*$')

    new_labels = {}
    for label in df.columns:
        found = freq_pattern.search(str(label))
        if found is None:
            raise ValueError(
                'no frequency found in column label {!r}'.format(label))
        new_labels[label] = int(found.group(1))

    # keep the in-place contract: callers may rely on df itself being changed
    df.rename(columns=new_labels, inplace=True)
    return df

In [4]:
def cal_quantile(df):
    """Return the 10th, 50th and 90th percentile of every column of ``df``.

    Values are rounded to one decimal and the rows are labelled
    'CR P10', 'CR P50' and 'CR P90'.
    """
    levels = [0.10, 0.50, 0.90]
    # positional row number -> human readable label
    row_labels = dict(enumerate(('CR P10', 'CR P50', 'CR P90')))

    # reset_index moves the numeric quantile levels into an 'index' column,
    # leaving positions 0..2 as row labels; those are renamed and the helper
    # column is discarded again
    table = df.quantile(q=levels).round(1).reset_index()
    table = table.rename(index=row_labels)
    return table.drop('index', axis=1)

In [5]:
def descriptive_stat(df):
    """Build a per-column summary table for ``df``.

    Rows, in order: 'P10', 'P50', 'P90', 'Mean' and 'St. dev.' (each rounded
    to one decimal), followed by the rows returned by
    ``shapiro_test_norm(df)``.
    """
    # percentile rows, relabelled from the numeric quantile levels
    percentiles = df.quantile([0.1, 0.5, 0.9]).round(1)
    percentiles.index = ['P10', 'P50', 'P90']

    # one-row frames so they stack underneath the percentiles
    mean_row = df.mean().round(1).to_frame('Mean').T
    std_row = df.std().round(1).to_frame('St. dev.').T

    # Shapiro-Wilk normality rows
    normality = shapiro_test_norm(df)

    summary = pd.concat([percentiles, mean_row, std_row, normality])

    # NOTE(review): the table shown in the notebook keeps three decimals on
    # the Shapiro rows, so this final round(1) appears not to touch them --
    # confirm before relying on it.
    return summary.round(1)

In [6]:
def shapiro_test_norm(df):
    """Check every column of ``df`` for normality with the Shapiro-Wilk test.

    Missing values are dropped per column before testing, so one missing
    entry no longer turns the whole column's result into NaN, and the
    minimum-sample guard looks at the number of observations actually
    available in that column.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data, one sample per column.

    Returns
    -------
    pandas.DataFrame
        Three rows -- 'Shapiro test statistic', 'p-value' (both rounded to
        three decimals) and 'normally distributed' (p-value >= 0.05) -- with
        one column per column of ``df``. Columns with fewer than three
        observations get NaN for statistic and p-value and ``False`` for
        'normally distributed'.
    """
    statistics = []
    p_vals = []
    for _, column in df.items():
        sample = column.dropna().astype(float)
        if len(sample) >= 3:
            stat, p_value = shapiro(sample)
        else:
            # too few observations for the test
            stat, p_value = float('nan'), float('nan')
        statistics.append(round(float(stat), 3))
        p_vals.append(round(float(p_value), 3))

    shapiro_stats = pd.Series(statistics, index=df.columns, dtype=float)
    p_values = pd.Series(p_vals, index=df.columns, dtype=float)
    # NaN compares False, so untestable columns are reported as not normal
    is_normal = p_values >= 0.05

    shapiro_test = pd.DataFrame({
        'Shapiro test statistic': shapiro_stats,
        'p-value': p_values,
        'normally distributed': is_normal
    }).transpose()

    return shapiro_test

In [7]:
# load the compression ratio table (rows indexed by Study_ID)
cr = read_CR()

In [8]:
# per-frequency summary: percentiles, mean, standard deviation and
# Shapiro-Wilk normality check
des_stat = descriptive_stat(cr)
# bare expression so the notebook displays the table
des_stat

Unnamed: 0,f_250_Hz,f_315_Hz,f_400_Hz,f_500_Hz,f_630_Hz,f_800_Hz,f_1000_Hz,f_1250_Hz,f_1600_Hz,f_2000_Hz,f_2500_Hz,f_3150_Hz,f_4000_Hz,f_5000_Hz,f_6300_Hz,f_8000_Hz
P10,1.1,1.0,0.9,1.0,1.0,1.0,1.0,1.0,1.1,1.0,0.9,0.9,0.9,0.9,0.9,1.0
P50,1.1,1.1,1.0,1.0,1.0,1.0,1.1,1.1,1.1,1.1,1.0,0.9,0.9,1.0,1.0,1.0
P90,1.2,1.4,1.2,1.1,1.1,1.1,1.1,1.1,1.2,1.1,1.0,1.0,1.0,1.0,1.0,1.1
Mean,1.1,1.2,1.1,1.0,1.0,1.0,1.1,1.1,1.1,1.1,1.0,1.0,1.0,1.0,1.0,1.0
St. dev.,0.1,0.2,0.1,0.1,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1
Shapiro test statistic,0.932,0.784,0.803,0.716,0.751,0.833,0.943,0.941,0.954,0.962,0.919,0.85,0.893,0.942,0.974,0.972
p-value,0.171,0.001,0.001,0.0,0.0,0.003,0.279,0.247,0.431,0.585,0.095,0.005,0.031,0.258,0.845,0.8
normally distributed,True,False,False,False,False,False,True,True,True,True,True,False,False,True,True,True


In [9]:
# folder that receives the analysis results, and the name of the stat file
d_ar = "/media/guido/LACIE/Cingle_Guido/Master/Implant/Analysis_Results/"
f_ar = "compression_ratio.csv"

# write the descriptive statistics table to <folder>/<file name>
p_ar = Path(d_ar) / f_ar
des_stat.to_csv(p_ar)