In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd


In [None]:
def read_csv(csv_file):
    """Load a CSV table and keep only complete rows of the three needed columns.

    Parameters
    ----------
    csv_file : str, path or file-like
        Passed unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The columns 'Geneid', 'shScramble' and 'shDDX41' (in that order),
        without the rows where any of the three is missing. Row index labels
        of the surviving rows are those assigned when the file was read.

    Raises
    ------
    KeyError
        If one of the three columns is not present in the file.
    """
    wanted = ['Geneid', 'shScramble', 'shDDX41']
    table = pd.read_csv(csv_file)
    # Select first, then drop rows with a missing value in any kept column.
    return table[wanted].dropna(subset=wanted)

In [None]:
def tukey_test(df):
    """Print a one-way ANOVA table and a Tukey HSD summary for the two conditions.

    The 'shScramble' and 'shDDX41' columns of ``df`` are stacked into a long
    table with a 'treatment' column (the original column name) and a 'value'
    column. An OLS model ``value ~ treatment`` is fitted, its type-2 ANOVA
    table is printed, and then the Tukey HSD comparison (alpha = 0.05) of the
    treatment groups is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'shScramble' and 'shDDX41'.

    Returns
    -------
    None
        Results are only printed.
    """
    conditions = ['shScramble', 'shDDX41']
    long_df = df.melt(value_vars=conditions, var_name='treatment', value_name='value')

    # Run the ANOVA
    fitted = ols('value ~ treatment', data=long_df).fit()
    print(sm.stats.anova_lm(fitted, typ=2))

    # Run the Tukey test
    hsd = pairwise_tukeyhsd(
        endog=long_df['value'],
        groups=long_df['treatment'],
        alpha=0.05,
    )
    print(hsd)

In [None]:
def deviation(arr):
    """Return the unscaled median absolute deviation of ``arr``.

    The result is ``median(|x - median(x)|)``. No normalisation constant is
    applied, and no axis is passed to ``np.median``, so numpy's default
    reduction is used.

    Parameters
    ----------
    arr : array-like
        Numeric values (list, numpy array, pandas Series, ...).

    Returns
    -------
    numpy scalar
        The median absolute deviation.
    """
    # Normalise the input once so lists and Series take the same numpy path.
    values = np.asarray(arr)
    center = np.median(values)
    # A distinct local name avoids shadowing the function's own name.
    return np.median(np.abs(values - center))


In [None]:
def median_absolute_deviation(df):
    """Return the rows whose value is more than 2 MADs below the median.

    The values are read from the second column of ``df`` (position 1). The
    median and the median absolute deviation (``scipy.stats.median_abs_deviation``
    with its default settings) of that column are printed, and the rows whose
    value is strictly below ``median - 2 * MAD`` are returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Two or more columns; the second one holds the numeric values.

    Returns
    -------
    pandas.DataFrame
        The subset of rows of ``df`` lying below the lower threshold.
    """
    values = df.iloc[:, 1]

    # The centre of the thresholds must be the median (the previous code used
    # the standard deviation here, which shifted the cut-off).
    median = values.median()
    MAD = stats.median_abs_deviation(values)
    MAD_val_minus = median - 2 * MAD
    print(median, MAD)

    # NOTE(review): only low outliers are returned; rows above
    # median + 2 * MAD are not reported by this function.
    values_below_MAD = df[values < MAD_val_minus]

    return values_below_MAD

In [None]:
def visualisation(df):
    """Show a histogram of the per-row difference ``shScramble - shDDX41``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'shScramble' and 'shDDX41'. The frame is
        only read; no column is added to it.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Keep the difference in a local Series so the caller's DataFrame is not
    # modified as a side effect of plotting.
    difference = df['shScramble'] - df['shDDX41']

    # Build the histogram
    plt.figure(figsize=(10, 6))
    plt.hist(difference, bins=50, edgecolor='black')
    plt.title('Histogramme de la différence entre shScramble et shDDX41')
    plt.xlabel('Différence')
    plt.ylabel('Fréquence')
    plt.grid(True)
    plt.show()


In [None]:
# Input table; the path is relative, so it is resolved against the current
# working directory of the kernel.
csv_file = 'donnees_Riboseq.csv'
# Load the table, keeping Geneid, shScramble and shDDX41 without incomplete rows.
df = read_csv(csv_file)
# Print the ANOVA table and the Tukey HSD summary for the two conditions.
tukey_test(df)
# Show the histogram of shScramble - shDDX41.
visualisation(df)
# Despite the "mad_" prefix, each variable below holds the DataFrame returned
# by median_absolute_deviation (a filtered subset of rows), not a scalar MAD.
mad_shScramble = median_absolute_deviation(df[['Geneid', 'shScramble']])
mad_shDDX41 = median_absolute_deviation(df[['Geneid', 'shDDX41']])

# Display the results (disabled; these would print the DataFrames above)
#print(f'Median Absolute Deviation for shScramble: {mad_shScramble}')
#print(f'Median Absolute Deviation for shDDX41: {mad_shDDX41}')


