In [1]:
%pip install matplotlib 
%pip install spicy
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import ttest_ind
import os 
from statsmodels.stats.multitest import multipletests

downloads_folder = os.path.expanduser('~') + '/Downloads/'
ruta_archivo = os.path.join(downloads_folder, 'Cpvalues.xlsx')
data=pd.read_excel(ruta_archivo)


# Assuming df is your DataFrame with loaded data
# Replace 'your_file.csv' with the actual file path or DataFrame name
# For instance, you can read a CSV file using: df = pd.read_csv('your_file.csv')

# Extract Cp values
cp_values = data.iloc[3:, 2:].astype(float)



# Calculate the mean for each column
column_means = cp_values.mean(axis=0)

# Subtract the mean of each column from the Cp values to get dCp values
df_dcp = cp_values.sub(column_means, axis=1)

# Divide dCp values into different groups
control_group_dcp = df_dcp.iloc[:, :9]
moderate_group_dcp = df_dcp.iloc[:, 9:20]
severe_group_dcp = df_dcp.iloc[:, 20:]

# Replace column names with sample names
sample_names = ["Gene", "Position"] + ["sample1", "sample2", "sample3", "sample4", "sample5", "sample6", "sample7", "sample8", "sample9",
                                        "sample10", "sample11", "sample12", "sample13", "sample14", "sample15", "sample16", "sample17", "sample18",
                                        "sample19", "sample20", "sample21", "sample22", "sample23", "sample24", "sample25", "sample26", "sample27",
                                        "sample28", "sample29"]

# Assign the sample names to the columns of the result_df_grouped_dcp DataFrame
df_dcp.columns = sample_names[2:]

# If you want to include gene names and position indices in the result
result_df_grouped_dcp = pd.concat([data.iloc[3:, :2], control_group_dcp, moderate_group_dcp, severe_group_dcp], axis=1)

# Calculate the mean of each group
control_group_mean = control_group_dcp.mean(axis=1)
moderate_group_mean = moderate_group_dcp.mean(axis=1)
severe_group_mean = severe_group_dcp.mean(axis=1)

# Calculate 2^(-dCp)*1000 for each group (intermediate calculation)
control_group_intermediate = (2 ** (-control_group_dcp)) * 10000
moderate_group_intermediate = (2 ** (-moderate_group_dcp)) * 10000
severe_group_intermediate = (2 ** (-severe_group_dcp)) * 10000

# Calculate the average for each group of the intermediate calculation
control_group_mean_intermediate = control_group_intermediate.mean(axis=1)
moderate_group_mean_intermediate = moderate_group_intermediate.mean(axis=1)
severe_group_mean_intermediate = severe_group_intermediate.mean(axis=1)

# Calculate 2^(-mean of intermediate calculation)*10000 for each group
control_group_final = (2 ** (-control_group_mean_intermediate)) * 10000
moderate_group_final = (2 ** (-moderate_group_mean_intermediate)) * 10000
severe_group_final = (2 ** (-severe_group_mean_intermediate)) * 10000

# Calculate standard deviations for each group
control_group_std = pd.Series(control_group_dcp.std(axis=1), name="Control_Std")
moderate_group_std = pd.Series(moderate_group_dcp.std(axis=1), name="Moderate_Std")
severe_group_std = pd.Series(severe_group_dcp.std(axis=1), name="Severe_Std")


# Calculate ddCp values
ddCp1 = moderate_group_mean - control_group_mean
ddCp2 = severe_group_mean - moderate_group_mean
ddCp3 = severe_group_mean - control_group_mean

# Calculate 2^(-ddCp) for each group
group1_2_pow_minus_ddCp = (2 ** (-ddCp1))
group2_3_pow_minus_ddCp = (2 ** (-ddCp2))
group3_pow_minus_ddCp = (2 ** (-ddCp3))

#  Calculate fold change for each group
fold_change_group1 = group1_2_pow_minus_ddCp.apply(lambda x: x if x > 1 else -1 / x)
fold_change_group2 = group2_3_pow_minus_ddCp.apply(lambda x: x if x > 1 else -1 / x)
fold_change_group3 = group3_pow_minus_ddCp.apply(lambda x: x if x > 1 else -1 / x)

# Calculate p-values using ttest_ind for each gene
p_values_group2_1 = ttest_ind(moderate_group_dcp, control_group_dcp, axis=1).pvalue
p_values_group3_1 = ttest_ind(severe_group_dcp, control_group_dcp, axis=1).pvalue
p_values_group3_2 = ttest_ind(severe_group_dcp, moderate_group_dcp, axis=1).pvalue




# Create a DataFrame with p-values, corrected p-values, and fold change groups
result_df = pd.DataFrame({
    'Gene': data.iloc[3:, 0],
    'Position': data.iloc[3:, 1],
    'FoldChange_Group1': fold_change_group1,
    'FoldChange_Group2': fold_change_group2,
    'FoldChange_Group3': fold_change_group3,
    'P_Value_Group2_1': p_values_group2_1,
    'P_Value_Group3_1': p_values_group3_1,
    'P_Value_Group3_2': p_values_group3_2,
})

print(result_df)
# Export the DataFrame to an Excel file
output_file = os.path.join(downloads_folder, 'resultsmicros.xlsx')
result_df.to_excel(output_file, index=False)



Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


   Gene      Position  FoldChange_Group1  FoldChange_Group2  \
3    A2          EGOT          -1.017888          -1.212115   
4    A3         NRAV           -1.267113           1.114198   
5    A4     A2ML1-AS1           1.137461          -1.197623   
6    A5        EMX2OS           1.010860           1.271239   
7    A6      GNAS-AS1                NaN                NaN   
..  ...           ...                ...                ...   
90   H5           NaN                NaN                NaN   
91   H6           NaN                NaN                NaN   
92   H7           NaN                NaN                NaN   
93  NaN           NaN                NaN                NaN   
94  NaN  GLOBAL MEAN*          -1.000000          -1.000000   

    FoldChange_Group3  P_Value_Group2_1  P_Value_Group3_1  P_Value_Group3_2  
3           -1.233797          0.900830          0.288440          0.246576  
4           -1.137242          0.030881          0.287999          0.297392  
5        