In [None]:
import pandas as pd
from scipy.stats import pearsonr, spearmanr, kendalltau
import numpy as np

def calculate_correlations(df):
    """Correlate every column but the last with the last column.

    For each of the first ``n - 1`` columns, Pearson, Spearman and Kendall
    correlations (with their p-values) are computed against the final
    column. Rows where EITHER of the two values is missing are dropped
    pairwise before the statistics are computed, so a missing value in the
    target column no longer contaminates the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose last column is the target and whose preceding columns
        are the features to correlate with it.

    Returns
    -------
    pandas.DataFrame
        One row per feature column, in the original column order, with the
        columns ``Column``, ``Pearson Correlation``, ``Pearson P-value``,
        ``Spearman Correlation``, ``Spearman P-value``,
        ``Kendall Correlation`` and ``Kendall P-value``. A feature with
        fewer than two complete (feature, target) pairs gets NaN for all
        six statistics instead of raising, so the output stays row-aligned
        with the input columns.
    """
    result_columns = [
        'Column',
        'Pearson Correlation',
        'Pearson P-value',
        'Spearman Correlation',
        'Spearman P-value',
        'Kendall Correlation',
        'Kendall P-value',
    ]

    num_cols = df.shape[1]
    if num_cols < 2:
        # No feature/target pair exists; return an empty table that still
        # carries the expected schema.
        return pd.DataFrame(columns=result_columns)

    # Missing-value mask for the whole table, computed once.
    na_check = df.isnull().to_numpy()
    target_values = df.iloc[:, -1].to_numpy()
    target_present = ~na_check[:, -1]

    results = []
    for i in range(num_cols - 1):
        # Keep only rows where both the feature and the target are present.
        valid = target_present & ~na_check[:, i]
        col1 = df.iloc[:, i].to_numpy()[valid]
        col2 = target_values[valid]

        if col1.size < 2:
            # The scipy tests need at least two observations; report NaN
            # rather than aborting the whole scan on one sparse column.
            pearson_corr = pearson_p_value = np.nan
            spearman_corr = spearman_p_value = np.nan
            kendall_corr = kendall_p_value = np.nan
        else:
            # Pearson correlation and its p-value
            pearson_corr, pearson_p_value = pearsonr(col1, col2)
            # Spearman correlation and its p-value
            spearman_corr, spearman_p_value = spearmanr(col1, col2)
            # Kendall correlation and its p-value
            kendall_corr, kendall_p_value = kendalltau(col1, col2)

        results.append({
            'Column': df.columns[i],
            'Pearson Correlation': pearson_corr,
            'Pearson P-value': pearson_p_value,
            'Spearman Correlation': spearman_corr,
            'Spearman P-value': spearman_p_value,
            'Kendall Correlation': kendall_corr,
            'Kendall P-value': kendall_p_value,
        })

    # Fixing the column order keeps the schema stable for callers that
    # slice the result positionally.
    return pd.DataFrame(results, columns=result_columns)

In [None]:
# Tissues to process; one Manhattan-plot input table is written per tissue.
tissue_list = ['Blood', 'Brain', 'Lung', 'Skin']
for tissue in tissue_list:
    print(tissue)

    # Feature table for this tissue; the first CSV column is the row index.
    df = pd.read_csv(
        f'../../train_data/bootstrap_{tissue}_0.2_add.csv',
        index_col=0,
    )

    # Correlation statistics of each leading column against the last column.
    result_df = calculate_correlations(df)

    # Chromosome-location table, indexed by the first CSV column.
    data = pd.read_csv(
        f'../../train_data/merge_{tissue}_withchr.csv',
        index_col=0,
    )
    # Use string labels so the lookup by df's column names below can match.
    data.index = list(map(str, data.index.tolist()))
    # Select the location rows in the same order as df's columns (all but
    # the last), then discard the labels so rows line up positionally with
    # result_df.
    feature_names = list(df.columns[:-1])
    data = data.loc[feature_names]
    data = data.reset_index(drop=True)

    # First three location columns side by side with the statistics
    # (everything in result_df except its leading 'Column' field).
    location_part = data.iloc[:, :3]
    stats_part = result_df.iloc[:, 1:]
    data_merge = pd.concat([location_part, stats_part], axis=1)
    data_merge.to_csv(f'Manhattan_data_{tissue}_add.csv')