In [9]:
import pandas as pd
from scipy.stats import spearmanr
from itertools import combinations

def pairwise_spearman(feature_table, min_samples=2):
    """Compute the Spearman correlation for every unique pair of rows.

    Parameters
    ----------
    feature_table : pandas.DataFrame
        One row per feature, one column per sample. The index holds the
        feature labels and must not contain duplicates.
    min_samples : int, default 2
        Minimum number of columns in which BOTH features are non-missing for
        the pair to be reported. The default reproduces the original
        ``count > 1`` rule.
        NOTE(review): a rank correlation on only 2 points is degenerate;
        consider raising this threshold -- confirm with the analysis owner.

    Returns
    -------
    list of list
        One ``[feature_1, feature_2, count, corr, p_value]`` record per
        reported pair, in ``itertools.combinations`` order of the index.

    Raises
    ------
    ValueError
        If the index contains duplicate labels. Label-based row selection
        would then return more than one row per feature and the pairwise
        statistics would be meaningless.
    """
    labels = feature_table.index
    if not labels.is_unique:
        duplicated = labels[labels.duplicated()].unique().tolist()
        raise ValueError(
            "Feature labels must be unique; duplicated labels (first 5): "
            f"{duplicated[:5]}"
        )

    # Extract the raw values and the not-null mask once, instead of doing a
    # label lookup plus dropna on a DataFrame for every single pair.
    values = feature_table.to_numpy()
    present = feature_table.notna().to_numpy()

    records = []
    # combinations() yields each unordered pair exactly once.
    for i, j in combinations(range(len(labels)), 2):
        # Keep only the columns where both features have a value.
        shared = present[i] & present[j]
        count = int(shared.sum())
        if count < min_samples:
            continue

        corr, p_value = spearmanr(values[i][shared], values[j][shared])
        records.append([labels[i], labels[j], count, corr, p_value])
    return records


# Load data
file_path = 'cellline_sample.csv'
data = pd.read_csv(file_path)

# Features are already rows and the remaining columns are the samples, so no
# transpose is performed here. The name `transposed_data` is kept only so that
# any other cell referring to it keeps working.
transposed_data = data.set_index("Feature")

# Get the list of features
features = transposed_data.index

# Spearman correlation, shared-sample count and p-value for each feature pair
results = pairwise_spearman(transposed_data)

# Convert the results into a DataFrame
results_df = pd.DataFrame(
    results,
    columns=["Feature 1", "Feature 2", "Count", "Spearman Correlation", "Spearman P-Value"],
)

# Save the DataFrame as a CSV file
output_path = 'correlation_results.csv'
results_df.to_csv(output_path, index=False)

# Preview the first rows as the cell output
results_df.head(10)



Unnamed: 0,Feature 1,Feature 2,Count,Spearman Correlation,Spearman P-Value
0,A1BG,A1CF,44,-0.048747,0.753345
1,A1BG,A2M,44,0.175844,0.253552
2,A1BG,A2ML1,44,-0.034997,0.821569
3,A1BG,A3GALT2,44,-0.038521,0.803939
4,A1BG,A4GALT,44,0.234046,0.126235
5,A1BG,A4GNT,44,0.136323,0.377581
6,A1BG,AAAS,44,0.293315,0.053315
7,A1BG,AACS,44,0.227438,0.137608
8,A1BG,AADAC,44,-0.200205,0.19257
9,A1BG,AADACL2,44,-0.181073,0.239484
