# Imports

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import spearmanr

from ..config import RAW_DATA_DIR

# Load Raw Data

In [None]:

# Build the path with pathlib rather than string concatenation so that this
# works whether RAW_DATA_DIR is a plain string or a Path object, and so that a
# trailing separator on RAW_DATA_DIR cannot produce a malformed path.
df = pd.read_csv(Path(RAW_DATA_DIR) / 'dataset1.csv')

# Compute Relevant Features

In [None]:
# Keep only the columns whose name mentions "condition". The membership test
# has to be applied to each column name: a bare string literal used as the
# filter is always truthy and would select every column, target included.
# str() guards against non-string column labels (e.g. integer headers).
feature_cols = [col for col in df.columns if "condition" in str(col)]

"""Features themselves should be calculated..."""

# Compute Spearman correlations between each selected feature and the expression level

In [None]:
def _spearman_stats(values, target):
    """Return the Spearman coefficient and p-value of *values* vs *target*."""
    rho, p = spearmanr(values, target)
    return {'correlation': rho, 'p_value': p}


# One entry per selected feature, keyed by column name, in selection order.
correlations = {
    name: _spearman_stats(df[name], df['expression_level'])
    for name in feature_cols
}

# Display Results

In [None]:
# Format one summary line per feature (coefficient to 3 decimals, p-value in
# scientific notation), then emit them in the order they were computed.
report_lines = (
    f"{feature}: Correlation = {stats['correlation']:.3f}, P-value = {stats['p_value']:.3e}"
    for feature, stats in correlations.items()
)
for line in report_lines:
    print(line)

# Plot Results

In [None]:
# Tabulate the results with one row per feature, exposing the feature name as
# a regular column and ordering rows by absolute correlation, strongest first.
corr_df = (
    pd.DataFrame.from_dict(correlations, orient='index')
    .assign(feature=lambda frame: frame.index)
    .sort_values(by='correlation', key=abs, ascending=False)
)

# Horizontal bar chart coloured by the signed coefficient; the dashed vertical
# line at zero separates negative from positive correlations.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(
    data=corr_df,
    x='correlation',
    y='feature',
    hue='correlation',
    palette='coolwarm',
    legend=False,
    ax=ax,
)
ax.set_title('Spearman Correlations with Expression Level')
ax.set_xlabel('Correlation Coefficient')
ax.set_ylabel('Feature')
ax.axvline(x=0, color='black', linestyle='--', alpha=0.5)
fig.tight_layout()
plt.show()