# This script fits a MNL to quantify celltypes' association with each LR:
#### Labels: LR (multiclass)
#### Features: Celltype
#### Weights: Coefficients from the MNL, quantifies association strength

In [32]:
import numpy as np
import csv
import pickle
import matplotlib
import math
import pandas as pd
import matplotlib
from sklearn.utils import resample
from sklearn import linear_model
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from collections import Counter
from scipy.stats import chi2
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from imblearn.under_sampling import RandomUnderSampler

import pandas as pd
import ast

In [33]:
def readCsv(x):
  """Parse file."""
  #colNames = ["method", "benchmark", "start", "end", "time", "memory"]
  df = pd.read_csv(x, sep=",")

  return df

def preprocessDf(df):
  """Transform ligand and receptor columns."""
  df["ligand-receptor"] = df["ligand"] + '-' + df["receptor"]
  df["component"] = df["component"] #.astype(str).str.zfill(2)

  return df

In [34]:
# Load subtype label
# subtype_label_file='/Users/victoriagao/local_docs/schwartz_data/PDAC_64630_subtype.csv'
subtype_label_file='/Users/victoriagao/local_docs/schwartz_data/experiment_data/Deisha/new_Deisha_Annot_May/exp2_D1_64630/fractional_abundances_by_spot.csv'
subtype_abundance_df = readCsv(subtype_label_file)
# subtype_label=[]
# with open(subtype_label_file) as file:
#     csv_file = csv.reader(file, delimiter=",")
#     for line in csv_file:
#         subtype_label.append(line)

# barcode_subtype=dict()
# for i in range(1,len(subtype_label)):
#     barcode_subtype[subtype_label[i][0]]= subtype_label[i][1]

# Load NEST output 
df = readCsv("/Users/victoriagao/local_docs/NEST/output/From_Fatema/NEST_combined_output_PDAC_64630.csv")
output_processed = preprocessDf(df)

In [35]:
subtype_abundance_df

Unnamed: 0,SpotID,MT1X EC,Tip EC,Venous EC,EHT EC,Arterial EC,Normal Duct,Acinar,Notta BasalA,EMT Duct,...,mDC,GRN TAM,SPP1 TAM,LAMP3 DC,myCAF,qPSC,smPSC,Myocyte,csCAF,IL11 CAF
0,GTAAGCGGGCAGTCAG-1,0.333333,0.000000,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,TCTTACAGAGGTACCG-1,0.600000,0.000000,0.0,0.0,0.0,0.4,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,GACAACGCAGCTTACG-1,0.777778,0.111111,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,CTACGACTAGCTATAA-1,0.500000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0
4,GAAGAACGGTGCAGGT-1,0.500000,0.500000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1401,AATAGAACAGAGTGGC-1,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1402,TACAAGTCTCGTGCAT-1,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1403,CACCGCCAGAAGGTTT-1,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1404,GCCGCTTGTGAGAAAC-1,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


### Build feature matrix

In [36]:
### Merge NEST output with subtype label, and filter out the spots that are not in the subtype label
matched_spots_df_sender = pd.merge(output_processed, subtype_abundance_df, left_on='from_cell', right_on='SpotID')
matched_spots_df_receiver = pd.merge(output_processed, subtype_abundance_df, left_on='to_cell', right_on='SpotID')
matched_spots_df_total_raw = pd.concat([matched_spots_df_sender, matched_spots_df_receiver])


In [37]:
matched_spots_df_total_raw

Unnamed: 0,from_cell,to_cell,ligand,receptor,attention_score,component,from_id,to_id,ligand-receptor,SpotID,...,mDC,GRN TAM,SPP1 TAM,LAMP3 DC,myCAF,qPSC,smPSC,Myocyte,csCAF,IL11 CAF
0,ACGCGCTACACAGGGT-1,AAACCGGGTAGGTACC-1,LGALS3,NPTN,0.911919,2,132,0,LGALS3-NPTN,ACGCGCTACACAGGGT-1,...,0.6,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
1,TCTTACTTATGCCTCT-1,AAAGTGTGATTTATCT-1,FN1,RPSA,0.929988,6,1228,9,FN1-RPSA,TCTTACTTATGCCTCT-1,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
2,AGTCTCACAAGACTAC-1,AAATTGATAGTCCTTT-1,PTPRF,RACK1,0.911508,10,228,15,PTPRF-RACK1,AGTCTCACAAGACTAC-1,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
3,GTGGGCTTAGACACAC-1,AACAACTGGTAGTTGC-1,FN1,RPSA,0.923395,11,992,17,FN1-RPSA,GTGGGCTTAGACACAC-1,...,0.0,0.0,0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.0
4,CCACAGTACCCATCCT-1,AACCGCTAAGGGATGC-1,FN1,RPSA,0.929303,16,425,23,FN1-RPSA,CCACAGTACCCATCCT-1,...,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1088,AACGTCAGACTAGTGG-1,TTGGCTCGCATGAGAC-1,TGFB1,EGFR,0.839991,17,31,1389,TGFB1-EGFR,TTGGCTCGCATGAGAC-1,...,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0
1089,AGATTATAGGACGTTT-1,TTGTAATCCGTACTCG-1,TGFB1,ITGB5,0.829314,9,184,1394,TGFB1-ITGB5,TTGTAATCCGTACTCG-1,...,0.0,0.0,0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.0
1090,AGATTATAGGACGTTT-1,TTGTAATCCGTACTCG-1,TGFB1,SDC2,0.855152,9,184,1394,TGFB1-SDC2,TTGTAATCCGTACTCG-1,...,0.0,0.0,0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.0
1091,GAGAGGTGCATTCTGG-1,TTGTTTCCATACAACT-1,TGFB1,EGFR,0.830097,2,715,1404,TGFB1-EGFR,TTGTTTCCATACAACT-1,...,0.0,0.0,0.0,0.0,1.00,0.0,0.0,0.0,0.0,0.0


In [38]:
# filter out the LR that only appeared once
matched_spots_df_total = matched_spots_df_total_raw[matched_spots_df_total_raw['ligand-receptor'].duplicated(keep=False)] 
# Take only top 50% LR by frequency
lr_counts = matched_spots_df_total['ligand-receptor'].value_counts()
threshold = lr_counts.quantile(0.50)  # gives the value at the 50th percentile
top_percent_lrs = lr_counts[lr_counts >= threshold].index
matched_spots_df_total = matched_spots_df_total[matched_spots_df_total['ligand-receptor'].isin(top_percent_lrs)]
# Delete some columns
matched_spots_df_total = matched_spots_df_total.drop(columns=['from_cell', 'to_cell', 'ligand', 'receptor', 'attention_score', 'component', 'from_id','to_id','SpotID'])

### MNL for LR, multiclass labels

#### Single run withtout iteration

In [39]:
# Create X and y for multiclass classification
X = matched_spots_df_total.drop(columns=["ligand-receptor"])
y_multiclass = matched_spots_df_total["ligand-receptor"]
model_log_multi = linear_model.LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=42) # set seed for reproducibility
model_log_multi.fit(X, y_multiclass)


In [40]:
# Extract coefficients and score
coef = model_log_multi.coef_  # Coefficients for the features
score = model_log_multi.score(X, y_multiclass)  # Accuracy score for how the model is fitted
score

0.16353754940711462

In [41]:
y_pred = model_log_multi.predict(X)
cm = confusion_matrix(y_multiclass, y_pred)
# Initialize arrays for TP, TN, FP, and FN
num_classes = cm.shape[0]
TP = np.zeros(num_classes)
TN = np.zeros(num_classes)
FP = np.zeros(num_classes)
FN = np.zeros(num_classes)
Sensitivity = np.zeros(num_classes)

# Compute TP, TN, FP, and FN for each class
for i in range(num_classes):
    TP[i] = cm[i, i]
    FP[i] = cm[:, i].sum() - cm[i, i]
    FN[i] = cm[i, :].sum() - cm[i, i]
    TN[i] = cm.sum() - (TP[i] + FP[i] + FN[i])
    Sensitivity[i] = TP[i] / (TP[i] + FN[i])

print("Confusion Matrix:\n", cm)
print("True Positives (TP):", TP)
print("True Negatives (TN):", TN)
print("False Positives (FP):", FP)
print("False Negatives (FN):", FN)
print("Sensitivity:", Sensitivity)



Confusion Matrix:
 [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
True Positives (TP): [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  61.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   5.   0.   0. 127.   0.   0.   0.   0.   0.  83.   0.   0.   0.
   0.   1.   0.  21.   0.  16.   0.   0.   0.   0.   0.  17.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
True Negatives (TN): [2014. 2018. 2004. 1996. 2014. 2018. 2018. 2016. 2016. 2014. 2018. 2014.
 2016. 2016. 2016. 2014. 2014. 2018. 1990. 1994. 2018. 2014. 2000. 2010.
 2008. 1633. 1979. 2016. 1984. 2018. 1996. 2016. 2010. 2008. 1998. 2014.
 2010. 2018. 2008. 2018. 2014. 1992. 1988. 1960. 2012. 2018. 1153. 1988.
 2012. 2010. 1998. 2004. 1486. 2014. 2018. 1988. 2014. 1930. 2014. 1784.
 2006. 1896. 2016. 2012. 2018. 2012. 1984.

In [42]:
#### Saving the coefficients to table outputs

results = pd.DataFrame()
# Feature names (LR pairs)
feature_names = matched_spots_df_total.columns.drop("ligand-receptor")

# Class names (assuming model.classes_ gives you the class names in the order they're in the coefficients matrix)
class_names = model_log_multi.classes_

# Dictionary to hold dataframes for each class
coefficients_dfs = {}

for index, class_name in enumerate(class_names):
    coefs = model_log_multi.coef_[index]
    # Create a DataFrame for each class
    class_df = pd.DataFrame({
        f"{class_name}": coefs
    }, index=feature_names)
    
    
    # Save the DataFrame in the dictionary
    coefficients_dfs[class_name] = class_df

    # transpose the dataframe
    class_df = class_df.T

    # Append class_df every iteration
    results = pd.concat([results, class_df])


In [43]:
results

Unnamed: 0,MT1X EC,Tip EC,Venous EC,EHT EC,Arterial EC,Normal Duct,Acinar,Notta BasalA,EMT Duct,Notta ClassicalA,...,mDC,GRN TAM,SPP1 TAM,LAMP3 DC,myCAF,qPSC,smPSC,Myocyte,csCAF,IL11 CAF
ADAM9-ITGA6,-0.219117,-0.146402,-0.004948,-0.029370,0.0,-0.367010,-0.226216,0.076313,0.138015,0.467771,...,0.436624,-0.078401,-0.134702,-0.011719,-0.565488,0.676266,-0.140923,-0.098115,-0.295972,-0.027453
AIMP1-RACK1,-0.150063,-0.155792,-0.002761,-0.020307,0.0,-0.643864,0.721644,-0.146545,-0.223162,-0.647756,...,-0.075592,0.140034,-0.086101,-0.008043,0.803754,-0.157438,0.693713,-0.070809,-0.200839,-0.019925
APOE-SDC1,-0.387609,0.000858,-0.009550,0.177490,0.0,0.339781,-0.365542,-0.386844,1.511326,-1.097439,...,-0.048280,-0.135395,-0.278194,-0.020889,-0.305341,0.631550,0.263026,-0.187836,-0.494521,-0.051755
APOE-SDC4,-0.255477,-0.039340,-0.013389,-0.095657,0.0,-1.380706,-0.535153,-0.559815,0.897578,-0.802719,...,0.585188,1.584413,-0.322547,0.843254,-1.111757,-0.392742,-0.714360,-0.271219,0.609493,-0.078990
ARF1-LRP5,-0.124966,-0.184113,-0.001868,-0.015297,0.0,0.458592,-0.146868,-0.134310,-0.209391,2.497227,...,-0.083972,-0.040742,-0.067290,-0.005971,-0.333051,-0.131441,-0.180443,-0.053450,-0.159986,-0.014098
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
THBS2-CD47,0.268492,-0.133082,-0.005776,-0.048781,0.0,-0.868934,-0.321730,1.381499,-0.005278,-0.728879,...,-0.196968,-0.121559,0.087640,-0.019280,0.609821,-0.357995,-0.201865,1.473797,-0.453263,-0.049638
THBS2-LRP1,-0.120799,-0.128506,-0.001879,-0.016231,0.0,-0.518608,-0.112281,-0.118789,-0.177718,-0.526036,...,-0.058789,-0.038329,-0.063943,-0.005978,1.738697,-0.141819,-0.165044,-0.061396,0.672013,-0.016923
TIMP1-LRP1,-0.071034,0.912145,0.166955,-0.156385,0.0,-0.117262,0.472644,-0.062895,-0.757545,-1.255117,...,-0.113097,-0.109374,0.978717,-0.046129,1.557106,0.690410,0.784722,0.314248,1.858265,-0.133957
TNFSF13-TNFRSF1A,-0.123050,-0.167517,-0.001896,-0.015343,0.0,-0.084053,-0.137514,-0.130959,-0.202531,2.124479,...,0.039026,-0.040891,-0.067184,-0.006037,-0.336803,-0.131600,-0.170507,-0.053006,-0.160132,-0.014229


#### Save results output table

In [44]:
# Save the results to a CSV file
results.to_csv("/Users/victoriagao/local_docs/NEST/stored_variables/64630_Celltype_LR_MNL__SenderORReceiver.csv", index=True)


### Define statistics calultion functions

In [45]:
# Define functions for bootstrapping and calculating p-values

def fit_model(X, y):
    model = linear_model.LogisticRegression(multi_class='multinomial', solver='lbfgs')
    model.fit(X, y)
    return model

# returns the coefficients using bootstrapping
def get_bootstrap_coefficients(X, y, n_iterations, model):
    n_classes, n_features = model.coef_.shape
    bootstrap_coefs = np.zeros((n_iterations, n_classes, n_features))
    for i in range(n_iterations):
        try:
            X_sample, y_sample = resample(X, y)
            model.fit(X_sample, y_sample)
            if model.coef_.shape == (n_classes, n_features):
                bootstrap_coefs[i] = model.coef_
        except ValueError as e:
            continue
    return bootstrap_coefs

def calculate_standard_errors(bootstrap_coefs):
    return np.std(bootstrap_coefs, axis=0)

def calculate_bootstrap_coefficients_mean(bootstrap_coefs):
    return np.mean(bootstrap_coefs, axis=0)

def calculate_confidence_intervals(bootstrap_coefs, confidence_level=0.95):
    # Calculate the lower and upper percentiles to establish the confidence interval
    lower_percentile = (1 - confidence_level) / 2.0 * 100
    upper_percentile = (1 + confidence_level) / 2.0 * 100
    # return the confidence intervals as a pair of values for each coefficient
    confidence_intervals = np.percentile(bootstrap_coefs, [lower_percentile, upper_percentile], axis=0) 
    return confidence_intervals

def calculate_p_values(weights, bootstrap_standard_errors):
    wald_stats = (weights / bootstrap_standard_errors) ** 2
    p_values = 1 - chi2.cdf(wald_stats, 1)
    return p_values

def calculate_sensitivity(X_test, y_test, y_sample, model):
    y_pred = model.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_sample, y_pred).ravel()
    sensitivity = tp / (tp + fn)
    return sensitivity

### Output a MNL coefficient table with confidence intervals and p-values

In [47]:

# Assuming model_log_multi, class_names, feature_names are already defined
# Initialize an empty DataFrame to store results
# coefficient_output = pd.DataFrame()
pvalue_output = pd.DataFrame()
confidence_interval_output = pd.DataFrame()
mean_output = pd.DataFrame()

# Generate bootstrap coefficients for CI and p-values
bootstrap_coefficients = get_bootstrap_coefficients(X, y_multiclass, 1000, model_log_multi)
bootstrap_se = calculate_standard_errors(bootstrap_coefficients)
confidence_intervals = calculate_confidence_intervals(bootstrap_coefficients)
coefficient_mean = calculate_bootstrap_coefficients_mean(bootstrap_coefficients)
p_values = calculate_p_values(model_log_multi.coef_, bootstrap_se)
# sensitivity = calculate_sensitivity(X, y_multiclass, y_multiclass, model_log_multi)

for index, class_name in enumerate(class_names):
    # coefs = model_log_multi.coef_[index]
    cis = confidence_intervals[:, index, :]  # CI for the current class
    combined_cis = [f"({low:.2f}, {high:.2f})" for low, high in zip(cis[0], cis[1])]
    p_vals = p_values[index, :]  # P-values for the current class
    coef_mean = coefficient_mean[index, :]  # Mean of the coefficients for the current class

    # Create statistics for each LR with coefficients, CIs, means and p-values
    # coefficient_df = pd.DataFrame({
    #     f"{class_name}_coef": coefs,
    # }, index=feature_names)

    pvalue_df = pd.DataFrame({
        f"{class_name}_p_value": p_vals
    }, index=feature_names)

    confidence_interval_df = pd.DataFrame({
        f"{class_name}_CI": combined_cis
    }, index=feature_names)

    coefficient_mean_df = pd.DataFrame({
        f"{class_name}_mean": coef_mean
    }, index=feature_names)


    # Append the coeff, pvalue and CI df every iteration
    #  coefficient_output = pd.concat([coefficient_output, coefficient_df], axis=1) # This doesn't make sense, don't use
    pvalue_output = pd.concat([pvalue_output, pvalue_df], axis=1)
    confidence_interval_output = pd.concat([confidence_interval_output, confidence_interval_df], axis=1)
    mean_output = pd.concat([mean_output, coefficient_mean_df], axis=1)


  wald_stats = (weights / bootstrap_standard_errors) ** 2


In [48]:
# Transpose the dataframe for consistent formatting
# coefficient_output = coefficient_output.T.reset_index().rename(columns={"index": "LR_pair"}) # This doesn't make sense, don't use
pvalue_output = pvalue_output.T.reset_index().rename(columns={"index": "LR_pair"})
confidence_interval_output = confidence_interval_output.T.reset_index().rename(columns={"index": "LR_pair"})
mean_output = mean_output.T.reset_index().rename(columns={"index": "LR_pair"})

#### Save statistics


In [59]:
# Save to a CSV file
mean_output.to_csv("/Users/victoriagao/local_docs/NEST/stored_variables/mean_coeff_64630_Celltype_LR_MNL__SenderORReceiver.csv", index=True)

#### Check top results

In [60]:
# Get index of the top 10 features for each cell type
top_10_indices = {}
for column in mean_output.columns:
    # Sort the column in descending order
    sorted_df = mean_output.sort_values(by=column, ascending=False)
    # Get the indices of the top 10 values
    top_10_indices[column] = sorted_df.index[:10].tolist()


In [61]:
# With the top 10 indices, get the corresponding confidence intervals and means for each cell type
top_10_confidence_intervals = {}
top_10_coef_means = {}
for cell_type, indices in top_10_indices.items():
    # Get the confidence intervals for the top 10 features
    top_10_confidence_intervals[cell_type] = confidence_interval_output.loc[indices]
    top_10_coef_means[cell_type] = mean_output.loc[indices]

# top_10_confidence_intervals['Fibroblast']
# top_10_coef_means['Fibroblast']

In [62]:
top_10_coef_means

{'LR_pair':                   LR_pair   MT1X EC    Tip EC  Venous EC    EHT EC  \
 80          TTR-DDR1_mean  0.783912  0.351283  -0.012426 -0.079264   
 79  TNFSF13-TNFRSF1A_mean -0.114895 -0.161229  -0.001815 -0.014391   
 78        TIMP1-LRP1_mean -0.084424  0.822286   0.158040 -0.144608   
 77        THBS2-LRP1_mean -0.109253 -0.119741  -0.001742 -0.015053   
 76        THBS2-CD47_mean  0.244778 -0.120134  -0.005514 -0.044950   
 75        THBS1-SDC1_mean -0.566904  0.525920  -0.017196 -0.139739   
 74        THBS1-LRP1_mean -0.178151  0.106327  -0.003481 -0.024222   
 73        THBS1-CD47_mean -0.186379  0.071435  -0.003222 -0.024765   
 72        TGFB3-SDC4_mean  0.104939  0.048457  -0.003204  0.872699   
 71         TGFB1-VDR_mean -0.130189 -0.144557  -0.003005 -0.017318   
 
     Arterial EC  Normal Duct    Acinar  Notta BasalA  EMT Duct  ...       mDC  \
 80          0.0    -0.150340 -0.249371     -0.264634  0.321933  ...  1.078022   
 79          0.0    -0.063433 -0.129872   

In [63]:
top_10_coef_means['IL11 CAF']

Unnamed: 0,LR_pair,MT1X EC,Tip EC,Venous EC,EHT EC,Arterial EC,Normal Duct,Acinar,Notta BasalA,EMT Duct,...,mDC,GRN TAM,SPP1 TAM,LAMP3 DC,myCAF,qPSC,smPSC,Myocyte,csCAF,IL11 CAF
46,LGALS3-ITGB4_mean,0.329215,-0.025997,-0.093558,0.189269,0.0,-0.806439,0.019374,0.585773,0.900009,...,-0.025738,0.309367,-0.247014,0.273117,-0.682894,0.042186,0.055942,0.096742,0.311126,0.988626
32,INS-HLA_mean,-0.328689,0.087565,-0.007617,-0.053292,0.0,-0.051688,-0.316896,-0.323712,-0.484994,...,0.046893,0.181045,-0.20218,-0.023034,-0.75262,0.220398,-0.050869,-0.145469,0.730125,0.783284
41,LAMC2-CD151_mean,-0.156873,0.393601,-0.014336,-0.086712,0.0,0.334715,-0.08554,0.310861,-0.183213,...,0.2676,0.331011,0.681473,-0.047174,-1.079903,0.243955,-0.373718,0.418615,-0.664472,0.460662
18,COL1A1-SDC1_mean,-0.060252,-0.220152,-0.010325,-0.097073,0.0,-0.312142,-0.358847,1.119198,0.799439,...,-0.307913,-0.197185,-0.346735,-0.031176,0.799831,-0.524233,0.420783,-0.288205,1.019543,0.44779
75,THBS1-SDC1_mean,-0.566904,0.52592,-0.017196,-0.139739,0.0,-0.526461,0.328172,0.038345,0.090953,...,0.065234,-0.318255,-0.295921,-0.04727,1.28857,-0.07939,-0.223551,0.630286,0.648113,0.347562
67,TGFB1-ITGB5_mean,0.575049,0.931952,-0.045159,0.340804,0.0,-0.673472,0.680157,-0.062305,-1.293083,...,0.381528,-0.239481,-0.495689,-0.121656,0.857758,-0.083066,-0.268799,0.851129,0.842957,0.288553
54,PLXNB2-PTPRK_mean,-0.097766,0.020405,-0.001566,-0.012638,0.0,0.79857,-0.110935,-0.107042,-0.164764,...,-0.059784,-0.032808,-0.054942,-0.004913,-0.280454,-0.106387,-0.141072,-0.041754,-0.130628,-0.011211
4,ARF1-LRP5_mean,-0.114705,-0.174356,-0.00176,-0.014007,0.0,0.407474,-0.137097,-0.122981,-0.194337,...,-0.079249,-0.038167,-0.062833,-0.005586,-0.308577,-0.123021,-0.168863,-0.048623,-0.146328,-0.012694
9,CALR-LRP5_mean,-0.116389,-0.177265,-0.00178,-0.014161,0.0,0.426133,-0.139047,-0.124767,-0.19699,...,-0.080419,-0.038611,-0.063736,-0.005647,-0.312046,-0.124894,-0.171491,-0.049256,-0.148307,-0.012874
37,ITGB1-RACK1_mean,-0.108711,-0.141166,-0.001819,-0.014393,0.0,0.129485,1.501636,-0.115971,-0.178551,...,-0.065074,-0.037696,-0.061763,-0.005704,-0.314224,-0.117323,-0.152471,-0.048966,-0.149061,-0.013047


In [64]:
top_10_coef_means['csCAF']

Unnamed: 0,LR_pair,MT1X EC,Tip EC,Venous EC,EHT EC,Arterial EC,Normal Duct,Acinar,Notta BasalA,EMT Duct,...,mDC,GRN TAM,SPP1 TAM,LAMP3 DC,myCAF,qPSC,smPSC,Myocyte,csCAF,IL11 CAF
66,TGFB1-ENG_mean,0.317829,-0.006938,0.542439,-0.099869,0.0,-0.614702,-0.45444,0.112053,-0.223972,...,-0.280828,0.318692,-0.313638,-0.03681,1.807552,0.942952,-0.026652,-0.275441,2.082255,-0.083886
78,TIMP1-LRP1_mean,-0.084424,0.822286,0.15804,-0.144608,0.0,-0.142257,0.428441,-0.040276,-0.707541,...,-0.09161,-0.101962,0.90876,-0.043363,1.531711,0.668283,0.670497,0.24519,1.79768,-0.122849
64,TGFB1-ACVRL1_mean,-0.111204,-0.129257,-0.001725,-0.016677,0.0,-0.094235,-0.102924,-0.107971,-0.162901,...,-0.053801,-0.035128,-0.058416,-0.005474,1.474168,-0.135413,-0.153815,-0.054031,1.219993,-0.014985
18,COL1A1-SDC1_mean,-0.060252,-0.220152,-0.010325,-0.097073,0.0,-0.312142,-0.358847,1.119198,0.799439,...,-0.307913,-0.197185,-0.346735,-0.031176,0.799831,-0.524233,0.420783,-0.288205,1.019543,0.44779
67,TGFB1-ITGB5_mean,0.575049,0.931952,-0.045159,0.340804,0.0,-0.673472,0.680157,-0.062305,-1.293083,...,0.381528,-0.239481,-0.495689,-0.121656,0.857758,-0.083066,-0.268799,0.851129,0.842957,0.288553
26,FN1-SDC1_mean,0.626885,-0.457048,-0.032155,-0.121602,0.0,-0.386714,-0.465183,-0.076746,0.619122,...,-0.250366,-0.269705,-0.148873,-0.039209,0.567834,1.966296,-0.266451,-0.335618,0.817007,-0.102582
32,INS-HLA_mean,-0.328689,0.087565,-0.007617,-0.053292,0.0,-0.051688,-0.316896,-0.323712,-0.484994,...,0.046893,0.181045,-0.20218,-0.023034,-0.75262,0.220398,-0.050869,-0.145469,0.730125,0.783284
75,THBS1-SDC1_mean,-0.566904,0.52592,-0.017196,-0.139739,0.0,-0.526461,0.328172,0.038345,0.090953,...,0.065234,-0.318255,-0.295921,-0.04727,1.28857,-0.07939,-0.223551,0.630286,0.648113,0.347562
74,THBS1-LRP1_mean,-0.178151,0.106327,-0.003481,-0.024222,0.0,-0.668146,0.122611,0.233172,-0.241298,...,-0.085542,-0.055856,-0.097923,-0.008667,1.837819,0.219367,0.171857,-0.090153,0.637316,-0.025558
3,APOE-SDC4_mean,-0.228132,-0.029926,-0.013026,-0.089227,0.0,-1.321021,-0.499191,-0.517779,0.856357,...,0.555196,1.449139,-0.306215,0.784781,-1.048993,-0.366321,-0.673766,-0.251152,0.59069,-0.072084


In [65]:
top_10_coef_means['myCAF']

Unnamed: 0,LR_pair,MT1X EC,Tip EC,Venous EC,EHT EC,Arterial EC,Normal Duct,Acinar,Notta BasalA,EMT Duct,...,mDC,GRN TAM,SPP1 TAM,LAMP3 DC,myCAF,qPSC,smPSC,Myocyte,csCAF,IL11 CAF
74,THBS1-LRP1_mean,-0.178151,0.106327,-0.003481,-0.024222,0.0,-0.668146,0.122611,0.233172,-0.241298,...,-0.085542,-0.055856,-0.097923,-0.008667,1.837819,0.219367,0.171857,-0.090153,0.637316,-0.025558
66,TGFB1-ENG_mean,0.317829,-0.006938,0.542439,-0.099869,0.0,-0.614702,-0.45444,0.112053,-0.223972,...,-0.280828,0.318692,-0.313638,-0.03681,1.807552,0.942952,-0.026652,-0.275441,2.082255,-0.083886
77,THBS2-LRP1_mean,-0.109253,-0.119741,-0.001742,-0.015053,0.0,-0.476457,-0.101167,-0.10625,-0.160487,...,-0.053404,-0.035259,-0.058736,-0.005546,1.591998,-0.137017,-0.151601,-0.055356,0.583161,-0.015364
78,TIMP1-LRP1_mean,-0.084424,0.822286,0.15804,-0.144608,0.0,-0.142257,0.428441,-0.040276,-0.707541,...,-0.09161,-0.101962,0.90876,-0.043363,1.531711,0.668283,0.670497,0.24519,1.79768,-0.122849
23,FN1-ITGB5_mean,-0.228128,-0.271746,-0.003942,-0.033082,0.0,0.351465,-0.016425,-0.230992,0.320795,...,-0.120059,-0.076864,-0.131891,-0.012075,1.489399,-0.277575,0.195155,-0.120846,0.350972,-0.034027
64,TGFB1-ACVRL1_mean,-0.111204,-0.129257,-0.001725,-0.016677,0.0,-0.094235,-0.102924,-0.107971,-0.162901,...,-0.053801,-0.035128,-0.058416,-0.005474,1.474168,-0.135413,-0.153815,-0.054031,1.219993,-0.014985
25,FN1-RPSA_mean,-0.739042,0.295083,-0.040699,-0.124664,0.0,-0.662854,0.558823,0.120917,0.27469,...,0.588434,-0.597468,-0.380692,-0.091857,1.453101,0.05729,-0.060239,1.614867,0.366396,-0.283313
75,THBS1-SDC1_mean,-0.566904,0.52592,-0.017196,-0.139739,0.0,-0.526461,0.328172,0.038345,0.090953,...,0.065234,-0.318255,-0.295921,-0.04727,1.28857,-0.07939,-0.223551,0.630286,0.648113,0.347562
17,COL1A1-ITGA5_mean,-0.119659,-0.130979,-0.001984,-0.015789,0.0,-0.537242,1.125605,-0.118293,-0.181484,...,-0.063154,-0.040855,-0.068279,-0.006276,1.180472,-0.131013,-0.166133,0.115136,-0.170156,-0.0165
44,LGALS3-EGFR_mean,0.404697,0.160253,-0.005915,-0.032084,0.0,-0.606987,-0.226901,-0.23219,-0.370615,...,-0.129529,-0.084373,-0.142958,-0.0129,0.904631,0.688428,-0.072493,-0.115151,-0.208562,-0.033598


: 

In [None]:
# convert top_10_confidence_intervals['fibroblast'] to a dataframe
celltype_top_10_CI = pd.DataFrame(top_10_confidence_intervals['Fibroblast'])
celltype_top_10_coef_mean = pd.DataFrame(top_10_coef_means['Fibroblast'])