In [5]:
import scanpy as sc
import numpy as np
import csv
import pickle
import matplotlib
import math
from itertools import combinations
import pandas as pd
import matplotlib
# from pyvis.network import Network
import networkx as nx
from networkx.algorithms import bipartite
# from cdlib import algorithms
# from cdlib import NodeClustering


spot_diameter = 89.43 #pixels

In [6]:
def preprocessDf(df):
  """Transform ligand and receptor columns."""
  df["ligand-receptor"] = df["ligand"] + '-' + df["receptor"]
  df["component"] = df["component"] #.astype(str).str.zfill(2)

  return df

In [8]:
# Load gene_ids
gene_ids = []
with open("/Users/victoriagao/local_docs/NEST/stored_variables/gene_ids.txt", 'r') as file:
    for line in file:
        # Remove trailing newline characters and any leading/trailing whitespaces
        line = line.strip()
        gene_ids.append(line)

# Load coordinates
coordinates = np.load("/Users/victoriagao/local_docs/NEST/stored_variables/coordinates.npy")

# Load cell_barcode
with open('/Users/victoriagao/local_docs/NEST/stored_variables/cell_barcode.pkl', 'rb') as file:
    cell_barcode = pickle.load(file)

# Load connected_components (assume already have it)
with open("/Users/victoriagao/local_docs/NEST/stored_variables/filtered_connected_components.pkl", 'rb') as file:
    filtered_connected_components = pickle.load(file)

# Load pathologist's label
data_name = 'PDAC_64630'
if data_name == 'PDAC_64630':
    pathologist_label_file='/Users/victoriagao/local_docs/NEST/input/pathologist_annot/IX_annotation_artifacts_PDAC64630.csv' #IX_annotation_artifacts.csv' #
    pathologist_label=[]
    with open(pathologist_label_file) as file:
        csv_file = csv.reader(file, delimiter=",")
        for line in csv_file:
            pathologist_label.append(line)	
    	
    barcode_type=dict() # record the type (annotation) of each spot (barcode)
    for i in range (1, len(pathologist_label)):
        barcode_type[pathologist_label[i][0]] = pathologist_label[i][1]

In [9]:
filename_str = 'NEST_combined_output_PDAC_64630'+'.csv'
inputFile = '/Users/victoriagao/local_docs/NEST/output/From_Fatema/'+filename_str
df = pd.read_csv(inputFile, sep=",")
csv_record_final = df.values.tolist()
df_column_names = list(df.columns)
csv_record_final = [df_column_names] + csv_record_final

In [27]:
#### i.e. making a list of Dictionary to store matched records for each region
matched_records_set_lib = {}  

for set_index, connected_comp_set in enumerate(filtered_connected_components):
    matched_records = []

    for record in csv_record_final:
        from_cell, to_cell = record[0], record[1]
        if (from_cell in connected_comp_set) and (to_cell in connected_comp_set): # this is for tumor v tumor spots interaction
            matched_records.append(record)
        # if (from_cell in connected_comp_set) ^ (to_cell in connected_comp_set): # this is for tumor v stroma/stroma v tumor spots interaction, XOR argument
        #     matched_records.append(record)
    
    # Store matched records for this set index in the dictionary
    matched_records_set_lib[set_index+1] = matched_records

In [28]:
# Calculate Hill number q=1 for all 21 regions

import math

columns = ['from_cell','to_cell','ligand','receptor','attention_score','component','from_id','to_id']

hill_number_data = []

for key, value in matched_records_set_lib.items():
    df_matched = pd.DataFrame(value, columns=columns)
    df_matched_processed = preprocessDf(df_matched)
    ligand_receptor_counts = df_matched_processed['ligand-receptor'].value_counts()
    count_df = pd.DataFrame({'ligand-receptor': ligand_receptor_counts.index, 'count': ligand_receptor_counts.values})
    LR_richness = count_df.shape[0]
    print("LR richness:",LR_richness)
    # Summing
    total_counts = count_df['count'].sum()
    # Calculate the proportions
    count_df['proportion'] = count_df['count'] / total_counts
    # Calculate the Shannon-Wiener Diversity Index (H')
    shannon_entropy = -(count_df['proportion'] * count_df['proportion'].apply(math.log)).sum()
    # Calculate the Hill number (effective number of types) for q = 1
    hill_number_q1 = math.exp(shannon_entropy)
    ## Normalize Hill number by dividing by richness
    if LR_richness != 0:
        normalized_hill_number = hill_number_q1/LR_richness
    else:
        normalized_hill_number = 0
    # Print or display the calculated Shannon-Wiener Diversity Index
    print("For set "+str(key))
    print("Shannon-Wiener Diversity Index (H'): {:.4f}".format(shannon_entropy))
    print("Hill number of order 1: {:.4f}".format(hill_number_q1))
    print("Normalized Hill number:",normalized_hill_number)

    hill_number_data.append({'Set': key, 'Hill_number_q1': hill_number_q1,'normalized_hill_number': normalized_hill_number}) # putting the hill numbers into a list
    
# Create a DataFrame from the collected data
hill_number_df = pd.DataFrame(hill_number_data)
hill_number_dict = hill_number_df.set_index('Set')['normalized_hill_number'].to_dict()

LR richness: 39
For set 1
Shannon-Wiener Diversity Index (H'): 3.2846
Hill number of order 1: 26.6979
Normalized Hill number: 0.6845624880765441
LR richness: 5
For set 2
Shannon-Wiener Diversity Index (H'): 1.4942
Hill number of order 1: 4.4557
Normalized Hill number: 0.8911319467025024
LR richness: 79
For set 3
Shannon-Wiener Diversity Index (H'): 3.5169
Hill number of order 1: 33.6791
Normalized Hill number: 0.4263176131231739
LR richness: 35
For set 4
Shannon-Wiener Diversity Index (H'): 3.1094
Hill number of order 1: 22.4073
Normalized Hill number: 0.6402096972929471
LR richness: 53
For set 5
Shannon-Wiener Diversity Index (H'): 3.3757
Hill number of order 1: 29.2451
Normalized Hill number: 0.5517944680290484
LR richness: 0
For set 6
Shannon-Wiener Diversity Index (H'): -0.0000
Hill number of order 1: 1.0000
Normalized Hill number: 0
LR richness: 16
For set 7
Shannon-Wiener Diversity Index (H'): 2.5062
Hill number of order 1: 12.2586
Normalized Hill number: 0.7661637139511518
LR ri

In [29]:
# save the results
with open('/Users/victoriagao/local_docs/NEST/stored_variables/64630_hill_withinTumor.pickle', 'wb') as file:
    pickle.dump(hill_number_dict, file)

In [30]:
# with open('/Users/victoriagao/local_docs/NEST/stored_variables/64630_hill_TS_interact.pickle', 'rb') as file:
#     loaded_dict = pickle.load(file)

In [31]:
# loaded_dict

{1: 0.8244411250279177,
 2: 0.9570579153422437,
 3: 0.6121921469058059,
 4: 0.6169169461589945,
 5: 0.6988701678116384,
 6: 1.0,
 7: 0.8964972067642384,
 8: 0.7991367703609603,
 9: 0.9473228540689989,
 10: 0.0,
 11: 0.820498824997882,
 12: 0.0,
 13: 0.9473228540689989,
 14: 1.0000000000000002,
 15: 1.0,
 16: 1.0,
 17: 1.0,
 18: 1.0000000000000002,
 19: 0.0,
 20: 1.0,
 21: 0.0}