In [None]:
import os

import numpy as np
import pandas as pd

In [None]:
# Build a gene x gene co-absence matrix from the hepatocyte expression data.
#
# Steps:
#   1. Load the gene symbols of interest and the expression matrix (rows are
#      indexed by the 'Gene' column).
#   2. Keep only genes present in both.
#   3. Binarise expression against `threshold` and flag "absent" entries.
#   4. Multiply the absence matrix by its transpose: entry (i, j) is the number
#      of expression columns in which gene i and gene j are both absent; the
#      diagonal holds each gene's own absence count.
#   5. Save the result as CSV with gene symbols as row and column labels.
expression_file = "/home/user-kp/anugreha/SL_cancer/K-562/hepatocyte_expression_matrix.csv"
gene_list_file = "/home/user-kp/anugreha/SL_cancer/K-562/gene_symbols_k562.csv"
output_csv = "/home/user-kp/anugreha/SL_cancer/matrices/hepatocyte_matrix.csv"
threshold = 0  # Expression ≤ 0 → absent, > 0 → present
preview_size = 10  # number of overlapping gene names echoed in the log line

print("Loading gene list...")
gene_list = pd.read_csv(gene_list_file)['Gene'].tolist()

print("Loading expression matrix...")
expression_df = pd.read_csv(expression_file).set_index('Gene')

# Genes found in both inputs; sorted so the matrix row/column order is deterministic.
valid_genes = sorted(set(gene_list) & set(expression_df.index))
if not valid_genes:
    # Fail early instead of silently writing an empty matrix that breaks later steps.
    raise ValueError(
        "No overlap between the gene list and the expression matrix index; "
        "check that both files use the same gene identifiers."
    )

# Only echo a short preview: the full list can contain thousands of symbols.
shown = ", ".join(str(gene) for gene in valid_genes[:preview_size])
suffix = ", ..." if len(valid_genes) > preview_size else ""
print(f"Found {len(valid_genes)} overlapping genes (showing up to {preview_size}): {shown}{suffix}")

filtered_df = expression_df.loc[valid_genes]

# NOTE: missing values compare as False against the threshold, so a NaN entry
# is treated as "absent" here.
binary_present = (filtered_df > threshold).astype(int)  # 1=present, 0=absent
binary_absent = 1 - binary_present  # Flip to 0=present, 1=absent

co_absence_matrix = binary_absent.dot(binary_absent.T)  # Genes × Genes

# Make sure the destination directory exists so the save cannot fail after
# the (potentially expensive) matrix product.
output_dir = os.path.dirname(output_csv)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

co_absence_matrix.to_csv(output_csv)
print(f"\nSaved dense matrix to: {output_csv}")

In [None]:
# Sanity check: reload the saved co-absence matrix and report its dimensions.
# The first CSV column holds the gene labels (the row index written by
# `to_csv`), so it is read back as the index. Reading it as a data column
# would report one column too many, i.e. (n, n + 1) instead of (n, n).
df = pd.read_csv('/home/user-kp/anugreha/SL_cancer/matrices/hepatocyte_matrix.csv', index_col=0)
shape = df.shape

print(f"The shape of the matrix is: {shape}")


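## Potential synthetic-lethal (SL) gene pairs

Gene pairs whose co-absence count is zero (the two genes are never absent together in any column of the expression matrix) are collected below as potential SL candidates.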

In [None]:
# Summary statistics for the saved co-absence matrix: the largest count, and
# how many gene pairs (strict upper triangle) have a co-absence count of zero.
output_csv = "/home/user-kp/anugreha/SL_cancer/matrices/hepatocyte_matrix.csv"
co_absence_matrix = pd.read_csv(output_csv, index_col=0)
matrix = co_absence_matrix.to_numpy()

# k=1 starts one step above the diagonal, so self-pairs are excluded and each
# unordered pair is visited exactly once.
upper_tri_indices = np.triu_indices_from(matrix, k=1)
upper_tri_values = matrix[upper_tri_indices]

# Compute everything first, then report in a fixed order.
n = matrix.shape[0]  # Number of genes
total_pairs = n * (n - 1) // 2  # Upper triangular pairs excluding diagonal
max_count = matrix.max()
zero_count = (upper_tri_values == 0).sum()
percentage_zeros = (zero_count / total_pairs) * 100

report_lines = [
    f"Highest count in the matrix: {max_count}",
    f"Number of zeros in the upper triangular (excluding diagonal): {zero_count}",
    f"Total genes (n): {n}",
    f"Total possible gene pairs (upper triangular, excluding diagonal): {total_pairs}",
    f"Percentage of zeros: {percentage_zeros:.2f}%",
]
for report_line in report_lines:
    print(report_line)

In [None]:
# Extract every gene pair whose co-absence count is zero and save the pairs
# as a two-column CSV (Gene1, Gene2).
input_matrix = "/home/user-kp/anugreha/SL_cancer/matrices/hepatocyte_matrix.csv"
output_file = "/home/user-kp/anugreha/SL_cancer/potential_SL/hepatocyte.csv"

co_absence_matrix = pd.read_csv(input_matrix, index_col=0)
matrix = co_absence_matrix.values
gene_names = co_absence_matrix.index.tolist()

# Strict upper triangle (k=1): each unordered pair once, no self-pairs.
upper_tri_indices = np.triu_indices_from(matrix, k=1)
row_idx, col_idx = upper_tri_indices
total_pairs = len(row_idx)

# Vectorized selection of zero-count pairs; the mask keeps the row-major
# order of the upper triangle, so the pairs come out in the same order as a
# pair-by-pair scan would produce.
is_zero = matrix[upper_tri_indices] == 0
gene_labels = co_absence_matrix.index.to_numpy()
zero_pairs = list(zip(gene_labels[row_idx[is_zero]].tolist(),
                      gene_labels[col_idx[is_zero]].tolist()))

zero_pairs_df = pd.DataFrame(zero_pairs, columns=['Gene1', 'Gene2'])

# Create the destination directory if needed so the save does not fail.
output_dir = os.path.dirname(output_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
zero_pairs_df.to_csv(output_file, index=False)

print(f"Found {len(zero_pairs)} zero-count gene pairs.")
print(f"Total genes: {len(gene_names)}")
# Report the real number of candidate pairs, not the number of zero-count pairs.
print(f"Total possible gene pairs (upper triangular): {total_pairs}")
print(f"Saved {len(zero_pairs)} zero-count gene pairs to: {output_file}")