In [1]:
# Import packages
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import scipy.sparse
import pandas as pd
import seaborn as sns
from scipy.stats import pearsonr
import csv
import yaml
import random
import re

In [2]:
# Load the processed data into an AnnData object.
# NOTE: this is an absolute, machine-specific path; it must be adjusted when the
# notebook is run on another machine.
PROCESSED_H5AD_PATH = "/Users/aumchampaneri/Databases/Triple/Hs_Nor-CKD-AKF_scRNA_processed.h5ad"
adata = sc.read_h5ad(PROCESSED_H5AD_PATH)

In [4]:
# Load the gene dictionary from the csv file.
# For every data row, column 0 becomes the dict key and column 1 the dict value.
gene_dict = {}
with open('complement_gene_dictionary.csv', newline='') as file:
    reader = csv.reader(file)
    next(reader, None)  # Skip header; the default avoids StopIteration on an empty file
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line (e.g. a stray empty line at the
            # end of the file); indexing it would raise IndexError, so skip it.
            continue
        gene_dict[row[0]] = row[1]

# Extract keys and values into separate lists.
# NOTE: the variable names are counter-intuitive: gene_dict_names holds the dict
# KEYS (first csv column) and gene_dict_keys holds the dict VALUES (second column).
gene_dict_names = list(gene_dict.keys())
gene_dict_keys = list(gene_dict.values())

# Change the name of some entries in gene_dict_names to fix plotting errors.
# Each (pattern, replacement) pair below is applied to every name, one pair after
# another in the order listed. The \b anchors restrict a match to the symbol as a
# whole word, so longer identifiers that merely contain it are left untouched.
for symbol_pattern, plot_label in (
    (r'\bC2\b', 'C2_ENSG00000166278'),
    (r'\bC3\b', 'C3_ENSG00000125730'),
    (r'\bC6\b', 'C6_ENSG00000039537'),
    (r'\bC7\b', 'C7_ENSG00000112936'),
    (r'\bC9\b', 'C9_ENSG00000113600'),
):
    gene_dict_names = [
        re.sub(symbol_pattern, plot_label, name) for name in gene_dict_names
    ]

# Load the tissue type dictionary from the yaml file.
# The loop below treats it as a mapping of group name -> collection of cell types.
with open("Tissue Type Dictionary.yaml", "r") as file:
    cell_type_group = yaml.safe_load(file)

# Map cell types to groups.
# Every cell is labelled 'Other' first; cells whose 'cell_type' is listed under a
# group are then relabelled with that group. Groups are processed in dictionary
# order, so a cell type listed under several groups ends up with the last one.
adata.obs['cell_type_group'] = 'Other'
for group, cell_types in cell_type_group.items():
    in_group = adata.obs['cell_type'].isin(cell_types)
    adata.obs.loc[in_group, 'cell_type_group'] = group

## Test differential expression of complement genes in different disease states

In [3]:
# Display the observation metadata table (adata.obs) for a visual check of its
# columns, e.g. 'cell_type' and 'disease'.
adata.obs

Unnamed: 0,orig.ident,nCount_RNA,SpecimenID,LibraryID,SampleID,Run,clusterNumber,subclass.l1,dataSource,diseasetype,...,cell_type,assay,disease,organism,sex,tissue,self_reported_ethnicity,development_stage,observation_joinid,n_genes
S2008000605HRT_AAACCCAGTGCACATT-1,S2008000605HRT,10505.0,S-2008-000605-HRT,5655_EO-2,163-3,5655_EO,2,PC,KPMP,Reference,...,kidney collecting duct principal cell,10x 3' v3,normal,Homo sapiens,male,kidney,European,sixth decade stage,%qABS>?Y(#,3651
S2008000605HRT_AAACCCAGTGCCTACG-1,S2008000605HRT,9698.0,S-2008-000605-HRT,5655_EO-2,163-3,5655_EO,2,PC,KPMP,Reference,...,kidney collecting duct principal cell,10x 3' v3,normal,Homo sapiens,male,kidney,European,sixth decade stage,YkxOG(VHZD,3070
S2008000605HRT_AAAGAACGTAGAGACC-1,S2008000605HRT,1540.0,S-2008-000605-HRT,5655_EO-2,163-3,5655_EO,11,VSMC/MC/FIB,KPMP,Reference,...,kidney interstitial cell,10x 3' v3,normal,Homo sapiens,male,kidney,European,sixth decade stage,6kvlh!ojgC,904
S2008000605HRT_AAAGAACGTCGGAACA-1,S2008000605HRT,4958.0,S-2008-000605-HRT,5655_EO-2,163-3,5655_EO,2,PC,KPMP,Reference,...,kidney collecting duct principal cell,10x 3' v3,normal,Homo sapiens,male,kidney,European,sixth decade stage,bA>ax>bp{_,1975
S2008000605HRT_AAAGAACGTTAACAGA-1,S2008000605HRT,6141.0,S-2008-000605-HRT,5655_EO-2,163-3,5655_EO,2,PC,KPMP,Reference,...,kidney collecting duct principal cell,10x 3' v3,normal,Homo sapiens,male,kidney,European,sixth decade stage,OK#dxbCA<9,3041
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Sample1162EO2_TTTGGAGAGCAGGCAT-1,Sample1162EO2,10499.0,Sample1162-EO2,1162-EO-2,108,1162,2,PC,UMICH,Reference,...,kidney connecting tubule epithelial cell,10x 3' v3,normal,Homo sapiens,male,kidney,European,52-year-old stage,K{;U<2SC?^,3253
Sample1162EO2_TTTGGAGAGTCGCGAA-1,Sample1162EO2,3354.0,Sample1162-EO2,1162-EO-2,108,1162,8,EC,UMICH,Reference,...,endothelial cell,10x 3' v3,normal,Homo sapiens,male,kidney,European,52-year-old stage,x>+|^jssu%,1243
Sample1162EO2_TTTGGAGAGTGCTCGC-1,Sample1162EO2,5811.0,Sample1162-EO2,1162-EO-2,108,1162,3,PT,UMICH,Reference,...,epithelial cell of proximal tubule,10x 3' v3,normal,Homo sapiens,male,kidney,European,52-year-old stage,-x)X^JV+yf,1742
Sample1162EO2_TTTGGAGGTGATACTC-1,Sample1162EO2,15384.0,Sample1162-EO2,1162-EO-2,108,1162,7,IC,UMICH,Reference,...,kidney collecting duct principal cell,10x 3' v3,normal,Homo sapiens,male,kidney,European,52-year-old stage,SEh9=NfD|W,3771
