In [10]:
import numpy as np
import pandas as pd
import scanpy as sc
import scrublet as scr
import scipy.io
import matplotlib.pyplot as plt
import os
import scvelo as scv
import harmonypy as hm
from matplotlib.pyplot import rc_context
import re
import seaborn as sns
from adpbulk import ADPBulk


In [11]:
# Notebook-wide scanpy figure defaults: 150 dpi rendering with 8 pt fonts.
sc.settings.set_figure_params(fontsize=8, dpi=150)

# Load integrated and harmonized data

In [12]:
# Directory that the .h5ad input is read from in the next cell.
# NOTE(review): this is an absolute, machine-specific path; it must be edited
# before the notebook can run on another system.
datapath = r"/nfs_master/prakrithi/abhay/testis_scripts/"

In [13]:
# Load the AnnData object from `datapath`.
# os.path.join is used instead of string concatenation so the load still works
# if `datapath` is ever set without a trailing slash.
germcell = sc.read_h5ad(os.path.join(datapath, 'germcells.h5ad'))

# Show the AnnData summary (dimensions and available annotation slots).
germcell

AnnData object with n_obs × n_vars = 56441 × 3000
    obs: 'samples', 'percent_TE', 'percent_Alu', 'percent_AluY', 'percent_L1', 'percent_LINE', 'percent_LTR', 'percent_SVA', 'doublet', 'predicted', 'n_genes', 'percent_mito', 'percent_ribo', 'n_genes_by_counts', 'total_counts', 'leiden', 'clusters'
    var: 'n_cells', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'clusters_colors', 'dendrogram_clusters', 'dendrogram_leiden', 'dendrogram_samples', 'hvg', 'leiden', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'rank_genes_groups', 'samples_colors', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'

In [19]:
# Distinct sample identifiers present in the per-cell metadata.
germcell.obs['samples'].unique().tolist()

['Donor1_r1',
 'Donor1_r2',
 'Donor2_r1',
 'Donor2_r2',
 'Donor3_r1',
 'Donor3_r2',
 'Normal_1',
 'Normal_2',
 'Normal_3',
 'Crypto_1',
 'Crypto_2',
 'Cryto_3',
 'iNOA_1',
 'iNOA_2',
 'iNOA_3',
 'OA']

In [20]:
# Inspect the per-cell annotation table (`obs`) of the loaded object.
germcell.obs

Unnamed: 0,samples,percent_TE,percent_Alu,percent_AluY,percent_L1,percent_LINE,percent_LTR,percent_SVA,doublet,predicted,n_genes,percent_mito,percent_ribo,n_genes_by_counts,total_counts,leiden,clusters
AAACCTGAGAAACCTA-1-Donor1_r1,Donor1_r1,0.310769,0.215581,0.034718,0.043784,0.0001,0.007920,0.001893,0.070411,False,5493,0.054094,0.00005,5493,20076.0,6,Spermatogonia
AAACCTGAGACTGGGT-1-Donor1_r1,Donor1_r1,0.131544,0.100671,0.005369,0.005369,0.0000,0.001342,0.000000,0.047673,False,411,0.193289,0.00000,411,745.0,0,Elongating_spermatids
AAACCTGAGATCCTGT-1-Donor1_r1,Donor1_r1,0.111072,0.065378,0.003163,0.006327,0.0000,0.003515,0.000000,0.064560,False,1488,0.025659,0.00000,1488,2845.0,0,Elongating_spermatids
AAACCTGAGATGGGTC-1-Donor1_r1,Donor1_r1,0.061363,0.045637,0.002158,0.004317,0.0000,0.001850,0.000617,0.035771,False,1684,0.133518,0.00000,1684,3243.0,7,Pre_pachytene
AAACCTGCACATAACC-1-Donor1_r1,Donor1_r1,0.117040,0.108434,0.005164,0.001721,0.0000,0.001721,0.000000,0.072306,False,320,0.141136,0.00000,320,581.0,3,Post_pachytene
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTCCTCGTGTTAAGA-1-OA,OA,0.079515,0.024259,0.004043,0.002695,0.0000,0.004043,0.000000,0.034958,False,375,0.000000,0.00000,375,742.0,0,Elongating_spermatids
TTTCCTCTCAAGATCC-1-OA,OA,0.038835,0.007767,0.000000,0.003883,0.0000,0.000000,0.000000,0.241935,False,304,0.128155,0.00000,304,515.0,0,Elongating_spermatids
TTTGGTTAGTGTACTC-1-OA,OA,0.032050,0.006290,0.000899,0.002247,0.0000,0.003744,0.000150,0.096902,False,1827,0.012131,0.00000,1827,6677.0,0,Elongating_spermatids
TTTGGTTGTCATCGGC-1-OA,OA,0.359563,0.264684,0.066265,0.033133,0.0000,0.012048,0.006401,0.024532,False,1166,0.045181,0.00000,1166,2656.0,7,Pre_pachytene


# Analysis

In [72]:
def map_condition(x):
    """Map a sample name to its condition label.

    Parameters
    ----------
    x : str
        Sample identifier, e.g. ``'Donor1_r1'``, ``'Crypto_2'``, ``'iNOA_1'``
        or ``'OA'``.

    Returns
    -------
    str
        ``'Normal'`` for names containing ``'Donor'`` or ``'Normal'``;
        ``'Cryptozoospermia'`` for names containing ``'Crypto'`` (or the
        misspelling ``'Cryto'``); ``'Non-obstructive_azoospermia'`` for names
        containing ``'iNOA'``; ``'obstructive_azoospermia'`` for anything else.
    """
    if "Donor" in x or "Normal" in x:
        return "Normal"
    # The dataset contains a misspelled sample name, 'Cryto_3'. Matching only
    # "Crypto" would let it fall through to the catch-all below and silently
    # label its cells as obstructive azoospermia.
    if "Crypto" in x or "Cryto" in x:
        return "Cryptozoospermia"
    if "iNOA" in x:
        return "Non-obstructive_azoospermia"
    # Catch-all kept for backward compatibility: every remaining sample name
    # (in this dataset only 'OA') is treated as obstructive azoospermia.
    return "obstructive_azoospermia"

In [73]:
# Derive a per-cell 'condition' label from each cell's sample name, then show
# the updated metadata table.
sample_names = germcell.obs['samples']
germcell.obs['condition'] = sample_names.map(map_condition)
germcell.obs

Unnamed: 0,samples,percent_TE,percent_Alu,percent_AluY,percent_L1,percent_LINE,percent_LTR,percent_SVA,doublet,predicted,n_genes,percent_mito,percent_ribo,n_genes_by_counts,total_counts,leiden,clusters,Condition,condition
AAACCTGAGAAACCTA-1-Donor1_r1,Donor1_r1,0.310769,0.215581,0.034718,0.043784,0.0001,0.007920,0.001893,0.070411,False,5493,0.054094,0.00005,5493,20076.0,6,Spermatogonia,Diseased,Normal
AAACCTGAGACTGGGT-1-Donor1_r1,Donor1_r1,0.131544,0.100671,0.005369,0.005369,0.0000,0.001342,0.000000,0.047673,False,411,0.193289,0.00000,411,745.0,0,Elongating_spermatids,Diseased,Normal
AAACCTGAGATCCTGT-1-Donor1_r1,Donor1_r1,0.111072,0.065378,0.003163,0.006327,0.0000,0.003515,0.000000,0.064560,False,1488,0.025659,0.00000,1488,2845.0,0,Elongating_spermatids,Diseased,Normal
AAACCTGAGATGGGTC-1-Donor1_r1,Donor1_r1,0.061363,0.045637,0.002158,0.004317,0.0000,0.001850,0.000617,0.035771,False,1684,0.133518,0.00000,1684,3243.0,7,Pre_pachytene,Diseased,Normal
AAACCTGCACATAACC-1-Donor1_r1,Donor1_r1,0.117040,0.108434,0.005164,0.001721,0.0000,0.001721,0.000000,0.072306,False,320,0.141136,0.00000,320,581.0,3,Post_pachytene,Diseased,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTCCTCGTGTTAAGA-1-OA,OA,0.079515,0.024259,0.004043,0.002695,0.0000,0.004043,0.000000,0.034958,False,375,0.000000,0.00000,375,742.0,0,Elongating_spermatids,Diseased,obstructive_azoospermia
TTTCCTCTCAAGATCC-1-OA,OA,0.038835,0.007767,0.000000,0.003883,0.0000,0.000000,0.000000,0.241935,False,304,0.128155,0.00000,304,515.0,0,Elongating_spermatids,Diseased,obstructive_azoospermia
TTTGGTTAGTGTACTC-1-OA,OA,0.032050,0.006290,0.000899,0.002247,0.0000,0.003744,0.000150,0.096902,False,1827,0.012131,0.00000,1827,6677.0,0,Elongating_spermatids,Diseased,obstructive_azoospermia
TTTGGTTGTCATCGGC-1-OA,OA,0.359563,0.264684,0.066265,0.033133,0.0000,0.012048,0.006401,0.024532,False,1166,0.045181,0.00000,1166,2656.0,7,Pre_pachytene,Diseased,obstructive_azoospermia


In [75]:
# Sanity check: list the distinct condition labels that were assigned.
germcell.obs['condition'].unique()

array(['Normal', 'Cryptozoospermia', 'obstructive_azoospermia',
       'Non-obstructive_azoospermia'], dtype=object)

In [76]:
# Number of cells per sample, taken as the non-null count of the 'percent_TE'
# column within each sample group.
per_sample_counts = germcell.obs.groupby(['samples']).count()
num_tot_cells = {
    sample: n_cells
    for sample, n_cells in zip(per_sample_counts.index, per_sample_counts.percent_TE)
}

num_tot_cells

{'Donor1_r1': 2515,
 'Donor1_r2': 3883,
 'Donor2_r1': 2508,
 'Donor2_r2': 3464,
 'Donor3_r1': 3201,
 'Donor3_r2': 4402,
 'Normal_1': 9032,
 'Normal_2': 7988,
 'Normal_3': 7162,
 'Crypto_1': 5069,
 'Crypto_2': 922,
 'Cryto_3': 5170,
 'iNOA_1': 2,
 'iNOA_2': 1,
 'iNOA_3': 131,
 'OA': 991}

In [77]:
# Count cells for every (sample, condition, cluster) combination.
grouped_counts = germcell.obs.groupby(['samples', 'condition', 'clusters']).count()

# Keep only combinations whose counts are not all zero.
non_empty = grouped_counts.sum(axis=1) > 0

# After reset_index the first four columns are the three group keys plus the
# first counted column ('percent_TE'), which is renamed to 'cell_count'.
# NOTE(review): the positional slice depends on 'percent_TE' being the first
# non-key column of `obs`.
cell_type_counts = (
    grouped_counts[non_empty]
    .reset_index()
    .iloc[:, 0:4]
    .rename(columns={'percent_TE': 'cell_count'})
)

# cell_type_counts.to_csv('germcell_count.csv')

cell_type_counts


Unnamed: 0,samples,condition,clusters,cell_count
0,Donor1_r1,Normal,Elongating_spermatids,701
1,Donor1_r1,Normal,Round_spermatids,411
2,Donor1_r1,Normal,Post_pachytene,590
3,Donor1_r1,Normal,Mature_spermatids,339
4,Donor1_r1,Normal,Spermatogonia,251
...,...,...,...,...
79,OA,obstructive_azoospermia,Round_spermatids,132
80,OA,obstructive_azoospermia,Post_pachytene,175
81,OA,obstructive_azoospermia,Mature_spermatids,117
82,OA,obstructive_azoospermia,Spermatogonia,61


In [78]:
# Attach each sample's total cell count, then express every cluster's count as
# a fraction of that total.
per_sample_total = cell_type_counts['samples'].map(num_tot_cells).astype(int)
cell_type_counts['total_cells'] = per_sample_total
cell_type_counts['frequency'] = cell_type_counts['cell_count'] / cell_type_counts['total_cells']

cell_type_counts

Unnamed: 0,samples,condition,clusters,cell_count,total_cells,frequency
0,Donor1_r1,Normal,Elongating_spermatids,701,2515,0.278728
1,Donor1_r1,Normal,Round_spermatids,411,2515,0.163419
2,Donor1_r1,Normal,Post_pachytene,590,2515,0.234592
3,Donor1_r1,Normal,Mature_spermatids,339,2515,0.134791
4,Donor1_r1,Normal,Spermatogonia,251,2515,0.099801
...,...,...,...,...,...,...
79,OA,obstructive_azoospermia,Round_spermatids,132,991,0.133199
80,OA,obstructive_azoospermia,Post_pachytene,175,991,0.176589
81,OA,obstructive_azoospermia,Mature_spermatids,117,991,0.118063
82,OA,obstructive_azoospermia,Spermatogonia,61,991,0.061554


In [82]:
# Box plot of per-sample cluster frequencies, split by condition.
# `plt` and `sns` come from the import cell at the top of the notebook.
fig, ax = plt.subplots(figsize=(5, 7))

sns.boxplot(data=cell_type_counts, x='clusters', y='frequency', hue='condition', ax=ax)

# Tilt the cluster names so that long labels do not overlap.
plt.xticks(rotation=35, rotation_mode='anchor', ha='right')

# Save BEFORE showing: once plt.show() has displayed and closed the figure, a
# later plt.savefig() would write a new, empty canvas instead of this plot.
fig.savefig('germcell_freq_plot')
plt.show()