In [2]:
import scanpy as sc
import squidpy as sq
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
import os
import gzip
import numpy as np

# Global matplotlib defaults for every figure produced below:
# 150 dpi, serif font, and a uniform 12 pt size for all text elements.
plt.rcParams.update({
    'figure.dpi': 150,
    'font.family': ['serif'],
    'font.size': 12,
    'axes.labelsize': 12,
    'axes.titlesize': 12,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

## Class for handling squidpy data from a Visium HD experiment:

In [None]:
class viziumHD:
    """Load a Visium HD output directory with scanpy and write a QC report.

    Attributes:
        path: Directory holding the Visium output read by ``sc.read_visium``.
        outPath: Directory where report files (e.g. the QC PDF) are written.
        andata: Object returned by ``sc.read_visium`` for ``path``; ``None``
            until the data has been loaded.
    """

    def __init__(self, path=None, outPath=None):
        """Create the handler and, when ``path`` is given, load the data.

        The original class only offered ``init`` (no dunder), so constructing
        ``viziumHD(path, outPath)`` failed. Both arguments are optional so the
        older two-step usage ``obj = viziumHD(); obj.init(path, outPath)``
        keeps working.

        Args:
            path: Visium output directory, or ``None`` to defer loading.
            outPath: Directory for generated reports.
        """
        self.path = None
        self.outPath = outPath
        self.andata = None
        if path is not None:
            self.init(path, outPath)

    def init(self, path, outPath):
        """Store the paths, ensure the positions CSV exists, and load the data.

        Args:
            path: Visium output directory.
            outPath: Directory for generated reports.
        """
        self.path = path
        self.outPath = outPath
        self.parquet_to_csv()
        self.andata = self.readVizHD()

    def parquet_to_csv(self):
        """Create ``tissue_positions_list.csv`` from the parquet file if absent.

        Does nothing when the CSV already exists in ``self.path``; otherwise
        reads ``tissue_positions.parquet`` from the same directory and writes
        it out as CSV without the index column.
        """
        file_path = os.path.join(self.path, 'tissue_positions_list.csv')
        if os.path.exists(file_path):
            return
        df = pd.read_parquet(os.path.join(self.path, 'tissue_positions.parquet'))
        # NOTE(review): the CSV is written with a header row; confirm that the
        # reader used in readVizHD expects a header for a file with this name.
        df.to_csv(file_path, index=False)

    def readVizHD(self):
        """Return the result of ``sc.read_visium`` for ``self.path``."""
        return sc.read_visium(path=self.path)

    def qcReport(self):
        """Compute QC metrics and save four histograms to ``Quality_Control.pdf``.

        Runs ``sc.pp.calculate_qc_metrics`` in place on ``self.andata`` and then
        plots, side by side, the ``total_counts`` and ``n_genes_by_counts``
        columns of ``self.andata.obs`` - each once in full and once restricted
        to values below a fixed threshold. The PDF is written into
        ``self.outPath``.
        """
        sc.pp.calculate_qc_metrics(self.andata, inplace=True)
        obs = self.andata.obs

        # One entry per panel: (obs column, exclusive upper bound or None,
        # bin count or None for seaborn's default, title, x-axis label).
        panels = (
            ("total_counts", None, None,
             'Total Counts per Cell', 'Total Counts'),
            ("total_counts", 10000, 40,
             'Total Counts per Cell (Threshold < 10,000)', 'Total Counts'),
            ("n_genes_by_counts", None, 60,
             'Number of Genes Detected per Cell', 'Number of Genes'),
            ("n_genes_by_counts", 4000, 60,
             'Number of Genes Detected per Cell (Threshold < 4,000)', 'Number of Genes'),
        )

        with PdfPages(os.path.join(self.outPath, 'Quality_Control.pdf')) as pdf:
            fig, axs = plt.subplots(1, len(panels), figsize=(20, 5))
            try:
                for ax, (column, upper, bins, title, xlabel) in zip(axs, panels):
                    values = obs[column]
                    if upper is not None:
                        values = values[values < upper]
                    # Only pass ``bins`` when the panel specifies one, so the
                    # unbinned panel keeps seaborn's automatic binning.
                    hist_kwargs = {} if bins is None else {"bins": bins}
                    sns.histplot(values, kde=False, ax=ax, **hist_kwargs)
                    ax.set_title(title)
                    ax.set_xlabel(xlabel)
                    ax.set_ylabel('Frequency')

                fig.tight_layout()
                # Save and close this specific figure rather than whatever
                # pyplot currently considers the "current" one.
                pdf.savefig(fig)
            finally:
                plt.close(fig)
            
            
        