In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
import seaborn as sns
import os
import matplotlib.pyplot as plt
# Global scanpy configuration: verbosity level 0 and shared figure defaults
# that apply to every plot produced later in the notebook.
sc.settings.verbosity = 0
figure_defaults = {"dpi": 80, "facecolor": "white", "frameon": False}
sc.settings.set_figure_params(**figure_defaults)


In [None]:
# Read the input dataset from its .h5ad file into an AnnData object.
input_h5ad = "/work/focal_crtical_prepr/focal_cortical.h5ad"
adata = sc.read_h5ad(input_h5ad)

In [None]:
# Display the summary of the object that was just loaded (cell output).
adata

In [None]:
# `.raw` is optional on an AnnData object. Fail with a clear message instead of an
# opaque "NoneType has no attribute 'to_adata'" when the input was saved without it.
if adata.raw is None:
    raise ValueError("adata.raw is not set; cannot rebuild adata_raw from it")

# Turn the snapshot stored in `adata.raw` into a standalone AnnData object.
adata_raw = adata.raw.to_adata()

In [None]:
# Save the object restored from `adata.raw` before any filtering is applied.
# `os` is imported in the notebook's first cell, so it is not re-imported here.
results_dir = "focal_crtical_prepr/Raw_preprocessed"  # relative to the kernel's working directory
results_file = os.path.join(results_dir, "focal_cortical_processed_RAW.h5ad")

# exist_ok=True keeps this cell re-runnable once the directory already exists.
os.makedirs(results_dir, exist_ok=True)

# Write the AnnData object to disk; a later cell writes to `results_file` again.
adata_raw.write(results_file)

In [None]:
# Display the summary of the object rebuilt from `adata.raw`.
adata_raw

In [None]:
# Filtering low quality cells: flag the gene families used as QC covariates.
#
# Gene-symbol casing depends on the species/annotation (e.g. human "MT-CO1" vs
# mouse "mt-Co1"). The previous version mixed a lowercase "mt-" prefix with
# uppercase "RPS"/"RPL"/"HB" patterns, so no single casing convention could
# satisfy all three flags, and a flag that never matches makes the corresponding
# pct_counts_* metric identically zero. Compare on upper-cased symbols so the
# flags work for either convention.
gene_symbols_upper = adata_raw.var_names.str.upper()

# mitochondrial genes
adata_raw.var["mt"] = gene_symbols_upper.str.startswith("MT-")
# ribosomal genes
adata_raw.var["ribo"] = gene_symbols_upper.str.startswith(("RPS", "RPL"))
# hemoglobin genes: "HB" followed by any character other than "P", "(" or ")"
adata_raw.var["hb"] = gene_symbols_upper.str.contains("^HB[^(P)]")

In [None]:
# Compute QC metrics in place, including per-cell metrics for each boolean
# gene-family column listed in `qc_flag_columns`.
qc_flag_columns = ["mt", "ribo", "hb"]
sc.pp.calculate_qc_metrics(
    adata_raw,
    qc_vars=qc_flag_columns,
    percent_top=[20],
    log1p=True,
    inplace=True,
)

# Show the object again so the newly added annotation columns are visible.
adata_raw

In [None]:
# Plot the top 20 genes (n_top=20) reported by scanpy's highest-expressed-genes plot.
sc.pl.highest_expr_genes(adata_raw, n_top=20)

In [None]:
# Quick sanity check on the value range of the expression matrix.
expression_matrix = adata_raw.X
print(
    "Min:", expression_matrix.min(),
    "Max:", expression_matrix.max(),
    "Mean:", expression_matrix.mean(),
)

In [None]:
# Minimum-support thresholds for the basic cell and gene filters.
min_genes_per_cell = 200
min_cells_per_gene = 3

# Both calls operate directly on adata_raw (their return values are not used).
sc.pp.filter_cells(adata_raw, min_genes=min_genes_per_cell)
sc.pp.filter_genes(adata_raw, min_cells=min_cells_per_gene)

In [None]:
# Display the object after the min_genes / min_cells filters.
adata_raw

In [None]:
# Generate scatter plot of total counts vs. detected genes per cell and return
# the axis object (show=False) so it can be annotated before rendering
ax = sc.pl.scatter(adata_raw, x="total_counts", y="n_genes_by_counts", show=False)

# Add a red dashed horizontal line at y = 4000, the n_genes_by_counts cutoff
# used by the filtering step that follows
ax.axhline(y=4000, color='red', linestyle='--', linewidth=1.5)

# Show the modified plot
plt.show()

In [None]:
# Keep cells that pass both QC cutoffs: fewer than 4000 detected genes and
# less than 10% mitochondrial counts. The result is materialized as a copy.
below_gene_cutoff = adata_raw.obs["n_genes_by_counts"] < 4000
below_mt_cutoff = adata_raw.obs["pct_counts_mt"] < 10
adata_raw = adata_raw[below_gene_cutoff & below_mt_cutoff, :].copy()

In [None]:
# Display the object after the n_genes_by_counts / pct_counts_mt filters.
adata_raw

In [None]:
# Overwrite the earlier raw export with the QC-filtered object.
# The path is rebuilt here instead of being read from the global `results_file`:
# that name is rebound to the normalized output further down, so re-running this
# cell afterwards would otherwise write un-normalized data over the normalized file.
filtered_raw_file = os.path.join(
    "focal_crtical_prepr/Raw_preprocessed", "focal_cortical_processed_RAW.h5ad"
)
adata_raw.write(filtered_raw_file)

In [None]:
# Work on a copy so the steps applied to adata_normalized below do not touch adata_raw.
adata_normalized = adata_raw.copy()

In [None]:
# Total-count normalization with a target sum of 1e4, followed by a log1p
# transform; both calls act directly on adata_normalized.
counts_per_cell_target = 1e4
sc.pp.normalize_total(adata_normalized, target_sum=counts_per_cell_target)
sc.pp.log1p(adata_normalized)

In [None]:
# Save the normalized, log-transformed object.
# `os` is imported in the notebook's first cell, so it is not re-imported here.
# NOTE: this rebinds `results_dir` / `results_file`, which pointed at the raw
# export until now; the following cell reads `results_file`.
results_dir = "focal_crtical_prepr/Normalized_preprocessed"  # relative to the kernel's working directory
results_file = os.path.join(results_dir, "focal_cortical_processed_normalized.h5ad")

# exist_ok=True keeps this cell re-runnable once the directory already exists.
os.makedirs(results_dir, exist_ok=True)

adata_normalized.write(results_file)

In [None]:
# NOTE(review): this repeats the write performed at the end of the previous cell
# (same object, same `results_file`), so the file is simply written a second time.
adata_normalized.write(results_file)

In [None]:
#print("Min:", adata_raw.X.min(), "Max:", adata_raw.X.max(), "Mean:", adata_raw.X.mean())

In [None]:
#print("Min:", adata_normalized.X.min(), "Max:", adata_normalized.X.max(), "Mean:", adata_normalized.X.mean())

In [None]:
# Final summary of the filtered, non-normalized object.
adata_raw

In [None]:
# Final summary of the normalized, log-transformed object.
adata_normalized