## Plots from DEG hits post Seurat + Metascape analysis

In [6]:
# test_imports in this cell, restart kernel whenever edits to modules are made

import sys
import os
import numpy as np
import pandas as pd
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import pyplot as plt
from matplotlib import gridspec
import seaborn as sns

# Make the project's local modules importable: they live in ../src relative to
# the directory the notebook kernel was started in.
current_dir = os.getcwd()
src_dir = os.path.abspath(os.path.join(current_dir, '..', 'src'))
if src_dir not in sys.path:  # guard keeps sys.path free of duplicates when the cell is re-run
    sys.path.append(src_dir)

# Root folder that the DEG input/output paths in this notebook are joined onto.
# The default is the original author's local path; set the DEG_WORKING_DIR
# environment variable before starting the kernel to point the notebook at
# another copy of the data without editing this cell.
working_dir = os.environ.get(
    "DEG_WORKING_DIR",
    "/Users/Akanksha/JugnuLab/data_analysis/datasets/snRNA_ERCC1_KO_MF/DEG_pathway_analysis/deg_KO_group/",
)

In [9]:
########## file paths ##########
# Every DEG table lives in a per-cell-type sub-folder of working_dir. The file
# names are reproduced exactly as written; note that the male Fast IIB / Fast IIX
# files and both MuSc files contain a space in their names.


def _in_working_dir(relative_path):
    """Return `relative_path` joined onto the notebook's `working_dir`."""
    return os.path.join(working_dir, relative_path)


# Fast IIB
female_fast2b_path = _in_working_dir("FastIIB/Female_FastIIB_unfiltered_KO_DEGs.csv")
male_fast2b_path = _in_working_dir("FastIIB/Male_Fast IIB_unfiltered_KO_DEGs.csv")

# Fast IIX
female_fast2x_path = _in_working_dir("FastIIX/Female_FastIIX_unfiltered_KO_DEGs.csv")
male_fast2x_path = _in_working_dir("FastIIX/Male_Fast IIX_unfiltered_KO_DEGs.csv")

# FAPs
female_faps_path = _in_working_dir("FAPs/Female_FAPs_unfiltered_KO_DEGs.csv")
male_faps_path = _in_working_dir("FAPs/Male_FAPs_unfiltered_KO_DEGs.csv")

# MuSc
female_musc_path = _in_working_dir("MuSc/Female_Skeleton MuSc_unfiltered_KO_DEGs.csv")
male_musc_path = _in_working_dir("MuSc/Male_Skeleton MuSc_unfiltered_KO_DEGs.csv")


In [36]:
############ load DEGs ############
# Select the cell type to analyse by pointing these two variables at the
# matching female/male path variables (currently MuSc).
path_female_degs = female_musc_path
path_male_degs = male_musc_path


def _read_degs_with_fold_change(csv_path):
    """Read one DEG table and append 'avg_FC', the linear fold change 2**avg_log2FC."""
    deg_table = pd.read_csv(csv_path, header=0)
    # Convert log2 fold change to a fold-change ratio by exponentiating with base 2.
    deg_table['avg_FC'] = 2**deg_table['avg_log2FC']
    return deg_table


# Load the female and male DEG tables into memory so that the rows sharing a
# gene can later be merged into a single dataframe.
female_degs = _read_degs_with_fold_change(path_female_degs)
male_degs = _read_degs_with_fold_change(path_male_degs)

In [37]:
# Inner-join the female and male tables on the gene-name column (it arrives from
# the CSVs as 'Unnamed: 0') so that only genes present in both tables remain,
# then give that column a readable name.
common_genes = (
    female_degs
    .merge(male_degs, on='Unnamed: 0', how='inner', suffixes=('_female', '_male'))
    .rename(columns={'Unnamed: 0': 'gene_name'})
)

# Direction must agree between the sexes: both strictly up or both strictly down.
female_lfc = common_genes['avg_log2FC_female']
male_lfc = common_genes['avg_log2FC_male']
both_up = female_lfc.gt(0) & male_lfc.gt(0)
both_down = female_lfc.lt(0) & male_lfc.lt(0)

# The adjusted p-value must be below 0.05 in both the female and the male table.
significant_in_both = common_genes['p_val_adj_female'].lt(0.05) & common_genes['p_val_adj_male'].lt(0.05)

# Columns that are not carried into the exported table.
dropped_columns = [
    'pct.1_female', 'pct.1_male',
    'pct.2_female', 'pct.2_male',
    'p_val_female', 'p_val_male',
]
filtered_degs = common_genes[(both_up | both_down) & significant_in_both].drop(columns=dropped_columns)

print(filtered_degs.head())
print(filtered_degs.shape)

  gene_name  avg_log2FC_female  p_val_adj_female  avg_FC_female  \
0    Malat1          -0.492124      1.070925e-12       0.710978   
2    mt-Co3           0.836589      5.953707e-07       1.785823   
4   mt-Atp6           0.884849      4.210982e-06       1.846572   
6    mt-Co2           0.779252      1.026262e-04       1.716240   

   avg_log2FC_male  p_val_adj_male  avg_FC_male  
0        -0.310829    1.609211e-04     0.806179  
2         0.832779    6.106266e-11     1.781113  
4         0.725872    2.340309e-05     1.653900  
6         0.599130    1.816664e-03     1.514803  
(4, 7)


In [38]:
# Split the shared DEGs by direction of change. The split reads only the female
# log2 fold change: positive rows are "upregulated", negative rows "downregulated".
female_direction = filtered_degs['avg_log2FC_female']
upregulated_common = filtered_degs.loc[female_direction.gt(0)]
downregulated_common = filtered_degs.loc[female_direction.lt(0)]
# Report (rows, columns) of each subset, one per line.
print(upregulated_common.shape, downregulated_common.shape, sep="\n")

(3, 7)
(1, 7)


In [39]:
# Write the shared up-/down-regulated DEGs as common_MF_DEGs_*.csv (without the
# dataframe index). The output folder is taken from path_female_degs so that the
# files land next to the input tables of whichever cell type was loaded, rather
# than always in "MuSc/" (which would overwrite the MuSc results when another
# cell type is selected). For the MuSc inputs this resolves to the same folder
# as before.
output_dir = os.path.dirname(path_female_degs)
upregulated_common.to_csv(os.path.join(output_dir, "common_MF_DEGs_upregulated.csv"), index=False)
downregulated_common.to_csv(os.path.join(output_dir, "common_MF_DEGs_downregulated.csv"), index=False)

In [None]:
# # export this pandas dataframe to google sheets using pygsheets
# import pygsheets
# #authorization
# gc = pygsheets.authorize(service_file='/Users/Akanksha/JugnuLab/data_analysis/credentials.json')
# #open the google spreadsheet (where 'DEGs_common_genes' is the name of my sheet)
# sh = gc.open('DEGs_common_genes')
# #select the first sheet
# wks = sh[0]
# #update the first sheet with df, starting at cell B2.
# wks.set_dataframe(filtered_degs, 'B2')
# print("Dataframe successfully uploaded to google sheets")