# Microglia Analysis: Feature 24 vs. 57
This notebook compares and analyzes feature 24 and feature 57 in microglia. It is recommended to read it alongside the "Feature 5 vs. 57 Microglia.ipynb" notebook.

All data analyzed in the provided .ipynb files are stored in the 'data' subfolder, with the exception of 'p6counts.h5ad', which is not included because of its size (3.63 GB). The cell below downloads 'p6counts.h5ad' into that folder if it is not already there.

In [None]:
import os

file_path = 'data/p6counts.h5ad'

if not os.path.exists(file_path):
    !mkdir -p data
    !wget -P data https://us-east-2-scprojectdata.s3.us-east-2.amazonaws.com/p6mouse/p6counts.h5ad

In [None]:
import random
# Seed Python's built-in RNG. Note that this does not seed NumPy's generator;
# no NumPy seed is set in this cell.
random.seed(a=613)
import numpy as np
import scProject
import scanpy as sc
# Read the pattern file and the counts file (the latter is fetched by the
# download cell above) from the 'data' folder.
patterns = sc.read_h5ad('data/patterns_anndata.h5ad')
dataset = sc.read_h5ad('data/p6counts.h5ad')
# NOTE(review): presumably restricts both objects to the genes they have in
# common, matched on the 'id' column -- confirm against the scProject docs.
dataset_filtered, patterns_filtered = scProject.matcher.filterAnnDatas(dataset, patterns, 'id')

In [None]:
import matplotlib.pyplot as plt
import numpy as np
# Split the filtered dataset into microglia and all remaining annotated cell types.
cell_types = dataset_filtered.obs['assigned_cell_type']
microglia = dataset_filtered[cell_types.isin(['Microglia'])].copy()
others = cell_types.unique().remove_categories('Microglia')
rest = dataset_filtered[cell_types.isin(list(others))].copy()
print(microglia.shape, rest.shape, dataset_filtered.shape)

# Log transform for the statistical tests; the 1e-30 offset keeps log2 finite
# for zero entries.
for group in (microglia, rest):
    group.X = np.log2(group.X + 1e-30)

plt.rcParams['figure.figsize'] = [5, 50]
# Run the scProject driver for feature 24, microglia versus the rest.
# NOTE(review): .999999999999 is presumably the confidence level -- confirm
# against the scProject documentation.
df24 = scProject.stats.projectionDriver(
    patterns_filtered, microglia, rest, .999999999999, 'gene_short_name', 24, display=False
)

# Feature 24, weighted intervals: take the genes indexed in df24[0], look up
# their rows in df24[1], rank by |Low + High| and keep the 50 highest-ranked.
sigs24 = df24[0].index
WCIS24 = (
    df24[1].loc[sigs24]
    .assign(rank=lambda ci: abs(ci['Low'] + ci['High']))
    .sort_values(by='rank', ascending=False)
    .head(50)
)

plt.rcParams['figure.figsize'] = [5, 15]
gene_labels = list(WCIS24.index)
n_rows = len(WCIS24)
for row, (gene, low, high) in enumerate(zip(gene_labels, WCIS24['Low'], WCIS24['High'])):
    # The first (highest-ranked) row is drawn at the top of the axis.
    y_pos = n_rows - 1 - row
    plt.plot((low, high), (y_pos, y_pos), '-', color='blue')
    # Only the bottom marker carries a label, so the legend has a single entry.
    marker_label = {'label': 'Mean'} if y_pos == 0 else {}
    plt.plot((float(low+high)/2.0), y_pos, 'o', color='blue', **marker_label)

# Tick j (counted from the bottom) belongs to the gene drawn at height j.
tick_labels = gene_labels[::-1]
plt.yticks(range(n_rows), tick_labels)
plt.title("Microglia Feature 24 Weighted CIs")
# Dashed vertical reference line at x = 0.
plt.plot((0,0), (0, n_rows), '--', color='black')
plt.ylim(bottom=-.5, top=n_rows)
plt.legend()

import os
# Output folder and file name for the figure.
directory = "2457MG"
filename = "MicrogliaF24Weighted.pdf"
file_path = os.path.join(directory, filename)

# Make sure the output folder exists before saving.
if not os.path.exists(directory):
    os.makedirs(directory)

plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# Bon CIs
# Look up, in df24[0], the same genes (in the same order) that were kept in WCIS24.
# NOTE(review): "Bon" presumably means Bonferroni-corrected -- confirm against scProject.
CIS24 = df24[0].loc[WCIS24.index]

plt.rcParams['figure.figsize'] = [5, 15]
gene_labels = list(CIS24.index)
n_rows = len(CIS24)
for row, (gene, low, high) in enumerate(zip(gene_labels, CIS24['Low'], CIS24['High'])):
    # The first row is drawn at the top of the axis.
    y_pos = n_rows - 1 - row
    plt.plot((low, high), (y_pos, y_pos), '-', color='blue')
    # Only the bottom marker carries a label, so the legend has a single entry.
    marker_label = {'label': 'Mean'} if y_pos == 0 else {}
    plt.plot((float(low+high)/2.0), y_pos, 'o', color='blue', **marker_label)

# Tick j (counted from the bottom) belongs to the gene drawn at height j.
tick_labels = gene_labels[::-1]
plt.yticks(range(n_rows), tick_labels)
plt.title("Microglia Feature 24 Bon Ranked")
# Dashed vertical reference line at x = 0.
plt.plot((0,0), (0, n_rows), '--', color='black')
plt.ylim(bottom=-.5, top=n_rows)
plt.legend()

import os
# Output folder and file name for the figure.
directory = "2457MG"
filename = "MicrogliaF24Bon.pdf"
file_path = os.path.join(directory, filename)

# Make sure the output folder exists before saving.
if not os.path.exists(directory):
    os.makedirs(directory)

plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# Run the scProject driver for feature 57 with the same arguments used for feature 24.
df57 = scProject.stats.projectionDriver(
    patterns_filtered, microglia, rest, .999999999999, 'gene_short_name', 57, display=False
)

# Take the genes indexed in df57[0], look up their rows in df57[1], rank by
# |Low + High| and keep the 50 highest-ranked.
sigs57 = df57[0].index
five7WCIS = (
    df57[1].loc[sigs57]
    .assign(rank=lambda ci: abs(ci['Low'] + ci['High']))
    .sort_values(by='rank', ascending=False)
    .head(50)
)

plt.rcParams['figure.figsize'] = [5, 15]
gene_labels = list(five7WCIS.index)
n_rows = len(five7WCIS)
for row, (gene, low, high) in enumerate(zip(gene_labels, five7WCIS['Low'], five7WCIS['High'])):
    # The first (highest-ranked) row is drawn at the top of the axis.
    y_pos = n_rows - 1 - row
    plt.plot((low, high), (y_pos, y_pos), '-', color='blue')
    # Only the bottom marker carries a label, so the legend has a single entry.
    marker_label = {'label': 'Mean'} if y_pos == 0 else {}
    plt.plot((float(low+high)/2.0), y_pos, 'o', color='blue', **marker_label)

# Tick j (counted from the bottom) belongs to the gene drawn at height j.
tick_labels = gene_labels[::-1]
plt.yticks(range(n_rows), tick_labels)
plt.title("Microglia Feature 57 Weighted CIs")
# Dashed vertical reference line at x = 0.
plt.plot((0,0), (0, n_rows), '--', color='black')
plt.ylim(bottom=-.5, top=n_rows)
plt.legend()

import os
# Output folder and file name for the figure.
directory = "2457MG"
filename = "MicrogliaF57Weighted.pdf"
file_path = os.path.join(directory, filename)

# Make sure the output folder exists before saving.
if not os.path.exists(directory):
    os.makedirs(directory)

plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# Bon CIs
# Look up, in df57[0], the same genes (in the same order) that were kept in five7WCIS.
# The result gets its own name (parallel to CIS24) instead of overwriting
# five7WCIS, so the weighted table stays available and the name stays truthful.
# NOTE(review): "Bon" presumably means Bonferroni-corrected -- confirm against scProject.
CIS57 = df57[0].loc[five7WCIS.index]

plt.rcParams['figure.figsize'] = [5, 15]
gene_labels = list(CIS57.index)
n_rows = len(CIS57)
for row, (gene, low, high) in enumerate(zip(gene_labels, CIS57['Low'], CIS57['High'])):
    # The first row is drawn at the top of the axis.
    y_pos = n_rows - 1 - row
    plt.plot((low, high), (y_pos, y_pos), '-', color='blue')
    # Only the bottom marker carries a label, so the legend has a single entry.
    marker_label = {'label': 'Mean'} if y_pos == 0 else {}
    plt.plot((float(low+high)/2.0), y_pos, 'o', color='blue', **marker_label)

# Tick j (counted from the bottom) belongs to the gene drawn at height j.
yAxis = gene_labels[::-1]
plt.yticks(range(n_rows), yAxis)
plt.title("Microglia Feature 57 Bon CIs")
# Dashed vertical reference line at x = 0.
plt.plot((0,0), (0, n_rows), '--', color='black')
plt.ylim(top=n_rows)
plt.ylim(bottom=-.5)
plt.legend()

import os
# Output folder and file name for the figure.
directory = "2457MG"
filename = "MicrogliaF57Bon.pdf"

# Create the output folder if needed; exist_ok makes this safe to re-run.
os.makedirs(directory, exist_ok=True)

# Combine folder and file name into the full output path.
file_path = os.path.join(directory, filename)

# Save the figure
plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# 57 exclusive
# Genes indexed in df57[0] but not in df24[0]; their rows in df57[1] are ranked
# by |Low + High|. No top-50 cut is applied here.
e57 = df57[0].index.difference(df24[0].index)
exclusive57WCIS = (
    df57[1].loc[e57]
    .assign(rank=lambda ci: abs(ci['Low'] + ci['High']))
    .sort_values(by='rank', ascending=False)
)

plt.rcParams['figure.figsize'] = [5, 25]
gene_labels = list(exclusive57WCIS.index)
n_rows = len(exclusive57WCIS)
for row, (gene, low, high) in enumerate(zip(gene_labels, exclusive57WCIS['Low'], exclusive57WCIS['High'])):
    # The first (highest-ranked) row is drawn at the top of the axis.
    y_pos = n_rows - 1 - row
    plt.plot((low, high), (y_pos, y_pos), '-', color='blue')
    # Only the bottom marker carries a label, so the legend has a single entry.
    marker_label = {'label': 'Mean'} if y_pos == 0 else {}
    plt.plot((float(low+high)/2.0), y_pos, 'o', color='blue', **marker_label)

# Tick j (counted from the bottom) belongs to the gene drawn at height j.
tick_labels = gene_labels[::-1]
plt.yticks(range(n_rows), tick_labels)
plt.title("Microglia Feature 57 Exclusive Weighted CIs")
# Dashed vertical reference line at x = 0.
plt.plot((0,0), (0, n_rows), '--', color='black')
plt.ylim(bottom=-.5, top=n_rows)
plt.legend()

import os
# Output folder and file name for the figure.
directory = "2457MG"
filename = "MicrogliaF57ExclusiveWeighted.pdf"
file_path = os.path.join(directory, filename)

# Make sure the output folder exists before saving.
if not os.path.exists(directory):
    os.makedirs(directory)

plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# Bon CIs
# Look up, in df57[0], the feature-57-exclusive genes in the order set by exclusive57WCIS.
# NOTE(review): "Bon" presumably means Bonferroni-corrected -- confirm against scProject.
exclusive57CIS = df57[0].loc[exclusive57WCIS.index]

plt.rcParams['figure.figsize'] = [5, 25]
gene_labels = list(exclusive57CIS.index)
n_rows = len(exclusive57CIS)
for row, (gene, low, high) in enumerate(zip(gene_labels, exclusive57CIS['Low'], exclusive57CIS['High'])):
    # The first row is drawn at the top of the axis.
    y_pos = n_rows - 1 - row
    plt.plot((low, high), (y_pos, y_pos), '-', color='blue')
    # Only the bottom marker carries a label, so the legend has a single entry.
    marker_label = {'label': 'Mean'} if y_pos == 0 else {}
    plt.plot((float(low+high)/2.0), y_pos, 'o', color='blue', **marker_label)

# Tick j (counted from the bottom) belongs to the gene drawn at height j.
tick_labels = gene_labels[::-1]
plt.yticks(range(n_rows), tick_labels)
plt.title("Microglia Feature 57 Exclusive Bon CIs")
# Dashed vertical reference line at x = 0.
plt.plot((0,0), (0, n_rows), '--', color='black')
plt.ylim(bottom=-.5, top=n_rows)
plt.legend()

import os
# Output folder and file name for the figure.
directory = "2457MG"
filename = "MicrogliaF57ExclusiveBon.pdf"
file_path = os.path.join(directory, filename)

# Make sure the output folder exists before saving.
if not os.path.exists(directory):
    os.makedirs(directory)

plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# 24 exclusive
# Genes indexed in df24[0] but not in df57[0]; their rows in df24[1] are ranked
# by |Low + High|. No top-50 cut is applied here.
e24 = df24[0].index.difference(df57[0].index)
exclusive24WCIS = (
    df24[1].loc[e24]
    .assign(rank=lambda ci: abs(ci['Low'] + ci['High']))
    .sort_values(by='rank', ascending=False)
)

plt.rcParams['figure.figsize'] = [5, 15]
gene_labels = list(exclusive24WCIS.index)
n_rows = len(exclusive24WCIS)
for row, (gene, low, high) in enumerate(zip(gene_labels, exclusive24WCIS['Low'], exclusive24WCIS['High'])):
    # The first (highest-ranked) row is drawn at the top of the axis.
    y_pos = n_rows - 1 - row
    plt.plot((low, high), (y_pos, y_pos), '-', color='blue')
    # Only the bottom marker carries a label, so the legend has a single entry.
    marker_label = {'label': 'Mean'} if y_pos == 0 else {}
    plt.plot((float(low+high)/2.0), y_pos, 'o', color='blue', **marker_label)

# Tick j (counted from the bottom) belongs to the gene drawn at height j.
tick_labels = gene_labels[::-1]
plt.yticks(range(n_rows), tick_labels)
plt.title("Microglia Feature 24 Exclusive Weighted CIs")
# Dashed vertical reference line at x = 0.
plt.plot((0,0), (0, n_rows), '--', color='black')
plt.ylim(bottom=-.5, top=n_rows)
plt.legend()

import os
# Output folder and file name for the figure.
directory = "2457MG"
filename = "MicrogliaF24ExclusiveWeighted.pdf"
file_path = os.path.join(directory, filename)

# Make sure the output folder exists before saving.
if not os.path.exists(directory):
    os.makedirs(directory)

plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# 24 Exclusive Bon CIs
# Look up, in df24[0], the feature-24-exclusive genes in the order set by exclusive24WCIS.
# NOTE(review): "Bon" presumably means Bonferroni-corrected -- confirm against scProject.
exclusive24CIS = df24[0].loc[exclusive24WCIS.index]

plt.rcParams['figure.figsize'] = [5, 15]
gene_labels = list(exclusive24CIS.index)
n_rows = len(exclusive24CIS)
for row, (gene, low, high) in enumerate(zip(gene_labels, exclusive24CIS['Low'], exclusive24CIS['High'])):
    # The first row is drawn at the top of the axis.
    y_pos = n_rows - 1 - row
    plt.plot((low, high), (y_pos, y_pos), '-', color='blue')
    # Only the bottom marker carries a label, so the legend has a single entry.
    marker_label = {'label': 'Mean'} if y_pos == 0 else {}
    plt.plot((float(low+high)/2.0), y_pos, 'o', color='blue', **marker_label)

# Tick j (counted from the bottom) belongs to the gene drawn at height j.
yAxis = gene_labels[::-1]
plt.yticks(range(n_rows), yAxis)
plt.title("Microglia Feature 24 Exclusive Bon CIs")
# Dashed vertical reference line at x = 0.
plt.plot((0,0), (0, n_rows), '--', color='black')
plt.ylim(top=n_rows)
plt.ylim(bottom=-.5)
plt.legend()

import os
# Output folder and file name for the figure. The file name carries "F24" so
# that this figure does not overwrite the feature 57 exclusive Bon figure.
directory = "2457MG"
filename = "MicrogliaF24ExclusiveBon.pdf"

# Create the output folder if needed; exist_ok makes this safe to re-run.
os.makedirs(directory, exist_ok=True)

# Combine folder and file name into the full output path.
file_path = os.path.join(directory, filename)

# Save the figure
plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# shared genes
import pandas as pd
# Genes indexed in both df24[0] and df57[0].
shared = df24[0].index.intersection(df57[0].index)
shared24WCI = df24[1].loc[shared]
shared57WCI = df57[1].loc[shared]
# Prefix the interval columns with the feature number so the two tables can be
# placed side by side without name clashes.
shared24WCI.columns = ['24Low', '24High']
shared57WCI.columns = ['57Low', '57High']
shared24WCI['rank24'] = abs(shared24WCI['24Low']+shared24WCI['24High'])
shared57WCI['rank57'] = abs(shared57WCI['57Low']+shared57WCI['57High'])

# Combined rank = sum of the two per-feature ranks; keep the 50 highest-ranked genes.
tog = pd.concat([shared57WCI, shared24WCI], axis=1)
tog['rank'] = tog['rank24'] + tog['rank57']
tog = tog.sort_values(by='rank', ascending=False).head(50)

# Bon CIs
# NOTE(review): only feature 24's intervals (df24[0]) are drawn for the shared
# genes; feature 57's intervals only contribute to the ranking -- confirm this
# is intended.
sharedCIs = df24[0].loc[tog.index]

plt.rcParams['figure.figsize'] = [5, 15]
gene_labels = list(tog.index)
n_rows = len(sharedCIs)
for row, (gene, low, high) in enumerate(zip(gene_labels, sharedCIs['Low'], sharedCIs['High'])):
    # The first (highest-ranked) row is drawn at the top of the axis.
    y_pos = n_rows - 1 - row
    plt.plot((low, high), (y_pos, y_pos), '-', color='blue')
    # Only the bottom marker carries a label, so the legend has a single entry.
    marker_label = {'label': 'Mean'} if y_pos == 0 else {}
    plt.plot((float(low+high)/2.0), y_pos, 'o', color='blue', **marker_label)

# Tick j (counted from the bottom) belongs to the gene drawn at height j.
yAxis = gene_labels[::-1]
plt.yticks(range(n_rows), yAxis)
# The plotted intervals come from df24[0] (the "Bon" table), not the weighted
# table df24[1], so the title says "Bon" to match the data.
plt.title("Microglia Feature 24 and 57 Shared Bon CIs")
# Dashed vertical reference line at x = 0.
plt.plot((0,0), (0, n_rows), '--', color='black')
plt.ylim(top=n_rows)
plt.ylim(bottom=-.5)
plt.legend()

import os
# Output folder and file name for the figure.
directory = "2457MG"
filename = "F24andF57shared.pdf"

# Create the output folder if needed; exist_ok makes this safe to re-run.
os.makedirs(directory, exist_ok=True)

# Combine folder and file name into the full output path.
file_path = os.path.join(directory, filename)

# Save the figure
plt.savefig(file_path, bbox_inches='tight')
plt.show()