# Microglia Analysis: Feature 5 vs. 57
This notebook compares and analyzes feature 5 and feature 57 in microglia. We recommend reading it alongside the "Feature 24 vs. 57 Microglia.ipynb" notebook.

All data analyzed in the provided .ipynb files are stored in the 'data' subfolder, with the exception of 'p6counts.h5ad'. The cell below downloads 'p6counts.h5ad' into that folder.

In [None]:
import os
import shutil
import urllib.request

# Remote copy of the one dataset that is not shipped inside the 'data' subfolder.
data_url = 'https://us-east-2-scprojectdata.s3.us-east-2.amazonaws.com/p6mouse/p6counts.h5ad'
file_path = 'data/p6counts.h5ad'

# Fetch the file only when it is missing, so re-running the cell is cheap.
if not os.path.exists(file_path):
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    # Stream into a temporary '.part' file and rename it once the transfer has
    # finished; an interrupted download therefore never leaves a truncated
    # 'p6counts.h5ad' behind that the existence check above would accept.
    partial_path = file_path + '.part'
    try:
        with urllib.request.urlopen(data_url) as response, open(partial_path, 'wb') as partial_file:
            shutil.copyfileobj(response, partial_file)
        os.replace(partial_path, file_path)
    finally:
        # After a successful rename the '.part' file no longer exists; this only
        # cleans up after a failed or interrupted transfer.
        if os.path.exists(partial_path):
            os.remove(partial_path)

In [None]:
import random
# Seed Python's built-in `random` module (this does not seed NumPy's generator).
random.seed(a=613)
import numpy as np
import scProject
import scanpy as sc
# Read the pattern matrix and the count matrix from the 'data' subfolder.
patterns = sc.read_h5ad('data/patterns_anndata.h5ad')
dataset = sc.read_h5ad('data/p6counts.h5ad')
# NOTE(review): presumably restricts both AnnData objects to the entries they
# share under the 'id' key; confirm against the scProject documentation.
# The two filtered objects are what every later cell works with.
dataset_filtered, patterns_filtered = scProject.matcher.filterAnnDatas(dataset, patterns, 'id')

In [None]:
import matplotlib.pyplot as plt
import numpy as np
# Split the filtered dataset into microglia and every other annotated cell type.
microglia = dataset_filtered[dataset_filtered.obs['assigned_cell_type'].isin(['Microglia'])].copy()
others = dataset_filtered.obs['assigned_cell_type'].unique().remove_categories('Microglia')
rest = dataset_filtered[dataset_filtered.obs['assigned_cell_type'].isin(list(others))].copy()
print(microglia.shape, rest.shape, dataset_filtered.shape)

# Log-transform both groups for the statistical tests. The 1e-30 offset keeps
# log2 finite for zero entries (a zero maps to roughly -99.7).
microglia.X = np.log2(microglia.X + 1e-30)
rest.X = np.log2(rest.X + 1e-30)

# Figure size in effect while projectionDriver runs; it is reset to [5, 15]
# below before this cell draws its own figure.
plt.rcParams['figure.figsize']= [5,50]
df5 = scProject.stats.projectionDriver(patterns_filtered, microglia, rest,.999999999999,'gene_short_name', 5, display=False)

# df5[0] holds the "Bon" CIs and df5[1] the weighted CIs (per the later cells of
# this notebook). Keep the genes listed in df5[0], rank them by |Low + High| of
# the weighted interval, and retain the 50 highest-ranked.
sigs5 = df5[0].index
fiveWCIS = (
    df5[1].loc[sigs5]
    .assign(rank=lambda cis: abs(cis['Low'] + cis['High']))
    .sort_values(by='rank', ascending=False)
    .head(50)
)

# One horizontal interval per gene: the highest-ranked gene is drawn on the top
# row and the lowest-ranked on row 0.
plt.rcParams['figure.figsize']= [5, 15]
yAxis = list(fiveWCIS.index)[::-1]  # tick labels, bottom row first
top_row = len(fiveWCIS) - 1
for position, (low, high) in enumerate(zip(fiveWCIS['Low'], fiveWCIS['High'])):
    row = top_row - position
    plt.plot((low, high), (row, row), '-', color='blue')
    # Label only the bottom marker so the legend shows a single 'Mean' entry.
    marker_label = {'label': 'Mean'} if row == 0 else {}
    plt.plot(float(low + high) / 2.0, row, 'o', color='blue', **marker_label)

plt.yticks(range(len(yAxis)), yAxis)
plt.title("Microglia Feature 5 Weighted CIs")
# Dashed vertical reference line at zero.
plt.plot((0,0), (0,len(yAxis)), '--', color='black')
plt.ylim(bottom=-.5, top=len(yAxis))
plt.legend()

import os
# Output folder and file name for the saved figure.
directory = "557MG"
filename = "MicrogliaF5Weighted.pdf"

# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(directory, exist_ok=True)
file_path = os.path.join(directory, filename)

# Save before show(): the figure must still be current when it is written.
plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# Bon CIs for the same genes, in the same order, as the weighted-CI ranking.
# Note that this rebinds fiveWCIS: from here on it holds rows of df5[0].
fiveWCIS = df5[0].loc[fiveWCIS.index]

# One horizontal interval per gene: the first row of the frame is drawn on the
# top row of the plot and the last on row 0.
plt.rcParams['figure.figsize']= [5, 15]
yAxis = list(fiveWCIS.index)[::-1]  # tick labels, bottom row first
top_row = len(fiveWCIS) - 1
for position, (low, high) in enumerate(zip(fiveWCIS['Low'], fiveWCIS['High'])):
    row = top_row - position
    plt.plot((low, high), (row, row), '-', color='blue')
    # Label only the bottom marker so the legend shows a single 'Mean' entry.
    marker_label = {'label': 'Mean'} if row == 0 else {}
    plt.plot(float(low + high) / 2.0, row, 'o', color='blue', **marker_label)

plt.yticks(range(len(yAxis)), yAxis)
# Title names the interval type that is plotted (df5[0], saved as ...F5Bon.pdf).
plt.title("Microglia Feature 5 Bon CIs")
# Dashed vertical reference line at zero.
plt.plot((0,0), (0,len(yAxis)), '--', color='black')
plt.ylim(bottom=-.5, top=len(yAxis))
plt.legend()

import os
# Output folder and file name for the saved figure.
directory = "557MG"
filename = "MicrogliaF5Bon.pdf"

# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(directory, exist_ok=True)
file_path = os.path.join(directory, filename)

# Save before show(): the figure must still be current when it is written.
plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# Run the projection statistics for feature 57 (microglia vs. all other cells).
df57 = scProject.stats.projectionDriver(patterns_filtered, microglia, rest,.999999999999,'gene_short_name', 57, display=False)

# df57[0] holds the "Bon" CIs and df57[1] the weighted CIs (per the later cells
# of this notebook). Keep the genes listed in df57[0], rank them by |Low + High|
# of the weighted interval, and retain the 50 highest-ranked.
sigs57 = df57[0].index
five7WCIS = (
    df57[1].loc[sigs57]
    .assign(rank=lambda cis: abs(cis['Low'] + cis['High']))
    .sort_values(by='rank', ascending=False)
    .head(50)
)

# One horizontal interval per gene: the highest-ranked gene is drawn on the top
# row and the lowest-ranked on row 0.
plt.rcParams['figure.figsize']= [5, 15]
yAxis = list(five7WCIS.index)[::-1]  # tick labels, bottom row first
top_row = len(five7WCIS) - 1
for position, (low, high) in enumerate(zip(five7WCIS['Low'], five7WCIS['High'])):
    row = top_row - position
    plt.plot((low, high), (row, row), '-', color='blue')
    # Label only the bottom marker so the legend shows a single 'Mean' entry.
    marker_label = {'label': 'Mean'} if row == 0 else {}
    plt.plot(float(low + high) / 2.0, row, 'o', color='blue', **marker_label)

plt.yticks(range(len(yAxis)), yAxis)
plt.title("Microglia Feature 57 Weighted CIs")
# Dashed vertical reference line at zero.
plt.plot((0,0), (0,len(yAxis)), '--', color='black')
plt.ylim(bottom=-.5, top=len(yAxis))
plt.legend()

import os
# Output folder and file name for the saved figure.
directory = "557MG"
filename = "MicrogliaF57Weighted.pdf"

# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(directory, exist_ok=True)
file_path = os.path.join(directory, filename)

# Save before show(): the figure must still be current when it is written.
plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# Bon CIs for the same genes, in the same order, as the weighted-CI ranking.
# Note that this rebinds five7WCIS: from here on it holds rows of df57[0].
five7WCIS = df57[0].loc[five7WCIS.index]

# One horizontal interval per gene: the first row of the frame is drawn on the
# top row of the plot and the last on row 0.
plt.rcParams['figure.figsize']= [5, 15]
yAxis = list(five7WCIS.index)[::-1]  # tick labels, bottom row first
top_row = len(five7WCIS) - 1
for position, (low, high) in enumerate(zip(five7WCIS['Low'], five7WCIS['High'])):
    row = top_row - position
    plt.plot((low, high), (row, row), '-', color='blue')
    # Label only the bottom marker so the legend shows a single 'Mean' entry.
    marker_label = {'label': 'Mean'} if row == 0 else {}
    plt.plot(float(low + high) / 2.0, row, 'o', color='blue', **marker_label)

plt.yticks(range(len(yAxis)), yAxis)
plt.title("Microglia Feature 57 Bon CIs")
# Dashed vertical reference line at zero.
plt.plot((0,0), (0,len(yAxis)), '--', color='black')
plt.ylim(bottom=-.5, top=len(yAxis))
plt.legend()

import os
# Output folder and file name for the saved figure.
directory = "557MG"
filename = "MicrogliaF57Bon.pdf"

# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(directory, exist_ok=True)
file_path = os.path.join(directory, filename)

# Save before show(): the figure must still be current when it is written.
plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# 57 exclusive: genes listed in df57[0] that are absent from df5[0], ranked by
# |Low + High| of their feature-57 weighted interval (all of them are plotted).
e57 = df57[0].index.difference(df5[0].index)
exclusive57WCIS = (
    df57[1].loc[e57]
    .assign(rank=lambda cis: abs(cis['Low'] + cis['High']))
    .sort_values(by='rank', ascending=False)
)

# One horizontal interval per gene: the highest-ranked gene is drawn on the top
# row and the lowest-ranked on row 0.
plt.rcParams['figure.figsize']= [5, 15]
yAxis = list(exclusive57WCIS.index)[::-1]  # tick labels, bottom row first
top_row = len(exclusive57WCIS) - 1
for position, (low, high) in enumerate(zip(exclusive57WCIS['Low'], exclusive57WCIS['High'])):
    row = top_row - position
    plt.plot((low, high), (row, row), '-', color='blue')
    # Label only the bottom marker so the legend shows a single 'Mean' entry.
    marker_label = {'label': 'Mean'} if row == 0 else {}
    plt.plot(float(low + high) / 2.0, row, 'o', color='blue', **marker_label)

plt.yticks(range(len(yAxis)), yAxis)
plt.title("Microglia Feature 57 Exclusive Weighted CIs")
# Dashed vertical reference line at zero.
plt.plot((0,0), (0,len(yAxis)), '--', color='black')
plt.ylim(bottom=-.5, top=len(yAxis))
plt.legend()

import os
# Output folder and file name for the saved figure.
directory = "557MG"
filename = "MicrogliaF57ExclusiveWeighted.pdf"

# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(directory, exist_ok=True)
file_path = os.path.join(directory, filename)

# Save before show(): the figure must still be current when it is written.
plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# Bon CIs for the feature-57-exclusive genes, in the weighted-CI ranking order.
exclusive57CIS = df57[0].loc[exclusive57WCIS.index]

# One horizontal interval per gene: the first row of the frame is drawn on the
# top row of the plot and the last on row 0.
plt.rcParams['figure.figsize']= [5, 15]
yAxis = list(exclusive57CIS.index)[::-1]  # tick labels, bottom row first
top_row = len(exclusive57CIS) - 1
for position, (low, high) in enumerate(zip(exclusive57CIS['Low'], exclusive57CIS['High'])):
    row = top_row - position
    plt.plot((low, high), (row, row), '-', color='blue')
    # Label only the bottom marker so the legend shows a single 'Mean' entry.
    marker_label = {'label': 'Mean'} if row == 0 else {}
    plt.plot(float(low + high) / 2.0, row, 'o', color='blue', **marker_label)

plt.yticks(range(len(yAxis)), yAxis)
plt.title("Microglia Feature 57 Exclusive Bon CIs")
# Dashed vertical reference line at zero.
plt.plot((0,0), (0,len(yAxis)), '--', color='black')
plt.ylim(bottom=-.5, top=len(yAxis))
plt.legend()

import os
# Output folder and file name for the saved figure.
directory = "557MG"
filename = "MicrogliaF57ExclusiveBon.pdf"

# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(directory, exist_ok=True)
file_path = os.path.join(directory, filename)

# Save before show(): the figure must still be current when it is written.
plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# 5 exclusive: genes listed in df5[0] that are absent from df57[0], ranked by
# |Low + High| of their feature-5 weighted interval (all of them are plotted).
e5 = df5[0].index.difference(df57[0].index)
exclusive5WCIS = (
    df5[1].loc[e5]
    .assign(rank=lambda cis: abs(cis['Low'] + cis['High']))
    .sort_values(by='rank', ascending=False)
)

# One horizontal interval per gene: the highest-ranked gene is drawn on the top
# row and the lowest-ranked on row 0.
plt.rcParams['figure.figsize']= [5, 15]
yAxis = list(exclusive5WCIS.index)[::-1]  # tick labels, bottom row first
top_row = len(exclusive5WCIS) - 1
for position, (low, high) in enumerate(zip(exclusive5WCIS['Low'], exclusive5WCIS['High'])):
    row = top_row - position
    plt.plot((low, high), (row, row), '-', color='blue')
    # Label only the bottom marker so the legend shows a single 'Mean' entry.
    marker_label = {'label': 'Mean'} if row == 0 else {}
    plt.plot(float(low + high) / 2.0, row, 'o', color='blue', **marker_label)

plt.yticks(range(len(yAxis)), yAxis)
plt.title("Microglia Feature 5 Exclusive Weighted CIs")
# Dashed vertical reference line at zero.
plt.plot((0,0), (0,len(yAxis)), '--', color='black')
plt.ylim(bottom=-.5, top=len(yAxis))
plt.legend()

import os
# Output folder and file name for the saved figure.
directory = "557MG"
filename = "MicrogliaF5ExclusiveWeighted.pdf"

# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(directory, exist_ok=True)
file_path = os.path.join(directory, filename)

# Save before show(): the figure must still be current when it is written.
plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# 5 Exclusive Bon CIs: same genes and order as the weighted-CI ranking.
exclusive5CIS = df5[0].loc[exclusive5WCIS.index]

# One horizontal interval per gene: the first row of the frame is drawn on the
# top row of the plot and the last on row 0.
plt.rcParams['figure.figsize']= [5, 15]
yAxis = list(exclusive5CIS.index)[::-1]  # tick labels, bottom row first
top_row = len(exclusive5CIS) - 1
for position, (low, high) in enumerate(zip(exclusive5CIS['Low'], exclusive5CIS['High'])):
    row = top_row - position
    plt.plot((low, high), (row, row), '-', color='blue')
    # Label only the bottom marker so the legend shows a single 'Mean' entry.
    marker_label = {'label': 'Mean'} if row == 0 else {}
    plt.plot(float(low + high) / 2.0, row, 'o', color='blue', **marker_label)

plt.yticks(range(len(yAxis)), yAxis)
plt.title("Microglia Feature 5 Exclusive Bon CIs")
# Dashed vertical reference line at zero.
plt.plot((0,0), (0,len(yAxis)), '--', color='black')
plt.ylim(bottom=-.5, top=len(yAxis))
plt.legend()

import os
# Output folder and file name for the saved figure.
directory = "557MG"
filename = "MicrogliaF5ExclusiveBon.pdf"

# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(directory, exist_ok=True)
file_path = os.path.join(directory, filename)

# Save before show(): the figure must still be current when it is written.
plt.savefig(file_path, bbox_inches='tight')
plt.show()

In [None]:
# shared genes: those listed in both df5[0] and df57[0]
import pandas as pd
shared = df5[0].index.intersection(df57[0].index)

# Weighted CIs of the shared genes under each feature, with the columns renamed
# so the two frames can sit side by side.
shared5WCI = df5[1].loc[shared]
shared57WCI = df57[1].loc[shared]
shared5WCI.columns = ['5Low', '5High']
shared57WCI.columns = ['57Low', '57High']
shared5WCI['rank5'] = abs(shared5WCI['5Low']+shared5WCI['5High'])
shared57WCI['rank57'] = abs(shared57WCI['57Low']+shared57WCI['57High'])

# Combined rank = sum of the two per-feature ranks; keep the 50 highest.
tog = pd.concat([shared57WCI, shared5WCI], axis=1)
tog['rank'] = tog['rank5'] + tog['rank57']
tog = tog.sort_values(by='rank', ascending=False).head(50)

# Bon CIs. Only the feature-5 intervals (df5[0]) are drawn for the shared genes.
sharedCIs = df5[0].loc[tog.index]

# One horizontal interval per gene: the highest-ranked gene is drawn on the top
# row and the lowest-ranked on row 0.
plt.rcParams['figure.figsize']= [5, 15]
yAxis = list(tog.index)[::-1]  # tick labels, bottom row first
top_row = len(sharedCIs) - 1
for position, (low, high) in enumerate(zip(sharedCIs['Low'], sharedCIs['High'])):
    row = top_row - position
    plt.plot((low, high), (row, row), '-', color='blue')
    # Label only the bottom marker so the legend shows a single 'Mean' entry.
    marker_label = {'label': 'Mean'} if row == 0 else {}
    plt.plot(float(low + high) / 2.0, row, 'o', color='blue', **marker_label)

plt.yticks(range(len(yAxis)), yAxis)
# The plotted intervals come from df5[0] (Bon CIs), so the title says "Bon"
# rather than "Weighted".
plt.title("Microglia Feature 5 and 57 Shared Bon CIs")
# Dashed vertical reference line at zero.
plt.plot((0,0), (0,len(yAxis)), '--', color='black')
plt.ylim(bottom=-.5, top=len(yAxis))
plt.legend()

import os
# Output folder and file name for the saved figure.
directory = "557MG"
filename = "F5andF57shared.pdf"

# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(directory, exist_ok=True)
file_path = os.path.join(directory, filename)

# Save before show(): the figure must still be current when it is written.
plt.savefig(file_path, bbox_inches='tight')
plt.show()