# Creating Supplementary Figure 2 to Show Cell Type Markers on MAGIC Data
The purpose of this Jupyter Notebook is to show that the cell type markers used in Farhadian 2018 are visible on different clusters of the MAGIC-imputed data.

### Part 0: Importing Packages

In [1]:
# import statements
import magic
import scprep

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import gzip
import csv

from sklearn.manifold import TSNE
from scprep.io.csv import load_csv

### Part 1: Loading Data

In [2]:
# directory holding the raw GEO sample files, and the CSV file name of each sample
path = "../../data/raw_data/"
file_names = [
    'HIV1_Bld.csv',
    'HIV1_CSF.csv',
    'HIV2_Bld.csv',
    'HIV2_CSF.csv',
    'HIV3_CSF.csv',
    'Uninfected1_CSF.csv',
    'Uninfected2_CSF.csv',
]

In [3]:
# index into file_names of the single sample used for the representative plot
file_to_choose = 0

### Part 2: Filtering the Data
This step is included in case this notebook is run before imputation_magic.ipynb, and for completeness, to show how the filtering is done.

In [None]:
import os

# load the representative sample selected by file_to_choose (the first sample by default)
file_name = file_names[file_to_choose]
dat = scprep.io.load_csv(path + file_name)
# NOTE(review): presumably the raw CSV stores genes on rows, so the transpose puts
# cells on rows as the scprep filters expect -- confirm against the raw files
dat = dat.transpose()

# Remove empty columns and rows
dat = scprep.filter.filter_empty_cells(dat)
dat = scprep.filter.filter_empty_genes(dat)

# Filter out genes expressed in less than 3 cells
dat = scprep.filter.filter_rare_genes(dat, cutoff=0, min_cells=3)

# Keep only cells whose library size is above 500
dat = scprep.filter.filter_library_size(dat, cutoff=500, keep_cells='above')

# normalize and transform data
dat = scprep.normalize.library_size_normalize(dat)
dat = scprep.transform.sqrt(dat)

# save data after preprocessing and before MAGIC
out_path = '../../data/imputation_intermediate/filtered_data/'
# this notebook may be run before imputation_magic.ipynb, so the intermediate
# directory may not exist yet; to_csv does not create missing directories
os.makedirs(out_path, exist_ok=True)
out_name = out_path + file_name
dat.to_csv(out_name)

# preview the filtered matrix (kept as the last expression so the notebook displays it)
dat.head()

### Part 3: Computing MAGIC
Again, this is included in case this notebook is run before imputation_magic.ipynb, and for completeness, to show how the MAGIC imputation is done.

In [None]:
import os

# run MAGIC with its default settings on the filtered, normalized matrix
magic_op = magic.MAGIC()
dat_magic = magic_op.fit_transform(dat)

# save the imputed matrix as a csv file named after the input sample
out_path = '../../data/imputation_intermediate/magic_all_samples/'
# this notebook may be run before imputation_magic.ipynb, so the intermediate
# directory may not exist yet; to_csv does not create missing directories
os.makedirs(out_path, exist_ok=True)
out_name = out_path + file_name
dat_magic.to_csv(out_name)

### Part 4: t-SNE on MAGIC and Showcasing Markers

In [None]:
# embed the MAGIC-imputed matrix in two dimensions with t-SNE
# (no random_state is passed, so the layout may differ from run to run)
tsne_model = TSNE(n_components=2)
tSNE_magic = tsne_model.fit_transform(dat_magic)

In [None]:
# these marker genes are from Farhadian 2018
marker_genes = ['TRAC', 'IL7R', 'GNLY', 'CD79A', 'MS4A1', 'IGJ', 'FCGR3A', 'CD14', 'CD1C']

# one stacked panel per marker gene, 4 inches tall each (12 x 36 for the nine markers);
# squeeze=False keeps `axes` two-dimensional even if only one marker is listed
fig, axes = plt.subplots(len(marker_genes), 1,
                         figsize=(12, 4 * len(marker_genes)), dpi=800,
                         squeeze=False)
ax_list = list(axes[:, 0])

# colour the t-SNE embedding by the MAGIC-imputed expression of each marker
for axis, gene in zip(ax_list, marker_genes):
    scprep.plot.scatter2d(tSNE_magic, c=dat_magic[gene],
                          label_prefix="tSNE", title='tSNE with ' + gene,
                          legend_title=gene, ax=axis, ticks=False)

# NOTE(review): the notebook title says Supplementary Figure 2 but the file is
# saved as supp_figure1.png -- confirm which name is intended
plt.savefig('../../output/figures/supp_figure1.png')
plt.show()