In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D

from scipy.spatial.distance import squareform
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import pairwise_distances
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler

In [None]:

# Apply the seaborn theme and make 11.7 x 8.27 the default figure size for every plot.
default_figsize = (11.7, 8.27)
sns.set(rc={'figure.figsize': default_figsize})


In [None]:
from matplotlib.pyplot import cm


In [None]:
# Load the combined TB / sarcoidosis table.
# Disease column encoding:
#   0 = sarcoidosis
#   1 = tuberculosis
TB_sarc_df = pd.read_csv('TB_sarc_overlap_nodum.csv')

# Take columns 3..34 by position, then strip the three annotation columns in one call.
df_TB_sarc = TB_sarc_df.iloc[:, 3:35]
df_TBsarc = df_TB_sarc.drop(columns=['Tissue', 'lineage', 'cell_lin'])

# One table per cell type, read in the order B, endothelial, macrophage, monocyte, T.
Bcell_df, Endo_df, macro_df, mono_df, Tcell_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'B_cellsv1.csv',
        'Endothelial_cellsv1.csv',
        'Macrophagesv1.csv',
        'Monocytesv1.csv',
        'T_cellsv1.csv',
    )
)




In [None]:
# Show the TB/sarcoidosis frame after 'Tissue', 'lineage' and 'cell_lin' were dropped.
display(df_TBsarc)

In [None]:
# Work on copies so the frames read from disk (*_df) stay untouched as a reference.
df_Bcell, df_Tcell, df_Endo, df_mono, df_macro = (
    original.copy()
    for original in (Bcell_df, Tcell_df, Endo_df, mono_df, macro_df)
)



In [None]:
# Drop columns without protein abundance data in preparation for t-SNE.

# B cells: DataFrame.drop returns a new frame, so its result has to be assigned.
# The B-cell t-SNE cell reads df_Bcell directly; without this assignment the
# 'Disease' label stays in the t-SNE input and biases the embedding towards
# the very annotation it is later coloured by. Deriving from Bcell_df keeps
# this cell safe to re-run.
df_Bcell = Bcell_df.drop(columns=['Disease'])

df_Tcell_num = df_Tcell.drop(columns=['Disease', 'cell_type'])

# Only 'Disease' is removed for endothelial cells.
# NOTE(review): presumably this file has no 'cell_type' column -- confirm.
df_Endo_num = df_Endo.drop(columns=['Disease'])

df_mono_num = df_mono.drop(columns=['Disease', 'cell_type'])

df_macro_num = df_macro.drop(columns=['Disease', 'cell_type'])

In [None]:
# Inspect the monocyte table after 'Disease' and 'cell_type' were removed.
display(df_mono_num)

In [None]:
#tsne - B cells
# Settings used for all cell types: 2 components and perplexity 30 (spelled out
# explicitly), plus a fixed random_state so the embedding is reproducible
# across kernel restarts.
tsne = TSNE(n_components=2, perplexity=30, random_state=42)

# 'Disease' is the label used to colour the plot, so it must never be part of
# the t-SNE input. errors='ignore' keeps this line valid when the column has
# already been removed from df_Bcell.
B_cell_features = df_Bcell.drop(columns=['Disease'], errors='ignore')

B_cell_tsne = tsne.fit_transform(B_cell_features)



In [None]:
#plot B cell tsne with disease annotations
disease_bcell = Bcell_df['Disease']

fig, ax = plt.subplots()

# x / y are passed by keyword because current seaborn releases reject
# positional data vectors.
# The explicit two-colour list makes seaborn treat the hue as categorical and
# assign blue / orange to the sorted hue levels, so the points really use the
# colours shown in the hand-built legend below (a numeric hue would otherwise
# get a sequential colormap).
sns.scatterplot(x=B_cell_tsne[:, 0], y=B_cell_tsne[:, 1],
                hue=disease_bcell, palette=['blue', 'orange'], ax=ax)

# Legend proxies: first entry = lowest hue level, second = highest.
# Assumes Disease is coded 0 = sarcoidosis, 1 = tuberculosis -- TODO confirm for this file.
custom = [Line2D([], [], marker='.', color='blue', linestyle='None'),
          Line2D([], [], marker='.', color='orange', linestyle='None')]
ax.legend(custom, ['Sarcoidosis', 'Tuberculosis'], loc='lower right')

ax.set_xlabel('t-SNE 1', fontsize=18)
ax.set_ylabel('t-SNE 2', fontsize=18)
ax.set_title('B cells', fontsize=28)
fig.savefig('B_cell_tSNE.png')

In [None]:
# Persist the B-cell t-SNE coordinates as comma-separated text.
np.savetxt(fname="Bcell_tsne_v3", X=B_cell_tsne, delimiter=",")

In [None]:
#tsne - T cells
# Settings used for all cell types: 2 components and perplexity 30 (spelled out
# explicitly), plus a fixed random_state so the embedding is reproducible
# across kernel restarts.
tsne = TSNE(n_components=2, perplexity=30, random_state=42)

T_cell_tsne = tsne.fit_transform(df_Tcell_num)

In [None]:
#plot T cell tsne with disease annotations
disease_tcell = Tcell_df['Disease']

fig, ax = plt.subplots()

# x / y are passed by keyword because current seaborn releases reject
# positional data vectors.
# The explicit two-colour list makes seaborn treat the hue as categorical and
# assign blue / orange to the sorted hue levels, so the points really use the
# colours shown in the hand-built legend below (a numeric hue would otherwise
# get a sequential colormap).
sns.scatterplot(x=T_cell_tsne[:, 0], y=T_cell_tsne[:, 1], alpha=0.4,
                hue=disease_tcell, palette=['blue', 'orange'], ax=ax)

# Legend proxies: first entry = lowest hue level, second = highest.
# Assumes Disease is coded 0 = sarcoidosis, 1 = tuberculosis -- TODO confirm for this file.
custom = [Line2D([], [], marker='.', color='blue', linestyle='None'),
          Line2D([], [], marker='.', color='orange', linestyle='None')]
ax.legend(custom, ['Sarcoidosis', 'Tuberculosis'], loc='lower right')

ax.set_xlabel('t-SNE 1', fontsize=18)
ax.set_ylabel('t-SNE 2', fontsize=18)
ax.set_title('T cells', fontsize=28)
fig.savefig('T_cell_tsne.png')

In [None]:
# Persist the T-cell t-SNE coordinates as comma-separated text.
np.savetxt(fname="Tcell_tsne_v3", X=T_cell_tsne, delimiter=",")

In [None]:
#tsne - Endothelial cells
# Same settings as the other cell types (2 components, perplexity 30), with a
# fixed random_state so the embedding is reproducible across kernel restarts.
tsne = TSNE(n_components=2, perplexity=30, random_state=42)

Endo_tsne = tsne.fit_transform(df_Endo_num)

In [None]:
#plot of endothelial cell tsne with disease annotations
disease_endo_cell = Endo_df['Disease']

fig, ax = plt.subplots()

# x / y are passed by keyword because current seaborn releases reject
# positional data vectors.
# The explicit two-colour list makes seaborn treat the hue as categorical and
# assign blue / orange to the sorted hue levels, so the points really use the
# colours shown in the hand-built legend below (a numeric hue would otherwise
# get a sequential colormap).
sns.scatterplot(x=Endo_tsne[:, 0], y=Endo_tsne[:, 1], alpha=0.7,
                hue=disease_endo_cell, palette=['blue', 'orange'], ax=ax)

# Legend proxies: first entry = lowest hue level, second = highest.
# Assumes Disease is coded 0 = sarcoidosis, 1 = tuberculosis -- TODO confirm for this file.
custom = [Line2D([], [], marker='.', color='blue', linestyle='None'),
          Line2D([], [], marker='.', color='orange', linestyle='None')]
ax.legend(custom, ['Sarcoidosis', 'Tuberculosis'], loc='lower right')

ax.set_xlabel('t-SNE 1', fontsize=18)
ax.set_ylabel('t-SNE 2', fontsize=18)
ax.set_title('Endothelial cells', fontsize=28)
fig.savefig('Endothelial_cell_tsne.png')

In [None]:
# Persist the endothelial-cell t-SNE coordinates as comma-separated text.
np.savetxt(fname="endo_cell_tsne_v3", X=Endo_tsne, delimiter=",")

In [None]:
#tsne - Monocytes
# Same settings as the other cell types (2 components, perplexity 30), with a
# fixed random_state so the embedding is reproducible across kernel restarts.
tsne = TSNE(n_components=2, perplexity=30, random_state=42)

mono_tsne = tsne.fit_transform(df_mono_num)

In [None]:
#Plot of monocyte tsne with disease annotations
disease_mono_cell = mono_df['Disease']

fig, ax = plt.subplots()

# x / y are passed by keyword because current seaborn releases reject
# positional data vectors.
# The explicit two-colour list makes seaborn treat the hue as categorical and
# assign blue / orange to the sorted hue levels, so the points really use the
# colours shown in the hand-built legend below (a numeric hue would otherwise
# get a sequential colormap).
sns.scatterplot(x=mono_tsne[:, 0], y=mono_tsne[:, 1], alpha=0.4,
                hue=disease_mono_cell, palette=['blue', 'orange'], ax=ax)

# Legend proxies: first entry = lowest hue level, second = highest.
# Assumes Disease is coded 0 = sarcoidosis, 1 = tuberculosis -- TODO confirm for this file.
custom = [Line2D([], [], marker='.', color='blue', linestyle='None'),
          Line2D([], [], marker='.', color='orange', linestyle='None')]
ax.legend(custom, ['Sarcoidosis', 'Tuberculosis'], loc='lower right')

ax.set_xlabel('t-SNE 1', fontsize=18)
ax.set_ylabel('t-SNE 2', fontsize=18)
ax.set_title('Monocytes', fontsize=28)
fig.savefig('monocyte_tsne.png')

In [None]:
# Persist the monocyte t-SNE coordinates as comma-separated text.
np.savetxt(fname="mono_tsne_v3", X=mono_tsne, delimiter=",")

In [None]:
#tsne - Macrophages
# Same settings as the other cell types (2 components, perplexity 30), with a
# fixed random_state so the embedding is reproducible across kernel restarts.
tsne = TSNE(n_components=2, perplexity=30, random_state=42)

macro_tsne = tsne.fit_transform(df_macro_num)

In [None]:
#plot of macrophage tsne with disease annotation
disease_macro_cell = macro_df['Disease']

fig, ax = plt.subplots()

# x / y are passed by keyword because current seaborn releases reject
# positional data vectors.
# The explicit two-colour list makes seaborn treat the hue as categorical and
# assign blue / orange to the sorted hue levels, so the points really use the
# colours shown in the hand-built legend below (a numeric hue would otherwise
# get a sequential colormap).
sns.scatterplot(x=macro_tsne[:, 0], y=macro_tsne[:, 1], alpha=0.4,
                hue=disease_macro_cell, palette=['blue', 'orange'], ax=ax)

# Legend proxies: first entry = lowest hue level, second = highest.
# Assumes Disease is coded 0 = sarcoidosis, 1 = tuberculosis -- TODO confirm for this file.
custom = [Line2D([], [], marker='.', color='blue', linestyle='None'),
          Line2D([], [], marker='.', color='orange', linestyle='None')]
ax.legend(custom, ['Sarcoidosis', 'Tuberculosis'], loc='lower right')

ax.set_xlabel('t-SNE 1', fontsize=18)
ax.set_ylabel('t-SNE 2', fontsize=18)
ax.set_title('Macrophages', fontsize=28)
fig.savefig('macrophage_tsne.png')

In [None]:
# Persist the macrophage t-SNE coordinates as comma-separated text.
np.savetxt(fname="macro_tsne_v3", X=macro_tsne, delimiter=",")

In [None]:
#Clean up sarc and TB overlap data
# Rebinds df_TB_sarc to a narrower positional slice (columns 3..29) than the
# 3:35 slice taken when the data was loaded; this frame is the t-SNE input.
# NOTE(review): presumably columns 3..29 hold only protein-abundance values and
# the 'Disease' / 'SampleID' columns lie outside this range -- confirm.

df_TB_sarc = TB_sarc_df.iloc[:,3:30]
display(df_TB_sarc)

In [None]:
#sarc and TB tsne
# Same settings as the per-cell-type runs (2 components, perplexity 30), with a
# fixed random_state so the embedding is reproducible across kernel restarts.
tsne = TSNE(n_components=2, perplexity=30, random_state=42)

TB_sarc_tsne = tsne.fit_transform(df_TB_sarc)

In [None]:
# Persist the combined TB / sarcoidosis t-SNE coordinates as comma-separated text.
np.savetxt(fname="TB_sarc_tsne_v3", X=TB_sarc_tsne, delimiter=",")

In [None]:
#sarc and TB tsne plot
#disease annotations
TB_sarc_disease = TB_sarc_df['Disease']

fig, ax = plt.subplots()

# x / y are passed by keyword because current seaborn releases reject
# positional data vectors.
# The explicit two-colour list makes seaborn treat the hue as categorical and
# assign blue / orange to the sorted hue levels (0 = sarcoidosis,
# 1 = tuberculosis), so the points really use the colours shown in the
# hand-built legend below (a numeric hue would otherwise get a sequential
# colormap).
sns.scatterplot(x=TB_sarc_tsne[:, 0], y=TB_sarc_tsne[:, 1], alpha=0.4,
                hue=TB_sarc_disease, palette=['blue', 'orange'], ax=ax)

# Legend proxies: first entry = lowest hue level, second = highest.
custom = [Line2D([], [], marker='.', color='blue', linestyle='None'),
          Line2D([], [], marker='.', color='orange', linestyle='None')]
ax.legend(custom, ['Sarcoidosis', 'Tuberculosis'], loc='lower right')

ax.set_xlabel('t-SNE 1', fontsize=18)
ax.set_ylabel('t-SNE 2', fontsize=18)
ax.set_title('All Cell Types', fontsize=28)
fig.savefig('all_cell_types_tsne.png')

In [None]:
#scatterplot of t-SNE data coloured by sample ID, in case there is a patient dependency in the data
TBsarc_sampleID = TB_sarc_df['SampleID']

fig, ax = plt.subplots()

# x / y are passed by keyword because current seaborn releases reject
# positional data vectors.
# NOTE(review): if SampleID is numeric seaborn colours it on a continuous
# scale; consider casting to a categorical dtype if discrete colours are wanted.
sns.scatterplot(x=TB_sarc_tsne[:, 0], y=TB_sarc_tsne[:, 1], alpha=0.4,
                hue=TBsarc_sampleID, ax=ax)

ax.set_xlabel('t-SNE 1', fontsize=18)
ax.set_ylabel('t-SNE 2', fontsize=18)
ax.set_title('t-SNE for protein signatures in TB and Sarcoidosis - by sample ID', fontsize=20)
fig.savefig('sample_id_tsne.png')