In [67]:
import time
from pathlib import Path

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from bioinfokit.analys import get_data
from bioinfokit.visuz import cluster
from matplotlib.pyplot import figure
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [68]:
sns.set(style='white', context='notebook', rc={'figure.figsize':(14,10)})

font = {'family' : 'sans-serif',
        'weight' : 'normal',
        'size'   : 80}
plt.rc('font', **font)

sns.set(font_scale=2)

In [69]:
# Import Data
P2_FBS = pd.read_csv(r"C:\Users\Brandon\Desktop\Work\Lin_Lab\MSC\Data\220217_MSC_UMAP\Results_p2_FBS.csv")
P10_FBS = pd.read_csv(r"C:\Users\Brandon\Desktop\Work\Lin_Lab\MSC\Data\220217_MSC_UMAP\Results_p10_FBS.csv")
P2_stemfit = pd.read_csv(r"C:\Users\Brandon\Desktop\Work\Lin_Lab\MSC\Data\220217_MSC_UMAP\Results_p2_Stemfit.csv")
P10_stemfit = pd.read_csv(r"C:\Users\Brandon\Desktop\Work\Lin_Lab\MSC\Data\220217_MSC_UMAP\Results_p10_Stemfit.csv")

P2_FBS.head()
P10_FBS.head()
P2_stemfit.head()
P10_stemfit.head()

Unnamed: 0,Culture,CD105,CCND2,p16,p21,p38,SABG
0,P10_Stemfit,28.329,48.989,10.713,8.396,17.252,58.717
1,P10_Stemfit,28.892,49.458,11.468,8.866,18.277,58.754
2,P10_Stemfit,28.681,49.433,11.709,8.776,18.879,58.554
3,P10_Stemfit,27.59,49.18,9.873,7.862,17.505,57.513
4,P10_Stemfit,28.822,49.429,10.1,8.602,17.834,59.797


In [70]:
# Concatenate Data
culture_data = pd.concat([P2_FBS, P10_FBS, P2_stemfit, P10_stemfit], ignore_index=True)
culture_data.head()

# with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#     print(data)

Unnamed: 0,Culture,CD105,CCND2,p16,p21,p38,SABG
0,P2_FBS,25.475,45.585,7.542,8.465,16.667,60.995
1,P2_FBS,25.07,44.558,7.61,8.971,16.971,58.323
2,P2_FBS,25.991,45.228,8.31,9.261,17.119,60.518
3,P2_FBS,27.297,45.6,7.617,8.572,16.746,61.552
4,P2_FBS,25.937,45.209,7.426,8.82,17.198,60.514


In [None]:
palette = {"P2_FBS":"#4FA7FF",
           "P10_FBS":"#0C0FBA", 
           "P2_Stemfit":"#EA940F",
           "P10_Stemfit":"#BF1D00"}

sns.set_style("white")
sns.set_style("ticks")

g = sns.pairplot(culture_data, hue='Culture', palette=palette)

g._legend.remove()

In [None]:
import umap

In [None]:
reducer = umap.UMAP()

In [None]:
marker_data = culture_data[["CD105", "CCND2", "p16", "p21", "p38", "SABG"]].values
embedding = reducer.fit_transform(marker_data)
embedding.shape

In [None]:
fig, ax = plt.subplots(figsize = [20,10])

ax.spines["top"].set_linewidth(5)
ax.spines["left"].set_linewidth(5)
ax.spines["right"].set_linewidth(5)
ax.spines["bottom"].set_linewidth(5)

plt.scatter(embedding[:, 0], embedding[:, 1], c=[palette[x] for x in culture_data.Culture], s=1000)

# plt.xlabel("UMAP 1")
# plt.ylabel("UMAP 2")
plt.gca().set_aspect('equal', 'datalim')

DMEM_p2_label = mpatches.Patch(facecolor="#4FA7FF", edgecolor = 'black', linewidth = 5, label = "           ")
DMEM_p10_label = mpatches.Patch(facecolor="#0C0FBA", edgecolor = 'black', linewidth = 5, label = "           ")
Stemfit_p2_label = mpatches.Patch(facecolor="#EA940F", edgecolor = 'black', linewidth = 5, label = "             ")
Stemfit_p10_label = mpatches.Patch(facecolor="#BF1D00", edgecolor = 'black', linewidth = 5, label = "             ")
plt.legend(handles=[DMEM_p2_label, DMEM_p10_label, Stemfit_p2_label, Stemfit_p10_label], frameon=False, fontsize = 70, borderpad = 0, labelspacing = 0, handlelength = 2, ncol=2)

ax.axes.xaxis.set_visible(False)
ax.axes.yaxis.set_visible(False)

plt.tight_layout()
plt.savefig(fname="MSC_replicative_UMAP")

In [None]:
marker_LDA = LDA(n_components=3).fit_transform(marker_data,culture_data.Culture)

In [None]:
plt.scatter(marker_LDA[:,0], marker_LDA[:, 1], c=[palette[x] for x in culture_data.Culture])

plt.xlabel("LDA 1")
plt.ylabel("LDA 2")
plt.gca().set_aspect('equal', 'datalim')

In [None]:
marker_PCA = PCA().fit(marker_data)

In [None]:
target = culture_data.Culture.to_numpy()

loadings = marker_PCA.components_
marker_PCA.explained_variance_

pca_scores = PCA().fit_transform(marker_data)

cluster.biplot(cscore=pca_scores, loadings=loadings, labels=culture_data.Culture, var1=round(marker_PCA.explained_variance_ratio_[0]*100, 2), var2=round(marker_PCA.explained_variance_ratio_[1]*100, 2))