In [1]:
import hdbscan
import lostructpy.lostruct as ls
import numpy as np
import pandas as pd
import phate
import plotly.express as px
import plotly.io as pio
import umap
from skbio.stats.ordination import pcoa
from sklearn.manifold import MDS

pio.renderers.default = "notebook_connected"

In [2]:
# Split the filtered chr1 VCF into windows with lostruct-py.
vcf_path = "chr1-filtered.vcf.gz"
contig = "chr1"
# Third positional argument to parse_vcf; presumably the window size -- TODO confirm.
window_arg = 95
windows, positions = ls.parse_vcf(vcf_path, contig, window_arg)

In [3]:
# Run ls.eigen_windows on every window (second argument 10) and stack the
# per-window results so that row i of `result` belongs to window i.
result = np.vstack([ls.eigen_windows(window, 10) for window in windows])

# Pairwise distances between windows, then a PCoA of that distance matrix.
pc_dists = ls.get_pc_dists(result)

# Keep the PCoA result under a second name as well as `mds`.
mds = mds_likelostructr = pcoa(pc_dists)



Casting complex values to real discards the imaginary part


Casting complex values to real discards the imaginary part



In [4]:
# Shape of element 3 of the first window's eigen_windows result. The leading
# dimension (10) matches the second argument passed to ls.eigen_windows.
# Left as the cell's last expression so Jupyter displays it; no print() needed.
result[0][3].shape

(10, 50)


In [5]:
# Reference MDS coordinates loaded from disk, for comparison with the PCoA above.
mds_coords = pd.read_csv("lostruct-results/mds_coords.csv")

# Correlate the first PCoA axis with the reference MDS1 column.
# Uses `mds_likelostructr` (the alias of the PCoA result) instead of `mds`,
# because a later cell rebinds `mds` to a plain ndarray; with the alias this
# cell still works when re-run out of order.
np.corrcoef(mds_likelostructr.samples["PC1"], mds_coords["MDS1"].to_numpy())

array([[1.       , 0.9978494],
       [0.9978494, 1.       ]])

In [6]:
# First PCoA axis against the reference MDS1 column.
# Reads the PCoA result through `mds_likelostructr`, since `mds` is rebound to
# an ndarray by a later cell. update_layout returns the figure, so it is still
# the cell's displayed output.
px.scatter(x=mds_likelostructr.samples["PC1"], y=mds_coords["MDS1"]).update_layout(
    title="PCoA PC1 vs. reference MDS1",
    xaxis_title="PC1 (PCoA of pc_dists)",
    yaxis_title="MDS1 (mds_coords.csv)",
)

In [7]:
# First PCoA axis, one point per row of pc_dists (i.e. per window).
# Reads the PCoA result through `mds_likelostructr`, since `mds` is rebound to
# an ndarray by a later cell.
px.scatter(y=mds_likelostructr.samples["PC1"]).update_layout(
    title="PCoA PC1 per window",
    xaxis_title="Window index",
    yaxis_title="PC1",
)

In [8]:
# Second PCoA axis, one point per row of pc_dists (i.e. per window).
# Reads the PCoA result through `mds_likelostructr`, since `mds` is rebound to
# an ndarray by a later cell.
px.scatter(y=mds_likelostructr.samples["PC2"]).update_layout(
    title="PCoA PC2 per window",
    xaxis_title="Window index",
    yaxis_title="PC2",
)

# Exploring other embedding and clustering methods for comparison

In [9]:
# Metric MDS (scikit-learn) on the precomputed window-distance matrix.
# The fit starts from random initialisations (32 of them here), so the seed is
# fixed to make Restart & Run All reproduce the same coordinates.
embedding = MDS(
    n_components=10,
    dissimilarity="precomputed",
    n_jobs=-1,
    n_init=32,
    random_state=42,
)

# NOTE: this rebinds `mds` from the PCoA result to a plain ndarray of
# coordinates; the PCoA result stays reachable as `mds_likelostructr`.
mds = embedding.fit_transform(pc_dists)

# Overlay the first scikit-learn MDS axis and the reference MDS1 column.
px.scatter(y=[mds[:,0], mds_coords['MDS1']])

In [10]:
# PHATE embedding of the same precomputed distance matrix, using metric MDS
# with the SMACOF solver. `phate` is imported in the notebook's import cell.
# random_state is fixed so the embedding is reproducible across runs.
phater = phate.PHATE(
    n_components=10,
    knn_dist='precomputed',
    mds_solver='smacof',
    mds='metric',
    random_state=42,
)
comparison_phate = phater.fit_transform(pc_dists)

Calculating PHATE...
  Running PHATE on precomputed distance matrix with 124 observations.
  Calculating graph and diffusion operator...
    Calculating affinities...
  Calculating optimal t...
    Automatically selected t = 12
  Calculated optimal t in 0.02 seconds.
  Calculating diffusion potential...
  Calculating metric MDS...
  Calculated metric MDS in 0.25 seconds.
Calculated PHATE in 0.29 seconds.


In [11]:
# Overlay the first axis from each method: the reference MDS1 column,
# PCoA PC1, and the first PHATE component.
first_axes = [
    mds_coords['MDS1'],
    mds_likelostructr.samples["PC1"],
    comparison_phate[:,0],
]
px.scatter(y=first_axes)

In [12]:
# 2-D UMAP embedding of the windows.
# pc_dists is a distance matrix (the MDS and PHATE cells pass it as
# "precomputed"), so UMAP is told the same; with its default metric it would
# treat each row of the matrix as a feature vector instead.
# random_state is fixed because UMAP is stochastic.
reducer = umap.UMAP(metric="precomputed", random_state=42)
embedding = reducer.fit_transform(pc_dists)
px.scatter(x=embedding[:, 0], y=embedding[:, 1]).update_layout(
    title="UMAP embedding of window distances",
    xaxis_title="UMAP 1",
    yaxis_title="UMAP 2",
)

In [13]:
# Cluster the current UMAP embedding with HDBSCAN (default settings) and
# colour the first two embedding dimensions by cluster label.
clusterer = hdbscan.HDBSCAN()
hdbscan_labels = clusterer.fit_predict(embedding)
umap_x, umap_y = embedding[:, 0], embedding[:, 1]
px.scatter(x=umap_x, y=umap_y, color=hdbscan_labels)

In [14]:
# 3-D UMAP embedding of the windows, clustered with HDBSCAN.
# pc_dists is a distance matrix (the MDS and PHATE cells pass it as
# "precomputed"), so UMAP is told the same instead of treating rows as
# feature vectors. random_state is fixed because UMAP is stochastic.
reducer = umap.UMAP(n_components=3, metric="precomputed", random_state=42)
embedding = reducer.fit_transform(pc_dists)
hdbscan_labels = hdbscan.HDBSCAN().fit_predict(embedding)
fig = px.scatter_3d(
    x=embedding[:, 0],
    y=embedding[:, 1],
    z=embedding[:, 2],
    color=hdbscan_labels,
    width=800,
    height=600,
    title="3-D UMAP embedding coloured by HDBSCAN cluster",
)
fig.show()

In [18]:
# Inspect element 0 of the first window's eigen_windows result; the recorded
# output shows it is a 2-D numpy masked_array.
result[0][0]

masked_array(
  data=[[0.010622151234804648, -0.0015625851898652986,
         0.0005667396351146004, ..., -0.0001999672744519623,
         -0.0022891584069083207, 0.0005667396351146001],
        [-0.0015625851898652986, 0.03933510097478516,
         -0.0006098533116287804, ..., -0.0022873783653575477,
         -0.006451177878028727, -0.0006098533116287811],
        [0.0005667396351146004, -0.0006098533116287804,
         0.0010376438248982211, ..., 0.0005948330331410832,
         -0.0007766169874116905, 0.001037643824898221],
        ...,
        [-0.0001999672744519623, -0.0022873783653575477,
         0.0005948330331410832, ..., 0.04612661642249273,
         -0.0009225566386134093, 0.0005948330331410832],
        [-0.0022891584069083186, -0.00645117787802873,
         -0.0007766169874116903, ..., -0.0009225566386134098,
         0.11167940029559109, -0.0007766169874116905],
        [0.0005667396351146, -0.0006098533116287809,
         0.001037643824898221, ..., 0.0005948330331410832,

In [90]:
# Small 3x3 test matrix whose columns are (1, 2, 3), (4, 5, 6), (7, 8, 9):
# the transpose of 1..9 laid out row by row.
testmat = np.matrix(np.arange(1, 10).reshape(3, 3).T)
testmat

matrix([[1, 4, 7],
        [2, 5, 8],
        [3, 6, 9]])

In [104]:
# Square roots of the test weights (1, 2, 4), kept as a 1x3 row vector.
weights = np.array([1, 2, 4])
sqrt_w = np.sqrt(weights).reshape(1, -1)
sqrt_w

array([[1.        , 1.41421356, 2.        ]])

In [85]:
# With an np.matrix operand, `*` is the matrix product, not an element-wise
# one; written out explicitly as the dot product (1x3 row times 3x3 matrix).
np.dot(sqrt_w, testmat)

matrix([[ 9.82842712, 23.07106781, 36.3137085 ]])

In [62]:
# View the weights as a 3x1 column vector.
# sqrt_w is already a 1x3 row where it is defined, so adding another axis
# before transposing would give shape (3, 1, 1) instead of the intended column.
# reshape(-1, 1) yields the (3, 1) column for both 1-D and 1x3 input.
sqrt_w.reshape(-1, 1)

array([[1.        ],
       [1.41421356],
       [2.        ]])

In [97]:
# Repeat the weight row three times to form a 3x3 block, then take the
# matrix product with testmat (`*` on an np.matrix is a matrix product).
tiled_weights = np.tile(sqrt_w, (3, 1))
np.dot(testmat, np.asmatrix(tiled_weights))

matrix([[12.        , 16.97056275, 24.        ],
        [15.        , 21.21320344, 30.        ],
        [18.        , 25.45584412, 36.        ]])

In [108]:
# Element-wise product with the weights as a column: row i of testmat is
# scaled by the i-th entry of sqrt_w.
weights_as_column = sqrt_w.T
x = np.multiply(testmat, weights_as_column)
x

matrix([[ 1.        ,  4.        ,  7.        ],
        [ 2.82842712,  7.07106781, 11.3137085 ],
        [ 6.        , 12.        , 18.        ]])

In [110]:
# Element-wise product with the weights as a row: column j of x is scaled by
# the j-th entry of sqrt_w. Since x already has its rows scaled, entry (i, j)
# of testmat ends up multiplied by sqrt_w[i] * sqrt_w[j].
np.multiply(x, sqrt_w)

matrix([[ 1.        ,  5.65685425, 14.        ],
        [ 2.82842712, 10.        , 22.627417  ],
        [ 6.        , 16.97056275, 36.        ]])