In [1]:
import numpy as np
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from plotly import graph_objects as go
from plotly import subplots as sp

from local.caching import save, load

# this is just an optimization package
# may not be available for some systems
try:
    from sklearnex import patch_sklearn
    patch_sklearn()
except: pass

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
mappings = load("1k_cy_mapping")

recovering & decompressing cached data from [{WORKSPACE}/main/scratch/cache/1k_cy_mapping.pkl.gz]


In [3]:
x = np.array([emb for doi, t, a, emb in mappings])
x.shape

(1245, 1536)

In [4]:
model = PCA(n_components=2)
pca_x = model.fit_transform(x)
pca_x.shape

(1245, 2)

In [14]:
rand_seed = 36
model = TSNE(n_components=2, random_state=rand_seed, perplexity=30)
latentx = model.fit_transform(x)
latentx.shape

(1245, 2)

In [21]:
# settings

axis_col = 'rgba(0, 0, 0, 0.15)'
no_col = 'rgba(0, 0, 0, 0)'
axis_desc: dict = dict(linecolor=no_col, gridcolor=axis_col, zerolinecolor=axis_col, zerolinewidth=1)
layout = dict(
    autosize=False,
    width=1400,
    height=1400,
    margin=dict(
        l=25, r=25, b=25, t=25, pad=5
    ),
    # paper_bgcolor="white",
    font_family="Times New Roman",
    font_color="black",
    font_size=20,
    plot_bgcolor='white',
    xaxis=axis_desc,
    yaxis=axis_desc,
    xaxis2=axis_desc,
    yaxis2=axis_desc,
)

In [22]:
def make_traces():
    s, o = 7, 0.3
    return [
        go.Scatter(
            x = [x for x, y in latentx],
            y = [y for x, y in latentx],

            # x = [x for x, y in pca_x],
            # y = [y for x, y in pca_x],
            mode='markers',
            marker=dict(
                size=s,
                color='#3679c6',
                opacity=o
            ),
            text=[f"{t}" for doi, t, a, e in mappings],
        ),
    ]

fig = sp.make_subplots(
    rows=1, cols=1, shared_xaxes=True, shared_yaxes=True, horizontal_spacing=0.02,
    # x_title="% Completeness"
)
for i, tr in enumerate(make_traces()):
    fig.add_trace(tr, row=1, col=i+1)
_layout = layout.copy()
_layout.update(dict(
    # xaxis2=dict(title="", **axis_desc),
    # xaxis=dict(title="", **axis_desc),
    # yaxis=dict(title="% Contamination", **axis_desc),
))
fig.update_annotations(font_size=24)
fig.update_layout(go.Layout(**_layout))
fig.show()

In [28]:
query = "bioinformatic eval"

for doi, ftitle, fabstract in [(doi, t, a) for (doi, t, a, e), (x, y) in zip(mappings, latentx)]:
    if query not in ftitle.lower(): continue

    if doi != "": print(f"https://doi.org/{doi}")
    print(ftitle)
    print(fabstract)
    print()

https://doi.org/10.1186/1471-2164-8-437
Bioinformatic evaluation of L-arginine catabolic pathways in 24 cyanobacteria and transcriptional analysis of genes encoding enzymes of L-arginine catabolism in the cyanobacterium Synechocystis sp. PCC 6803.
So far very limited knowledge exists on L-arginine catabolism in cyanobacteria, although six major L-arginine-degrading pathways have been described for prokaryotes. Thus, we have performed a bioinformatic analysis of possible L-arginine-degrading pathways in cyanobacteria. Further, we chose Synechocystis sp. PCC 6803 for a more detailed bioinformatic analysis and for validation of the bioinformatic predictions on L-arginine catabolism with a transcript analysis.
We have evaluated 24 cyanobacterial genomes of freshwater or marine strains for the presence of putative L-arginine-degrading enzymes. We identified an L-arginine decarboxylase pathway in all 24 strains. In addition, cyanobacteria have one or two further pathways representing either 

In [26]:
bounds = (
    -40, -20,
    -40, -22
)

for doi, ftitle, fabstract in [(doi, t, a) for (doi, t, a, e), (x, y) in zip(mappings, latentx) if bounds[0]<x>bounds[1] and bounds[2]<y<bounds[3]]:
    if doi != "": print(f"https://doi.org/{doi}")
    print(ftitle)
    print(fabstract)
    print()

https://doi.org/10.3390/toxins15030169
In Vitro
Cyanobacterial Harmful Bloom Lipopolysaccharides Induce Pro-Inflammatory Effects in Immune and Intestinal Epithelial Cells .
Freshwater cyanobacterial harmful blooms (CyanoHABs) produce a variety of toxic and bioactive compounds including lipopolysaccharides (LPSs). The gastrointestinal tract can be exposed to them via contaminated water even during recreational activities. However, there is no evidence of an effect of CyanoHAB LPSs on intestinal cells. We isolated LPSs of four CyanoHABs dominated by different cyanobacterial species and LPSs of four laboratory cultures representing the respective dominant cyanobacterial genera. Two intestinal and one macrophage cell lines were used to detect  pro-inflammatory activity of the LPS. All LPSs isolated from CyanoHABs and laboratory cultures induced cytokines production in at least one  model, except for LPSs from the  PCC7806 culture. LPSs isolated from cyanobacteria showed unique migration pa