# Querying database with JSONB field

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import importlib
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
print(module_path)
if module_path not in sys.path:
    sys.path.append(module_path)
    
import imaging_db.filestorage.s3_storage as s3_storage

/Users/jenny.folkesson/Code/imagingDB


In [2]:
import imaging_db.database.db_session as db_session


importlib.reload(db_session)
credentials_filename = "/Users/jenny.folkesson/Code/db_credentials.json"

In [3]:
# Find all datasets containing protein TOPOR
with db_session.session_scope(credentials_filename) as session:
    datasets = session.query(db_session.DataSet) \
    .join(db_session.FramesGlobal) \
    .join(db_session.Frames) \
    .filter(db_session.FramesGlobal.metadata_json.contains({"protein_name": "TOPOR"})) \
    .all()
    
    print(len(datasets))
    for d in datasets:
        print(d.dataset_serial)

6
ML-2018-11-08-15-06-10-0001
ML-2018-11-08-15-06-18-0001
ML-2018-11-08-15-36-16-0001
ML-2018-11-08-15-36-44-0001
ML-2018-11-08-15-37-09-0001
ML-2018-11-08-15-37-34-0001


In [21]:
# Find all unique protein names in database and order them alphabetically
with db_session.session_scope(credentials_filename) as session:
    proteins = session.query(
        db_session.FramesGlobal.metadata_json['protein_name']) \
            .distinct() \
            .order_by(db_session.FramesGlobal.metadata_json['protein_name'])

    for p in proteins:
        print(p)

('ACTB',)
('AGTRAP',)
('AHSA1',)
('ARL6IP1',)
('ATG9A',)
('BAG1',)
('BAG2',)
('BAG3',)
('BAG4',)
('BAG5',)
('BAG6',)
('BCAP31',)
('BPNT1',)
('BTF3',)
('CANX',)
('CDC37',)
('CHM',)
('CHMP3',)
('CHMP6',)
('CLCN3',)
('CLTA',)
('CTRL1',)
('CTRL2',)
('DnaJA1',)
('DnaJA2',)
('DnaJA3',)
('DnaJB1',)
('DnaJB12',)
('DnaJB14',)
('DnaJB2',)
('DnaJB4',)
('DnaJB5',)
('DnaJB6',)
('DNAJC1',)
('DNAJC11',)
('DNAJC12',)
('DNAJC13',)
('DNAJC14',)
('DNAJC17',)
('DnaJC18',)
('DnaJC2',)
('DnaJC21',)
('DNAJC24',)
('DNAJC25',)
('DNAJC30',)
('DNAJC4',)
('DNAJC5',)
('DNAJC6',)
('DNAJC7',)
('DNAJC8',)
('DnaJC9',)
('ERVK3',)
('FBXO7',)
('FBXO9',)
('G3BP1',)
('G3BP2',)
('HAUS6',)
('HERPUD1',)
('HERPUD2',)
('HGS',)
('HMGA2',)
('HSF1',)
('HSF2',)
('HSP90AA1',)
('HSP90AB1',)
('HSP90B1',)
('HSPA12A',)
('HSPA14',)
('HSPA1B',)
('HSPA4',)
('HSPA4L',)
('HSPA8',)
('HSPB1',)
('HSPB11',)
('HSPBP1',)
('HSPH1',)
('IMPA2',)
('IMPAD1',)
('INPP4A',)
('INPP5A',)
('INPP5E',)
('INPP5K',)
('INPPL1',)
('LINC0',)
('MIEF1',)
('MKKS',)
('

In [12]:
# Get channel 0 for the first dataset
dataset_id = 'ML-2018-11-08-15-06-10-0001'

db_ops = db_session.DatabaseOperations(
    credentials_filename,
    dataset_serial="ISP-2018-06-01-00-00-00-0002",
)
global_meta, frames_meta = db_ops.get_frames_meta(channels=(0,))

for i, f in frames_meta.iterrows():
    print(i, f.file_name)

0 im_c000_z000_t000_p002.png
1 im_c000_z001_t000_p002.png
2 im_c000_z002_t000_p002.png
3 im_c000_z003_t000_p002.png
4 im_c000_z004_t000_p002.png
5 im_c000_z005_t000_p002.png
6 im_c000_z006_t000_p002.png
7 im_c000_z007_t000_p002.png
8 im_c000_z008_t000_p002.png
9 im_c000_z009_t000_p002.png
10 im_c000_z010_t000_p002.png
11 im_c000_z011_t000_p002.png
12 im_c000_z012_t000_p002.png


In [15]:
import time
# Get image stack from metadata
importlib.reload(s3_storage)
data_loader = s3_storage.DataStorage(s3_dir=global_meta["s3_dir"])
t0 = time.time()
im_stack, dim_order = data_loader.get_stack_from_meta(global_meta, frames_meta)
print("Time to load stack: {:.2f}".format(time.time() - t0))

print("Stack shape:", im_stack.shape)
print("Dimensions and order:", dim_order)

Time to load stack: 3.82
Stack shape: (2048, 2048, 13)
Dimensions and order: XYZ


In [17]:
# Plot frames as gif
import matplotlib.animation as animation
from IPython.display import HTML

fig = plt.figure()
plt.rcParams['figure.figsize'] = [12, 12]
plt.axis('off')

frames = []
for idx in range(im_stack.shape[-1]):
    im = plt.imshow(im_stack[..., idx], animated=True)
    frames.append([im])

anim_kmean = animation.ArtistAnimation(fig, frames)
plt.close(anim_kmean._fig)

# Call function to display the animation
HTML(anim_kmean.to_html5_video())