# Querying a database with a JSONB field

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import importlib
import os
import sys

# Put the parent of the current working directory on the import path so the
# imaging_db package imported below can be found.
module_path = os.path.abspath(os.pardir)
print(module_path)
if module_path not in sys.path:
    sys.path.append(module_path)
    
import imaging_db.filestorage.s3_storage as s3_storage

/Users/jenny.folkesson/Code/imagingDB


In [3]:
import imaging_db.database.db_operations as db_ops
import imaging_db.utils.db_utils as db_utils

# Re-import db_ops so that edits made to the module during this session are
# picked up without restarting the kernel.
importlib.reload(db_ops)
# Path to the JSON credentials file. Set the IMAGING_DB_CREDENTIALS
# environment variable to point at your own file instead of editing the
# notebook; when it is unset, the original author's path is used.
credentials_filename = os.environ.get(
    "IMAGING_DB_CREDENTIALS",
    "/Users/jenny.folkesson/Code/db_credentials.json",
)
# Connection string passed to db_ops.session_scope in the cells below.
credentials_str = db_utils.get_connection_str(credentials_filename)

In [4]:
# Look up every dataset whose global frame metadata contains the key/value
# pair protein_name == "TOPOR", then print how many matched and their serials.
with db_ops.session_scope(credentials_str) as session:
    datasets = (
        session.query(db_ops.DataSet)
        .join(db_ops.FramesGlobal)
        .join(db_ops.Frames)
        .filter(
            db_ops.FramesGlobal.metadata_json.contains({"protein_name": "TOPOR"})
        )
        .all()
    )

    print(len(datasets))
    for dataset in datasets:
        print(dataset.dataset_serial)

6
ML-2018-11-08-15-06-10-0001
ML-2018-11-08-15-06-18-0001
ML-2018-11-08-15-36-16-0001
ML-2018-11-08-15-36-44-0001
ML-2018-11-08-15-37-09-0001
ML-2018-11-08-15-37-34-0001


In [5]:
# List every distinct protein_name value found in the global frame metadata,
# ordered by that same value.
with db_ops.session_scope(credentials_str) as session:
    protein_name = db_ops.FramesGlobal.metadata_json['protein_name']
    proteins = session.query(protein_name).distinct().order_by(protein_name)

    # Iterating the query yields one single-element row per protein name
    for protein_row in proteins:
        print(protein_row)

('ABCE1',)
('ABR',)
('ACAP2',)
('ACAP3',)
('ACLY',)
('ACTB',)
('ACTR2',)
('ACTR3',)
('AGAP1',)
('AGAP3',)
('AGPAT2',)
('AGPAT3',)
('AGPAT4',)
('AGPAT5',)
('AGPAT6',)
('AGPAT9',)
('AGTRAP',)
('AHSA1',)
('AKAP12',)
('AKAP13',)
('AKR1A1',)
('AKT1',)
('ALCAM',)
('ALDH16A1',)
('ALDH18A1',)
('ALDH7A1',)
('ALDH9A1',)
('ALG2',)
('ALS2',)
('ALX1',)
('ANAPC1',)
('ANAPC10',)
('ANAPC11',)
('ANAPC13',)
('ANAPC15',)
('ANAPC16',)
('ANAPC2',)
('ANAPC4',)
('ANAPC5',)
('ANAPC7',)
('AP2M1',)
('APC',)
('APPL1',)
('APPL2',)
('ARAP1',)
('ARCN1',)
('ARF1',)
('ARF3',)
('ARF4',)
('ARF5',)
('ARF6',)
('ARFGAP1',)
('ARFGAP2',)
('ARFGAP3',)
('ARFGEF1',)
('ARFGEF2',)
('ARFIP1',)
('ARFIP2',)
('ARHGAP1',)
('ARHGAP11A-B',)
('ARHGAP17',)
('ARHGAP19',)
('ARHGAP21',)
('ARHGAP22',)
('ARHGAP35',)
('ARHGAP5',)
('ARHGAP8',)
('ARHGEF1',)
('ARHGEF12',)
('ARHGEF18',)
('ARHGEF25',)
('ARHGEF26',)
('ARHGEF39',)
('ARHGEF7',)
('ARL1',)
('ARL10',)
('ARL13B',)
('ARL14EP',)
('ARL15',)
('ARL16',)
('ARL17A',)
('ARL17B',)
('ARL2',)
('ARL2

In [8]:
# Fetch the global and per-frame metadata for channel 0 of a single dataset
# and list the file name of every frame.
dataset_id = 'ML-2018-11-08-15-06-10-0001'
db_inst = db_ops.DatabaseOperations(dataset_serial=dataset_id)

with db_ops.session_scope(credentials_str) as session:
    global_meta, frames_meta = db_inst.get_frames_meta(
        session=session, channels=(0,)
    )
    # Print each row index next to the file name stored in that row
    for row_idx, file_name in frames_meta["file_name"].items():
        print(row_idx, file_name)

0 im_c000_z000_t000_p000.png
1 im_c000_z001_t000_p000.png
2 im_c000_z002_t000_p000.png
3 im_c000_z003_t000_p000.png
4 im_c000_z004_t000_p000.png
5 im_c000_z005_t000_p000.png
6 im_c000_z006_t000_p000.png
7 im_c000_z007_t000_p000.png
8 im_c000_z008_t000_p000.png
9 im_c000_z009_t000_p000.png
10 im_c000_z010_t000_p000.png
11 im_c000_z011_t000_p000.png
12 im_c000_z012_t000_p000.png
13 im_c000_z013_t000_p000.png
14 im_c000_z014_t000_p000.png
15 im_c000_z015_t000_p000.png
16 im_c000_z016_t000_p000.png
17 im_c000_z017_t000_p000.png
18 im_c000_z018_t000_p000.png
19 im_c000_z019_t000_p000.png
20 im_c000_z020_t000_p000.png
21 im_c000_z021_t000_p000.png
22 im_c000_z022_t000_p000.png
23 im_c000_z023_t000_p000.png
24 im_c000_z024_t000_p000.png
25 im_c000_z025_t000_p000.png
26 im_c000_z026_t000_p000.png
27 im_c000_z027_t000_p000.png
28 im_c000_z028_t000_p000.png
29 im_c000_z029_t000_p000.png
30 im_c000_z030_t000_p000.png
31 im_c000_z031_t000_p000.png
32 im_c000_z032_t000_p000.png
33 im_c000_z033_t000

In [11]:
import time
# Load the image stack described by the metadata fetched above and report
# how long the load took, plus the resulting shape and dimension order.
importlib.reload(s3_storage)
s3_dir = global_meta["s3_dir"]
print(s3_dir)
data_loader = s3_storage.DataStorage(s3_dir=s3_dir)

t0 = time.time()
im_stack, dim_order = data_loader.get_stack_from_meta(global_meta, frames_meta)
elapsed = time.time() - t0
print("Time to load stack: {:.2f}".format(elapsed))

print("Stack shape:", im_stack.shape)
print("Dimensions and order:", dim_order)

raw_frames/ML-2018-11-08-15-06-10-0001
Time to load stack: 2.25
Stack shape: (1024, 1024, 35)
Dimensions and order: XYZ


In [12]:
# Show the frames of the image stack as an animation (embedded HTML5 video)
import matplotlib.animation as animation
from IPython.display import HTML

# Set the default figure size BEFORE creating the figure: rcParams only affect
# figures created afterwards, so the previous order left the figure at the
# default size on a fresh kernel and only gave 12x12 on a re-run.
plt.rcParams['figure.figsize'] = [12, 12]
fig = plt.figure()
plt.axis('off')

# ArtistAnimation takes one list of artists per animation frame; build one
# image artist for each slice along the last axis of the stack.
frames = []
for idx in range(im_stack.shape[-1]):
    im = plt.imshow(im_stack[..., idx], animated=True)
    frames.append([im])

anim_kmean = animation.ArtistAnimation(fig, frames)
# Close the figure so the inline backend does not also render it as a static
# image below the video. `fig` is the figure the animation was built on.
plt.close(fig)

# Render the animation as an HTML5 video; as the last expression of the cell
# it becomes the displayed output.
HTML(anim_kmean.to_html5_video())