# Notebook to test downloading DICOM images
*author: Wiebke Toussaint*

In [None]:
import os
import sys
import pandas as pd
import numpy as np
import boto3
import tempfile
from time import time

# Treat the parent of the current working directory as the project root and
# append its 'src' folder to sys.path. This has to run before the imports
# below (presumably the d00_utils / d01_data / d02_intermediate packages live
# under src — confirm against the repository layout).
projectdir = os.path.dirname(os.getcwd())
sys.path.append(os.path.join(projectdir,'src'))

from d01_data.ingestion_xtdb import ingest_xtdb
from d02_intermediate.clean_xtdb import *
from d00_utils.db_utils import *
from d00_utils.s3_utils import *
from d00_utils.dcm_utils import decompress_dcm

# Database table name and the path of a JSON credentials file in the user's
# home directory.
# NOTE(review): neither name is referenced again in the visible cells —
# presumably left over from another notebook or consumed elsewhere; confirm.
db_table = 'metadata'
credentials_file = os.path.join(os.path.expanduser('~'), '.psql_credentials.json')

In [None]:
# Create the database accessor used by the later cells (io_views.get_table)
# and display the result of list_tables() as the cell output.
# NOTE(review): dbReadWriteViews is not defined in this notebook — presumably
# it arrives through one of the star imports above; confirm.
io_views = dbReadWriteViews()
io_views.list_tables()

In [None]:
def extract_imgs_from_dicom(bucket, prefix='', suffix='.dcm', outdir=None):
    """
    Download every object of an S3 bucket whose key matches a prefix and a suffix.

    All matching objects are written to the same local file,
    ``<outdir>/a_<prefix with every '/a' removed>.dcm``, so the prefix is
    expected to identify a single object; if several keys match, the last
    successful download overwrites the earlier ones.

    Failures are reported with ``print`` and do not stop the remaining downloads.

    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch objects whose key starts with this prefix (optional).
    :param suffix: Only fetch objects whose key ends with this suffix, default='.dcm'.
    :param outdir: Directory to download into; it is created if missing.
        Defaults to the current working directory when None.
    :return: list of the keys that were downloaded successfully
    """
    s3 = boto3.client('s3')

    # The default outdir=None used to crash in os.makedirs(None).
    if outdir is None:
        outdir = os.getcwd()
    os.makedirs(outdir, exist_ok=True)

    keys = []
    kwargs = {'Bucket': bucket}

    # Let S3 filter by prefix server-side when a usable prefix is given.
    if isinstance(prefix, str):
        kwargs['Prefix'] = prefix

    while True:
        resp = s3.list_objects_v2(**kwargs)

        # 'Contents' is missing from the response when the page holds no objects.
        contents = resp.get('Contents', [])
        if not contents:
            print(prefix, 'DOWNLOAD ERROR', 'no objects found')

        for obj in contents:
            key = obj['Key']
            try:
                if not (key.startswith(prefix) and key.endswith(suffix)):
                    continue
                # Local name is derived from the prefix, not from the key.
                destination = os.path.join(
                    outdir, 'a_' + ''.join(prefix.split('/a')) + '.dcm')
                s3.download_file(bucket, key, destination)
            except Exception as err:
                # Best effort: report the failure and carry on with the next
                # object. Unlike a bare except, this lets KeyboardInterrupt
                # and SystemExit propagate.
                print(prefix, 'DOWNLOAD ERROR', err)
                continue
            keys.append(key)

        # A continuation token is only returned while more pages remain.
        token = resp.get('NextContinuationToken')
        if token is None:
            break
        kwargs['ContinuationToken'] = token

    return keys

### Download Sample DICOM Images

In [None]:
# Local directory for the downloaded files, and the 'raw' sub-directory
# used as the target of the decompression step in the next cell.
datadir = os.path.expanduser('/home/ubuntu/data/01_raw/test_downsampleby5')
raw_datadir = os.path.join(datadir, 'raw')

# Load the instance table and take the two identifying columns as strings.
# (Assumes get_table returns a pandas DataFrame — confirm.)
instances = io_views.get_table('instances_w_labels_test_downsampleby5')
study_ids = instances['studyidk'].astype(str)
instance_names = instances['filename'].astype(str)

# Per row: the S3 key prefix '<studyidk>/a_<filename>' and the local file
# name 'a_<studyidk>_<filename>.dcm'.
prefix = study_ids + '/a_' + instance_names
filenames = 'a_' + study_ids + '_' + instance_names + '.dcm'

# Map each S3 prefix to the local file name it is stored under.
download_dict = {s3_prefix: local_name for s3_prefix, local_name in zip(prefix, filenames)}

In [None]:
# For every S3 prefix: download the matching object into datadir, then hand
# the downloaded path and the target path under raw_datadir to decompress_dcm.
for p, dcm_filename in download_dict.items():
    tmpfile = extract_imgs_from_dicom(
        'cibercv', prefix=p, suffix='.dcm', outdir=datadir)
    dcm_filepath = os.path.join(datadir, dcm_filename)
    dcm_rawfilepath = os.path.join(raw_datadir, dcm_filename + '_raw')
    decompress_dcm(dcm_filepath, dcm_rawfilepath)

In [None]:
# Scratch cell: download the object(s) stored under `p` (the last prefix left
# over from the loop in the previous cell) into a named temporary file, to try
# the download without writing into the data directory.
s3 = boto3.client('s3')

# These two names were used below without ever being defined in the notebook;
# the resulting NameError was hidden by a bare except.
bucket = 'cibercv'
suffix = '.dcm'

keys = []
kwargs = {'Bucket': bucket}

# Deliberately left open: the file behind tmp.name is removed as soon as the
# object is closed. Call tmp.close() once the file is no longer needed.
tmp = tempfile.NamedTemporaryFile()

# Filter server-side on the single prefix `p`. The old check tested `prefix`,
# which is a pandas Series here, so the filter was never applied.
if isinstance(p, str):
    kwargs['Prefix'] = p

while True:
    resp = s3.list_objects_v2(**kwargs)

    # 'Contents' is missing from the response when the page holds no objects.
    for obj in resp.get('Contents', []):
        key = obj['Key']
        if not (key.startswith(p) and key.endswith(suffix)):
            continue
        print(key)
        keys.append(key)
        try:
            # Every match is written to the same temporary file, so the last
            # successful download wins.
            s3.download_file(bucket, key, tmp.name)
        except Exception as err:
            # Best effort: report the failure and keep going.
            print(p, 'DOWNLOAD ERROR', err)

    # A continuation token is only returned while more pages remain.
    token = resp.get('NextContinuationToken')
    if token is None:
        break
    kwargs['ContinuationToken'] = token

tmp.name