In [1]:
from azure.storage.blob import ContainerClient
import numpy as np
import io
import cv2
import time
import matplotlib.pyplot as plt
%matplotlib inline

# Dataset website: http://theairlab.org/tartanair-dataset/
# Location of the Azure blob container queried by the listing helpers in this notebook.
account_url = 'https://tartanair.blob.core.windows.net/'
container_name = 'tartanair-release1'

# Module-level client shared by the listing helpers; no credential is passed.
container_client = ContainerClient(account_url=account_url, 
                                 container_name=container_name,
                                 credential=None)


In [2]:
def get_environment_list():
    '''
    Return the name of every entry yielded by walking the container root.

    Each entry at the root level corresponds to one environment.
    '''
    return [entry.name for entry in container_client.walk_blobs()]

def get_trajectory_list(envname, easy_hard = 'Easy'):
    '''
    List all the trajectory folders, which are named as 'P0XX'.

    envname: environment name (first path component); a trailing '/' is tolerated.
    easy_hard: difficulty sub-folder, either 'Easy' or 'Hard'.
    Returns the names, exactly as reported by the container, of the entries
    under <envname>/<easy_hard>/ whose last path component starts with 'P'.
    '''
    assert(easy_hard=='Easy' or easy_hard=='Hard')
    # Entry names may carry a trailing '/' (empty path components are filtered
    # below for that reason); strip it so the prefix never contains '//'.
    prefix = envname.rstrip('/') + '/' + easy_hard + '/'
    trajlist = []
    for traj in container_client.walk_blobs(name_starts_with=prefix):
        components = [part for part in traj.name.split('/') if part]
        # Guard against a name made only of separators before indexing.
        if components and components[-1].startswith('P'):
            trajlist.append(traj.name)
    return trajlist

def _list_blobs_in_folder(folder_name):
    """
    List all blobs in a virtual folder in an Azure blob container
    """
    
    files = []
    generator = container_client.list_blobs(name_starts_with=folder_name)
    for blob in generator:
        files.append(blob.name)
    return files

def get_image_list(trajdir, left_right = 'left'):
    '''
    List the '.png' blobs under <trajdir>/image_<left_right>/.

    trajdir: trajectory folder name; a trailing '/' is tolerated.
    left_right: camera side, either 'left' or 'right'.
    Returns a list of blob names.
    '''
    assert(left_right == 'left' or left_right == 'right')
    # A folder-style name ending in '/' would otherwise yield a '//' prefix.
    folder = trajdir.rstrip('/') + '/image_' + left_right + '/'
    return [fn for fn in _list_blobs_in_folder(folder) if fn.endswith('.png')]

def get_depth_list(trajdir, left_right = 'left'):
    '''
    List the '.npy' blobs under <trajdir>/depth_<left_right>/.

    trajdir: trajectory folder name; a trailing '/' is tolerated.
    left_right: camera side, either 'left' or 'right'.
    Returns a list of blob names.
    '''
    assert(left_right == 'left' or left_right == 'right')
    # A folder-style name ending in '/' would otherwise yield a '//' prefix.
    folder = trajdir.rstrip('/') + '/depth_' + left_right + '/'
    return [fn for fn in _list_blobs_in_folder(folder) if fn.endswith('.npy')]

def get_flow_list(trajdir, ):
    '''
    List the blobs under <trajdir>/flow/ whose name ends with 'flow.npy'.

    trajdir: trajectory folder name; a trailing '/' is tolerated.
    Returns a list of blob names.
    '''
    # A folder-style name ending in '/' would otherwise yield a '//' prefix.
    folder = trajdir.rstrip('/') + '/flow/'
    return [fn for fn in _list_blobs_in_folder(folder) if fn.endswith('flow.npy')]

def get_flow_mask_list(trajdir, ):
    '''
    List the blobs under <trajdir>/flow/ whose name ends with 'mask.npy'.

    trajdir: trajectory folder name; a trailing '/' is tolerated.
    Returns a list of blob names.
    '''
    # A folder-style name ending in '/' would otherwise yield a '//' prefix.
    folder = trajdir.rstrip('/') + '/flow/'
    return [fn for fn in _list_blobs_in_folder(folder) if fn.endswith('mask.npy')]

def get_posefile(trajdir, left_right = 'left'):
    '''
    Build the blob name of a trajectory's pose file: <trajdir>/pose_<left_right>.txt.

    trajdir: trajectory folder name; a trailing '/' is tolerated.
    left_right: camera side, either 'left' or 'right'.
    Returns the blob name as a string (nothing is fetched).
    '''
    assert(left_right == 'left' or left_right == 'right')
    # A folder-style name ending in '/' would otherwise yield '//pose_...'.
    return trajdir.rstrip('/') + '/pose_' + left_right + '.txt'

def get_seg_list(trajdir, left_right = 'left'):
    '''
    List the '.npy' blobs under <trajdir>/seg_<left_right>/.

    trajdir: trajectory folder name; a trailing '/' is tolerated.
    left_right: camera side, either 'left' or 'right'.
    Returns a list of blob names.
    '''
    assert(left_right == 'left' or left_right == 'right')
    # A folder-style name ending in '/' would otherwise yield a '//' prefix.
    folder = trajdir.rstrip('/') + '/seg_' + left_right + '/'
    return [fn for fn in _list_blobs_in_folder(folder) if fn.endswith('.npy')]

In [3]:
# List the environments found at the container root (this queries the blob service).
envlist = get_environment_list()
print('Find {} environments..'.format(len(envlist)))
print(envlist)

ServiceRequestError: <urllib3.connection.HTTPSConnection object at 0x11cf944a0>: Failed to resolve 'tartanair.blob.core.windows.net' ([Errno 8] nodename nor servname provided, or not known)

In [None]:
from os.path import isdir, isfile, join

class AirLabDownloader(object):
    """
    Download files from a bucket on the AirLab MinIO server.

    The endpoint and keys default to the public, download-only values that
    ship with this notebook; each can be overridden through the constructor.
    """

    # Public defaults (the access/secret pair is the public key for downloading).
    DEFAULT_ENDPOINT_URL = "airlab-share-01.andrew.cmu.edu:9000"
    DEFAULT_ACCESS_KEY = "4e54CkGDFg2RmPjaQYmW"
    DEFAULT_SECRET_KEY = "mKdGwketlYUcXQwcPxuzinSxJazoyMpAip47zYdl"

    def __init__(self, bucket_name = 'tartanair', endpoint_url = None,
                 access_key = None, secret_key = None) -> None:
        """
        bucket_name: bucket that `download` reads from.
        endpoint_url / access_key / secret_key: optional overrides; when left
            as None the class-level public defaults are used.
        """
        # Imported lazily so that merely defining the class does not need minio.
        from minio import Minio

        if endpoint_url is None:
            endpoint_url = self.DEFAULT_ENDPOINT_URL
        if access_key is None:
            access_key = self.DEFAULT_ACCESS_KEY
        if secret_key is None:
            secret_key = self.DEFAULT_SECRET_KEY

        self.client = Minio(endpoint_url, access_key=access_key, secret_key=secret_key, secure=True)
        self.bucket_name = bucket_name

    def download(self, filelist, destination_path):
        """
        Fetch every object named in `filelist` into `destination_path`.

        Each object is stored under a flat file name in which '/' is replaced
        by '_'. Returns (True, list_of_local_paths) on success. If a target
        file already exists, the batch stops at that file and (False, None) is
        returned; files fetched earlier in the same call stay on disk.
        """
        target_filelist = []

        for source_file_name in filelist:
            target_file_name = join(destination_path, source_file_name.replace('/', '_'))
            target_filelist.append(target_file_name)
            print('--')
            # Never overwrite an existing file; abort the whole batch instead.
            if isfile(target_file_name):
                print('Error: Target file {} already exists..'.format(target_file_name))
                return False, None

            print(f"  Downloading {source_file_name} from {self.bucket_name}...")
            self.client.fget_object(self.bucket_name, source_file_name, target_file_name)
            print(f"  Successfully downloaded {source_file_name} to {target_file_name}!")

        return True, target_filelist

In [29]:
# Filters used when selecting archives from the file index.
# Difficulty levels to include.
levellist = ['Easy', 'Hard']
# File types to include (image/depth/seg/flow).
typelist = ['image']
# Camera/variant suffixes to include (left/right/flow/mask).
cameralist = ['left']

In [30]:
def get_size(filesizelist, filelist):
    """
    Sum the sizes of the files named in `filelist`.

    filesizelist: iterable of records whose first item is a file name and
        whose second item is its size (anything `float()` accepts).
    filelist: names to total; each must appear in `filesizelist`
        (a missing name raises KeyError).
    Returns the total as a float.
    """
    size_by_name = {}
    for record in filesizelist:
        size_by_name[record[0]] = float(record[1])

    accumulated = 0.
    for name in filelist:
        accumulated = accumulated + size_by_name[name]
    return accumulated

In [31]:
# Read the file index. Each useful line holds a path followed by a size;
# only '.zip' entries are kept.
with open('files.txt') as f:
    lines = f.readlines()

zipsizelist = []
for ll in lines:
    fields = ll.strip().split()
    # Skip blank or incomplete lines instead of failing on fields[0].
    if len(fields) >= 2 and fields[0].endswith('.zip'):
        zipsizelist.append(fields)

# Keep the archives that match the level/type/camera filters.
downloadlist = []
for entry in zipsizelist:
    zip_path = entry[0]
    path_parts = zip_path.split('/')
    if len(path_parts) < 2:
        # No parent folder, so there is no difficulty level to match against.
        continue
    filename = path_parts[-1]
    difflevel = path_parts[-2]

    # image/depth/seg/flow
    filetype = filename.split('_')[0]
    # left/right/flow/mask
    cameratype = filename.split('.')[0].split('_')[-1]

    if (difflevel in levellist) and (filetype in typelist) and (cameratype in cameralist):
        downloadlist.append(zip_path)

if len(downloadlist)==0:
    print('No file meets the condition!')
    # exit() would ask the notebook kernel to shut down; raising SystemExit
    # only stops this cell (and still ends a plain script run).
    raise SystemExit

print('{} files are going to be downloaded...'.format(len(downloadlist)))
for fileurl in downloadlist:
    print ('  -', fileurl)

all_size = get_size(zipsizelist, downloadlist)
print('*** Total Size: {} GB ***'.format(all_size))


2 files are going to be downloaded...
  - amusement/Easy/image_left.zip
  - amusement/Hard/image_left.zip
*** Total Size: 9.134554048 GB ***


In [32]:
# Create the downloader in this cell: no other cell defines `downloader`, so a
# fresh kernel would otherwise fail here with a NameError.
downloader = AirLabDownloader()
# res is False (and downloadfilelist is None) if a target file already exists.
res, downloadfilelist = downloader.download(downloadlist, "output")

In [33]:
# Display the local paths returned by the download call.
downloadfilelist

['output/amusement_Easy_image_left.zip',
 'output/amusement_Hard_image_left.zip']