In [119]:
# get data from RESTful API in METADB
import requests
import json
import time
import numpy as np

from functools import wraps

URL = 'http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com'
USER = 'chunlab@chunlab.com'
TOKEN = None

def _token(user=USER):
    url = URL+"/TOKENS"
    body = "{\"userid\":\"%s\"}" %user
    response = requests.request("POST", url, data=body)
    token = json.loads(response.text)
    return token

def _headers():
    token = TOKEN
    headers = {
        "Content-type":"application/json",
        "x-cl-token":token['token']
    }
    return headers

def api_timer(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        t1 = time.time()
        result = func(*args, **kwargs)
        t2 = time.time() - t1
        print('%s takes %f secs.' %(func.__name__, t2))
        return result
    return wrapper

def api_wrapper(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        global TOKEN
        #if args
        try:
            if TOKEN is None:
                TOKEN = _token(user)
            res = func(*args, **kwargs)
            time.sleep(0.001)
            print('%s succeeded' %(func.__name__))
        except:
            try:
                TOKEN = _token(user)
                res = func(*args, **kwargs)
                time.sleep(0.001)
                print('%s succeeded' %(func.__name__))
            except:
                res = None
                time.sleep(0.001)
                raise RuntimeError('%s failed' %(func.__name__))

        return res

    return wrapper

def get_profiles(mtp_ids, user=USER):#, greengene=False
    profiles = [get_profile(mtp_id, user) for mtp_id in mtp_ids]        
    return profiles

@api_timer
@api_wrapper
def get_profile(mtp_id, user=USER):#, greengene=False
    url = URL+"/USERS/%s/SAMPLES/%s/PROFILE" %(user, mtp_id)
    #if greengene:
    #    url += '?UseQIIME'
    headers = _headers()
    response = requests.request("GET", url, headers=headers)
    res = json.loads(response.text)
    return res


def get_token(user=USER):
    url = URL+"/TOKENS"
    body = "{\"userid\":\"%s\"}" %user
    response = requests.request("POST", url, data=body)
    token = json.loads(response.text)
    return token

def _headers_with_token(token):
    headers = {
        "Content-type":"application/json",
        "x-cl-token":token['token'] if type(token)==dict else token
    }
    return headers

def get_samples(token, user=USER):
    url = URL+"/USERS/%s/SAMPLES" %(user)
    headers = _headers_with_token(token)
    
    try:
        response = requests.request("GET", url, headers=headers)
        res = json.loads(response.text)
        time.sleep(0.001)
        print('getting samples succeeded: %s' %url)

    except:
        res = None
        time.sleep(0.001)
        raise RuntimeError('getting samples failed: %s' %url)

    return res


#METADB API
def search_ezbiocloud_samples(token, where):
    url = URL+"/METADB/SAMPLES?%s" %where
    headers = _headers_with_token(token)

    try:
        response = requests.request("GET", url, headers=headers)
        res = json.loads(response.text)
        print('searching samples succeeded: %s' %url)

    except:
        res = None
        time.sleep(0.001)
        raise RuntimeError('searching samples failed: %s' %url)
        
    return res



In [178]:
import time
import pickle
import json
import os
def download_project(prj_num):
    check_sample=1
    loop_idx=0
    idx=1
    while check_sample==1:
        sample_name="CL"+str(prj_num)+"S"+str(idx)
        if loop_idx % 1000 == 0:
            TOKEN = get_token(user=USER)

        prof = get_profile(sample_name,user=USER)
        try:
            print(prof['code'])
            #print('project '+prj_num+' complete')
            check_sample=0
        except:
            meta = search_ezbiocloud_samples(TOKEN,'mtp_id='+sample_name)
            t_sample = Sample(prof,meta)
            
            # save as pickle
            with open("PROJ"+str(prj_num)+".pkl","a+b") as f1:
                pickle.dump(t_sample,f1,pickle.HIGHEST_PROTOCOL)
                # it works quite well
            
            # save as json
            '''
            with open("PROJ"+str(prj_num)+".json","a+") as f2:
                json.dump(t_sample.toJSON(),f2)
                # not that well...
            '''
            # save as h5?
            del t_sample
                
            idx=idx+1
            time.sleep(0.04)

        loop_idx = loop_idx+1


TOKEN = get_token(user=USER)
for i in range(1,474):
    if os.path.isfile("./PROJ"+str(i)+".pkl"):
        print("file_exist")
        os.remove("./PROJ"+str(i)+".pkl")
        download_project(i)
    else:
        download_project(i)
#prof = 


get_profile succeeded
get_profile takes 0.075380 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S1
deleting object
get_profile succeeded
get_profile takes 0.124617 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S2
deleting object
get_profile succeeded
get_profile takes 0.087887 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S3
deleting object
get_profile succeeded
get_profile takes 0.114884 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S4
deleting object
get_profile succeeded
get_profile takes 0.118587 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S5
deleting object
get_profile succeeded
get_profile takes 0.080

get_profile succeeded
get_profile takes 0.077765 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S45
deleting object
get_profile succeeded
get_profile takes 0.096171 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S46
deleting object
get_profile succeeded
get_profile takes 0.083291 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S47
deleting object
get_profile succeeded
get_profile takes 0.089864 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S48
deleting object
get_profile succeeded
get_profile takes 0.084818 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S49
deleting object
get_profile succeeded
get_profile takes 

get_profile succeeded
get_profile takes 0.110814 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S89
deleting object
get_profile succeeded
get_profile takes 0.076662 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S90
deleting object
get_profile succeeded
get_profile takes 0.086931 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S91
deleting object
get_profile succeeded
get_profile takes 0.102871 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S92
deleting object
get_profile succeeded
get_profile takes 0.071593 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL1S93
deleting object
get_profile succeeded
get_profile takes 

KeyboardInterrupt: 

In [172]:
# since meta.keys() are different project by project --> use all keys
#meta_key = ['mtp_id','sample_id','host_sex','host_age','host_life_stage','host_nation','tag_region','platform','category','ee']
import pickle

class Sample:
    # constructor
    def __init__(self,profile,meta):
        try:
            self.meta = meta[0] #since meta_api returns [{k-v,k-v ..}] form
            prof_key = ['tn','nm','p','lvl']
            self.profile = [tax['cnt'][0] for tax in profile]
            #self.key = [tax['tn'] for tax in profile]
            self.key =[{p_key : tax[p_key] for p_key in prof_key} for tax in profile]
        except:
            self.meta = None
            self.profile = None
            self.key = None
    
    
    def toJSON(self):
        return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
    
    
    # destructor
    def __del__(self):
        print("deleting object")
    
    # normalization part
    # todo : make function-chaining work...
    # maybe this also can be module...
    
def ratio(self): #this is test function, it will merge to normal()
    try: # initial
        ratio=np.divide(self.profile,self.profile[0])
        return list(ratio)
    except: # kind of operation chaining...?\
        print(self)
        return list(np.divide(self/self[0]))
            
        
    
    
    # alpha-div part
    # --> perhaps, we can make this into module...
    
    

        
        
        




In [171]:
TOKEN = get_token(user=USER)
t_prof = get_profile('CL3S3',user=USER)
t_meta = search_ezbiocloud_samples(TOKEN,'mtp_id=CL3S3')
#print(t_meta)
#print(t_meta)
#print(t_prof)
sample1 = Sample(t_prof,t_meta)
print(type(sample1.toJSON()))
del sample1


get_profile succeeded
get_profile takes 0.091044 secs.
searching samples succeeded: http://elb-gateway-1990213061.ap-northeast-2.elb.amazonaws.com/METADB/SAMPLES?mtp_id=CL3S3


TypeError: dump() missing 1 required positional argument: 'fp'

In [168]:
with open('./PROJ4.pkl', 'rb') as p4:
    # when object saved in pickle format, 
    # use iterator!
    merge_profiles = pickle.load(p4)
    #print(merge_profiles.profile)
    merge_profiles = pickle.load(p4)
    #print(merge_profiles.profile)
    

for sam in open('./PROJ4.json','rb'):
    print(json.load(sam))

deleting object
deleting object


AttributeError: 'bytes' object has no attribute 'read'