In [None]:
!pip install dxpy

In [3]:
import getpass
import os

import dxpy as dx

# set dxpy config
# The API token is a secret, so it must never be stored in the notebook.
# It is read from the DX_API_TOKEN environment variable; when that is not
# set (or empty) the user is prompted for it without echoing the input.
# NOTE(review): a token used to be hardcoded in this cell -- it should be
# revoked, because it is still present in earlier copies of the notebook.
auth_token = os.environ.get("DX_API_TOKEN") or getpass.getpass("DNAnexus API token: ")

DX_SECURITY_CONTEXT = {
    "auth_token_type": "Bearer",
    "auth_token": auth_token
    }

dx.set_security_context(DX_SECURITY_CONTEXT)

# get 002 and 003 projects
# `[23]` is a character class matching '2' or '3'. The earlier pattern
# `[2|3]` additionally matched a literal '|', because `|` is not an
# alternation operator inside square brackets.
projects = dx.search.find_projects(
    name='00[23].*',
    name_mode='regexp',
    billed_to='org-emee_1',
    describe=True
    )

# Index the search results by project ID. Later cells read the
# 'describe' entry of each result.
projects_dict = {proj['id']: proj for proj in projects}

In [4]:
import datetime as dt
from dateutil.relativedelta import relativedelta

def older_than(month, modified_epoch):
    """Tell whether a modification timestamp lies more than `month` months back.

    Args:
        month: number of calendar months used as the age threshold.
        modified_epoch: modification time as an epoch value in milliseconds
            (it is divided by 1000 before being converted to a datetime).

    Returns:
        True when the modification time plus `month` months is still earlier
        than the current local date and time, False otherwise.
    """
    modified_at = dt.datetime.fromtimestamp(modified_epoch / 1000.0)
    expires_at = modified_at + relativedelta(months=month)

    return expires_at < dt.datetime.today()

def make_human_date(modified_epoch):
    """Format an epoch timestamp given in milliseconds as readable text.

    Args:
        modified_epoch: epoch value in milliseconds (divided by 1000 before
            being converted to a datetime).

    Returns:
        The local date and time as a 'YYYY-MM-DD HH:MM:SS.ffffff' string.
    """
    moment = dt.datetime.fromtimestamp(modified_epoch / 1000.0)

    return moment.strftime('%Y-%m-%d %H:%M:%S.%f')

In [5]:
# keep only the projects whose last modification is more than 4 months old
modified_project_dict = {
    project_id: project
    for project_id, project in projects_dict.items()
    if older_than(4, project['describe']['modified'])
}

In [6]:
import pandas as pd

# Reduce every selected project to its 'describe' payload, keyed by project ID.
final_dict = {
    project_id: project['describe']
    for project_id, project in modified_project_dict.items()
}

# One row per project, then data cleanup in a single chain:
#   - createdBy: keep only the 'user' entry of the original mapping
#   - created / modified: epoch milliseconds -> pandas datetimes
#   - rows ordered by modification time, oldest first
df = (
    pd.DataFrame(final_dict.values())
    .assign(
        createdBy=lambda frame: frame['createdBy'].apply(lambda creator: creator['user']),
        created=lambda frame: pd.to_datetime(frame['created'].apply(make_human_date)),
        modified=lambda frame: pd.to_datetime(frame['modified'].apply(make_human_date)),
    )
    .sort_values(by='modified')
)

In [7]:
# Add up data usage and storage cost over every project kept in `df`.
total_data_usage = df['dataUsage'].sum()
total_cost = df['storageCost'].sum()

print(f'Total Cost: USD {total_cost}\nTotal Data Usage: {total_data_usage} GB')

Total Cost: USD 1581.381300336419
Total Data Usage: 87038.732529982 GB


In [11]:
# Per-creator totals of data usage and storage cost, largest data usage first.
(
    df.groupby('createdBy')[['dataUsage', 'storageCost']]
    .sum()
    .sort_values(by='dataUsage', ascending=False)
)

Unnamed: 0_level_0,dataUsage,storageCost
createdBy,Unnamed: 1_level_1,Unnamed: 2_level_1
user-jwahn,29477.228482,195.618587
user-garnerm,20929.872902,473.100972
user-ykim,15497.757922,384.038106
user-toutoua,8875.058259,227.877026
user-sophier,4222.097158,108.507897
user-aishadahir,3011.462061,73.259249
user-ccharalambous,3009.088365,77.289339
user-pyattc,1018.486588,26.174076
user-jethror1,997.556366,15.51285
user-emee_genetics_ark,0.124426,0.003198


In [12]:
# Preview the first five rows of the cleaned project table.
df.head(n=5)

Unnamed: 0,id,name,class,created,modified,billTo,cloudAccount,level,dataUsage,sponsoredDataUsage,remoteDataUsage,region,summary,description,protected,restricted,downloadRestricted,databaseUIViewOnly,currency,containsPHI,createdBy,version,archivedDataUsage,storageCost,pendingTransfer,tags,defaultInstanceType,totalSponsoredEgressBytes,consumedSponsoredEgressBytes,provider,atSpendingLimit
228,project-FvQgF704QvxQb0zGPY23XGp6,002_200729_FH_Validation_bams,project,2020-07-29 13:11:56,2020-09-29 12:01:46.459,org-emee_1,cloudaccount-dnanexus,CONTRIBUTE,34.534023,0,0,aws:eu-central-1,,,False,False,False,,"{'dxCode': 0, 'code': 'USD', 'symbol': '$', 's...",False,user-toutoua,0,0.0,0.887524,,[],mem1_ssd1_x4,0,0,{},False
227,project-Fyg6bz04zz1kK1zyFQjYKXfx,002_201029_K00178_0294_BHK737BBXY_clinicalgene...,project,2020-11-13 11:16:12,2020-11-13 11:17:47.852,org-emee_1,cloudaccount-dnanexus,CONTRIBUTE,337.732016,0,0,aws:eu-central-1,,,False,False,False,,"{'dxCode': 0, 'code': 'USD', 'symbol': '$', 's...",False,user-ykim,0,0.0,8.679713,,[],mem1_ssd1_x4,0,0,{},False
226,project-FqpyXX84Jp7Z271V81gvg9jq,002_200619_K00178_0267_AHGW7TBBXY_clinicalgene...,project,2020-06-26 12:25:45,2020-12-10 10:39:35.707,org-emee_1,cloudaccount-dnanexus,CONTRIBUTE,830.526375,0,0,aws:eu-central-1,,,False,False,False,,"{'dxCode': 0, 'code': 'USD', 'symbol': '$', 's...",False,user-garnerm,2,0.0,21.344528,,[],mem1_ssd1_x4,0,0,{},False
225,project-Fz7kXF8418zJK0KbGq9qg3XK,002_171208_171211_NS500192,project,2020-12-08 15:18:05,2020-12-10 12:16:09.971,org-emee_1,cloudaccount-dnanexus,CONTRIBUTE,14.239045,0,0,aws:eu-central-1,,,False,False,False,,"{'dxCode': 0, 'code': 'USD', 'symbol': '$', 's...",False,user-ykim,0,0.0,0.365943,,[],mem1_ssd1_x4,0,0,{},False
224,project-FzkQ2604qjVV3zvP9yQy2vQ3,002_190925_K00178_0224_AHF2FNBBXY_clinicalgene...,project,2021-01-04 11:50:48,2021-01-04 11:55:29.842,org-emee_1,cloudaccount-dnanexus,CONTRIBUTE,0.0,0,0,aws:eu-central-1,,,False,False,False,,"{'dxCode': 0, 'code': 'USD', 'symbol': '$', 's...",False,user-ykim,2,0.0,0.0,,[],mem1_ssd1_x4,0,0,{},False
