In [1]:
# This works on my Ubuntu laptop. It will work on lxplus as well.
# I pip-installed DBSClient
# as per https://github.com/dmwm/DBSClient
# I create a proxy and point $X509_USER_PROXY to it,
# and I do (adapt the DBS3_CLIENT_ROOT path to your own installation):
# export DBS3_CLIENT_ROOT=/home/belforte/.local/lib/python3.8/site-packages/dbs
# export RUCIO_CONFIG=/cvmfs/cms.cern.ch/rucio/x86_64/slc7/py3/current/etc/rucio.cfg
# (it is good to have /cvmfs on my laptop)

In [2]:
# sort of standard list of imports, not all used here
import sys
import os
import logging
import dbs.apis.dbsClient as dbsClient
import random
import pprint
import json
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime


In [3]:
# Set up logging in case something we call expects a configured logger:
# records go to stdout, at INFO level and above.
logger = logging.getLogger('Logger')
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

In [4]:
# initialize a DBS API client; `dbs` is the handle used by the DBS queries below
# NOTE(review): the URL looks like the read-only (DBSReader) production global
# instance - confirm before pointing this at anything else
dbsUrl = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
dbs = dbsClient.DbsApi(url=dbsUrl)

In [5]:
# look for datasets partially on disk. AOD looks like a good candidate:
# collect the name of every dataset DBS returns for the AOD data tier
allAODs = [
    entry['dataset']
    for entry in dbs.listDatasets(dataset='/*/*/AOD')
]
# how many AOD datasets did we get?
print(len(allAODs))

7867


In [6]:
# pick not-too-old datasets and prefer the UltraLegacy campaign:
# keep names containing 'UL' and none of the unwanted tags
AODs = [
    name for name in allAODs
    if 'UL' in name
    and all(tag not in name
            for tag in ['2013', '2014', '2015', '2017', 'REPLAY', 'Commissioning'])
]
# how many datasets survive the selection?
print(len(AODs))

500


In [7]:
# initialize a Rucio client
# NOTE(review): presumably this relies on $RUCIO_CONFIG and the X509 proxy
# described in the first cell - confirm
from rucio.client import Client
rucio=Client()
# test it: whoami() is the last expression of the cell, so the account
# information it returns is displayed as the cell output
rucio.whoami()

{'account_type': 'USER',
 'suspended_at': None,
 'created_at': '2020-04-28T23:25:20',
 'account': 'belforte',
 'status': 'ACTIVE',
 'email': 'Stefano.Belforte@cern.ch',
 'deleted_at': None,
 'updated_at': '2020-04-28T23:25:20'}

In [8]:
# this method lists #blocks/#block-on-disk
def locateBlocks(dataset, dbsApi=None, rucioClient=None):
    """
    Report how many blocks of a dataset have at least one usable disk replica.

    The blocks of `dataset` are listed via DBS, then Rucio is asked for the
    replicas of each block. A replica is counted when its state is AVAILABLE
    (compared case-insensitively) and its RSE name does not contain 'Tape'.
    If at least one block qualifies, one summary line is printed:
        dset #blocks/#onDisk: <dataset> <total blocks>/<blocks with a disk replica>
    Nothing is printed when no block qualifies.

    :param dataset: dataset name, passed to listBlocks(dataset=...)
    :param dbsApi: object providing listBlocks(dataset=...);
                   defaults to the notebook-level `dbs` client
    :param rucioClient: object providing
                   list_dataset_replicas(scope=..., name=..., deep=...);
                   defaults to the notebook-level `rucio` client
    :return: dict mapping block name -> set of RSE names, containing only
             the blocks with at least one counted replica (empty if none)
    """
    # fall back on the clients created at notebook level when none is passed in
    if dbsApi is None:
        dbsApi = dbs
    if rucioClient is None:
        rucioClient = rucio

    blocks = [d['block_name'] for d in dbsApi.listBlocks(dataset=dataset)]

    locationsMap = {}
    for block in blocks:
        response = rucioClient.list_dataset_replicas(scope='cms', name=block, deep=True)
        # same as complete='y' used for PhEDEx
        replicas = {item['rse'] for item in response
                    if item['state'].upper() == 'AVAILABLE' and 'Tape' not in item['rse']}
        if replicas:  # only fill map for blocks which have at least one location
            locationsMap[block] = replicas

    if locationsMap:
        print('dset #blocks/#onDisk: %s %d/%d' % (dataset, len(blocks), len(locationsMap)))
    return locationsMap

In [9]:
# sample some AODs: two short slices taken from different parts of the list
for dataset in AODs[5:7] + AODs[110:115]:
    locateBlocks(dataset)

dset #blocks/#onDisk: /BTagCSV/Run2016E-21Feb2020_UL2016_HIPM_rsb-v1/AOD 33/1
dset #blocks/#onDisk: /BTagCSV/Run2016F-21Feb2020_UL2016-v1/AOD 5/5
dset #blocks/#onDisk: /DoubleMuonLowMass/Run2016D-21Feb2020_UL2016_HIPM-v1/AOD 16/6
dset #blocks/#onDisk: /DoubleMuonLowMass/Run2016E-21Feb2020_UL2016_HIPM-v1/AOD 18/2
dset #blocks/#onDisk: /DoubleMuonLowMass/Run2016F-21Feb2020_UL2016_HIPM-v1/AOD 18/7
dset #blocks/#onDisk: /DoubleMuonLowMass/Run2016G-21Feb2020_UL2016-v1/AOD 55/33
