In [1]:
#  Dependencies
import requests
from elasticsearch import Elasticsearch,helpers
import elasticsearch_dsl
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import uuid
import random
import json

In [2]:
# host_name = 'https://es-atlas.cern.ch/kibana'
# es-py __version__ is 2.3.0
# es_dsl __version__ is 5.3.0
# initialise elastic search with authorisation

es = Elasticsearch(['es-atlas.cern.ch:9203'],
                                 timeout=10000,
                                 use_ssl=True,
                                 verify_certs=True,
                                 ca_certs='/etc/pki/tls/certs/CERN-bundle.pem',
                                 http_auth='roatlas:la2Dbd5rtn3df!sx')

In [77]:
def get_indices(es):
    indices = es.indices.get_aliases().keys()
    # len(indices)
    rucio= (index for index in indices if('atlas_rucio-events' in index))
    events = []
    for event in rucio :
        events.append(event)
    print('total event indices:',len(events),'\n')
    #print(events)
    indices_dict = {}
    for event in events:
        i = es.count(index=event)
        indices_dict[event] = i['count']
    # print('total data points:',sum(int(list(indices_dict.values()))))
    print(indices_dict)
    return indices, indices_dict
    

# saves data to a dataframe
def extract_data(index, query, scan_size, scan_step):
    resp = es.search(
    index = index,
    scroll = '20m',
    body = query,
    size = scan_step)

    sid = resp['_scroll_id']
    scroll_size = resp['hits']['total']
    results=[]
    for hit in resp['hits']['hits']:
        results.append(hit['_source']['payload'])
    steps = int((scan_size-scan_step)/ scan_step)

    # Start scrolling

    for i in range(steps):
        if i%10==0:
            print("Scrolling index : {} ; step : {} ...\n ".format(index,i))
        resp = es.scroll(scroll_id = sid, scroll = '20m')
        # Update the scroll ID
        sid = resp['_scroll_id']
        # Get the number of results that we returned in the last scroll
        scroll_size = len(resp['hits']['hits'])
        if i%10==0:
            print("scroll size: " + str(scroll_size))
        for hit in resp['hits']['hits']:
            results.append(hit['_source']['payload'])
    
    print("\n Done Scrolling through {} !! \n".format(index))
    results = pd.DataFrame(results)
    print(results.info(), '\n')
    return results

def get_indices_data(indices_list, query, scan_size, scan_step):
    data = pd.DataFrame()
    for index in indices_list:
        tmp_data = extract_data(index, query, scan_size, scan_step)
        data = data.append(tmp_data, ignore_index=True)
#         plot_index_data(tmp_data)
    print(data.info())
#     plot_index_data(dataframe)
    print("\n------------------- ALL DONE !!!!! -----------------------")
    return data

# def plot_data(df):
    

In [78]:
print(es.ping())
es.info()

True


{'cluster_name': 'atlas',
 'cluster_uuid': 'jGbidQR5T7Ok-8EoeXoFRA',
 'name': 'esatlass01-ites_atlas',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2017-02-09T22:05:32.386Z',
  'build_hash': 'db0d481',
  'build_snapshot': False,
  'lucene_version': '6.4.1',
  'number': '5.2.1'}}

In [79]:
es.cluster.health()

{'active_primary_shards': 2116,
 'active_shards': 4398,
 'active_shards_percent_as_number': 100.0,
 'cluster_name': 'atlas',
 'delayed_unassigned_shards': 0,
 'initializing_shards': 0,
 'number_of_data_nodes': 9,
 'number_of_in_flight_fetch': 0,
 'number_of_nodes': 14,
 'number_of_pending_tasks': 0,
 'relocating_shards': 0,
 'status': 'green',
 'task_max_waiting_in_queue_millis': 0,
 'timed_out': False,
 'unassigned_shards': 0}

In [80]:
indices, indices_to_count = get_indices(es)

total event indices: 32 

{'atlas_rucio-events-2017.06.01': 13624672, 'atlas_rucio-events-2017.06.03': 12449162, 'atlas_rucio-events-2017.05.18': 16944116, 'atlas_rucio-events-2017.06.14': 8821194, 'atlas_rucio-events-2017.05.17': 13075677, 'atlas_rucio-events-2017.05.25': 13829032, 'atlas_rucio-events-2017.06.13': 10110855, 'atlas_rucio-events-2017.06.02': 16490541, 'atlas_rucio-events-2017.06.09': 11647283, 'atlas_rucio-events-2017.05.29': 7871022, 'atlas_rucio-events-2017.05.23': 17165804, 'atlas_rucio-events-2017.05.31': 9625282, 'atlas_rucio-events-2017.06.05': 12903916, 'atlas_rucio-events-2017.06.15': 6788193, 'atlas_rucio-events-2017.06.04': 9845288, 'atlas_rucio-events-2017.06.06': 11178867, 'atlas_rucio-events-2017.06.08': 11591964, 'atlas_rucio-events-2017.05.24': 14482233, 'atlas_rucio-events-2017.05.20': 10152479, 'atlas_rucio-events-2017.06.07': 8533532, 'atlas_rucio-events-2017.05.22': 12321088, 'atlas_rucio-events-2017.05.26': 13379263, 'atlas_rucio-events-2017.05.28': 

# Total Data Points in the atlas_rucio-events-* index

In [81]:
import time
count=es.count(index='atlas_rucio-events-*')
print('total documents at- {} : {}'.format(time.strftime("%c"), count['count']) )

total documents at- Thu Jun 15 18:53:39 2017 : 340100646


In [82]:
indices_to_count['atlas_rucio-events-2017.05.25']

13829032

In [9]:
stats = es.field_stats(index='atlas_rucio-events-2016-06-30', fields=['payload.created_at'])
#atlas_rucio-events-2016-06-30
print(stats)

{'_shards': {'successful': 5, 'total': 5, 'failed': 0}, 'indices': {'_all': {'fields': {'payload.created_at': {'aggregatable': True, 'sum_doc_freq': 139636, 'doc_count': 34909, 'min_value': 1467292656000, 'searchable': True, 'density': 95, 'type': 'date', 'sum_total_term_freq': -1, 'max_value': 1467294822000, 'max_value_as_string': '2016-06-30 13:53:42', 'min_value_as_string': '2016-06-30 13:17:36', 'max_doc': 36737}}}}}


# Extracting Data from 2-3 Months

In [None]:
# myquery={
#       'query': {
#         'bool': {
#           'must': {
#             'match': {'description': 'fix'}
#           },
#           'must_not': {
#             'term': {'files': 'test_elasticsearch'}
#           }
#         }
#       }
#     }

# # es.get_source(index='atlas_rucio-events-*', doc_type=, id= )     , filter_path='hits.hits._source'
# es.search(index='atlas_rucio-events-*', size=5)

In [69]:
dFF=extract_data(index='atlas_rucio-events-2017.06.08',query=myquery,scan_size=300000, scan_step=10000)

Scrolling index : atlas_rucio-events-2017.06.08 ; step : 0 ...
 
scroll size: 10000
Scrolling index : atlas_rucio-events-2017.06.08 ; step : 10 ...
 
scroll size: 10000
Scrolling index : atlas_rucio-events-2017.06.08 ; step : 20 ...
 
scroll size: 10000

 Done Scrolling through atlas_rucio-events-2017.06.08 !! 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300000 entries, 0 to 299999
Data columns (total 26 columns):
account                0 non-null object
activity               300000 non-null object
bytes                  300000 non-null int64
checksum-adler         300000 non-null object
checksum-md5           0 non-null object
dst-rse                300000 non-null object
dst-type               300000 non-null object
dst-url                300000 non-null object
duration               300000 non-null int64
guid                   0 non-null object
name                   300000 non-null object
previous-request-id    32780 non-null object
protocol               300000 non-null ob

In [71]:
a=dFF.groupby(df.columns.tolist(),as_index=False).size()
a.unique()

array([], dtype=int64)

In [48]:
index= 'atlas_rucio-events-2017.05.17'
myquery = {
    "query": {
        "term": {
            '_type': 'transfer-done'
            }
        }
    }

# # scan_size=indices_dict['atlas_rucio-events-2017.05.17'],
# #                     scan_step=10000

# data = extract_data(index=index,
#                     query=myquery,
#                     scan_size=100000,
#                     scan_step=1000)

In [50]:
k = [index for index in events if ('atlas_rucio-events-2017.05' in index)]
l = [index for index in events if ('atlas_rucio-events-2017.06' in index)]
print(k, '\n', len(l))

['atlas_rucio-events-2017.05.27', 'atlas_rucio-events-2017.05.18', 'atlas_rucio-events-2017.05.31', 'atlas_rucio-events-2017.05.26', 'atlas_rucio-events-2017.05.22', 'atlas_rucio-events-2017.05.28', 'atlas_rucio-events-2017.05.29', 'atlas_rucio-events-2017.05.21', 'atlas_rucio-events-2017.05.19', 'atlas_rucio-events-2017.05.24', 'atlas_rucio-events-2017.05.17', 'atlas_rucio-events-2017.05.23', 'atlas_rucio-events-2017.05.30', 'atlas_rucio-events-2017.05.20', 'atlas_rucio-events-2017.05.25'] 
 15


In [53]:
myquery = {
    "query": {
        "term": {
            '_type': 'transfer-done'
            }
        }
    }
df = get_indices_data(indices_list=l, query=myquery, scan_size=200000, scan_step= 10000)

Scrolling index : atlas_rucio-events-2017.06.07 ; step : 0 ...
 
scroll size: 10000
Scrolling index : atlas_rucio-events-2017.06.07 ; step : 10 ...
 
scroll size: 10000

 Done Scrolling through atlas_rucio-events-2017.06.07 !! 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000 entries, 0 to 199999
Data columns (total 26 columns):
account                0 non-null object
activity               200000 non-null object
bytes                  200000 non-null int64
checksum-adler         200000 non-null object
checksum-md5           0 non-null object
dst-rse                200000 non-null object
dst-type               200000 non-null object
dst-url                200000 non-null object
duration               200000 non-null int64
guid                   0 non-null object
name                   200000 non-null object
previous-request-id    2464 non-null object
protocol               200000 non-null object
reason                 200000 non-null object
request-id             200000 non-n

In [54]:
rucio_data = df

In [52]:
z=(df['previous-request-id'][0]) 

In [60]:
# df.to_csv('may.csv')
# id = (df['previous-request-id']!=z)
# id
a=df['previous-request-id'].notnull()

In [62]:
b=df['previous-request-id'][a]
b

47        c48c643a28f3467390801546a39bc836
90        e26c9d992afc439587821ac6d98aad17
296       73c33434d3ce43018eb75906d86cc43a
312       0304ec1b2e9245edaf7e557b80fe2370
351       b235a6c772204d22b4980d639282b54d
356       8d83a76f954a4568813ea0698f3ca43e
734       d2305fb1893547759c9cb4afa844844e
1130      49ec8faf7eb940a69ea0fe314946d61e
1138      4e50b47b67b64debaf8c8d4d62cf1560
1588      db705903b9c74dfc80c41a75b6d75ae7
1652      ee465b085d36427bbf0ac1fbb0a2ac5a
1769      414aa872c8d54672854ac595e54bbabe
2324      76a7be1948514ad3957eae7f50a5ea38
2339      52baaaabfa604f0abd7717b5bb451f07
2765      14950165f19e445b87c59dc2bf72462f
2771      01a12ff105b549c6ae3aeb877651aebe
2867      43142a581ef247f1952746b9c1455ec3
2996      6a5d1232aba04d93b930579c2f1409b7
3182      113d51814b8a4514ac52879c1c7af4f1
3183      8693f725b9634e88901c5bc1a2c2b8a3
3186      36dc5a202ab845d2ba5d032d8d4bcdc0
3190      d6453b8460d54385b2bcc19e9ada9ae0
3200      8c92b7edf59e415bafb41673714c87a0
3215      8

In [13]:
# from elasticsearch_dsl import Search
# from elasticsearch_dsl.query import MultiMatch, Match

# s = Search(using=es, index='atlas_rucio-events-2017.05.17', size=indices_dict['atlas_rucio-events-2017.05.17'])

# response = s.execute()
# print('Total %d hits found.' % response.hits.total)

# # indices_dict['atlas_rucio-events-2017.05.17']
# # print((response))
# # response.to_dict()
# print(response.success())
# print(response.took)
# print(response.hits.total)
# print(len(response.hits.hits))
# response.hits.hits[0]

# Preprocessing data

In [14]:
# rucio_data = pd.read_csv('may.csv')

In [55]:
rucio_data.head()


Unnamed: 0,account,activity,bytes,checksum-adler,checksum-md5,dst-rse,dst-type,dst-url,duration,guid,...,src-rse,src-type,src-url,started_at,submitted_at,tool-id,transfer-endpoint,transfer-id,transfer-link,transferred_at
0,,Functional Test,1048576,1243d9d6,,GRIF-LPNHE_DATADISK,DISK,srm://lpnse1.in2p3.fr:8446/srm/managerv2?SFN=/...,12,,...,RAL-LCG2_DATADISK,DISK,srm://srm-atlas.gridpp.rl.ac.uk:8443/srm/manag...,2017-06-07 01:57:56,2017-06-07 01:57:55,rucio-conveyor,https://fts3-pilot.cern.ch:8446,4a26928e-408e-57c0-881a-0f4b981ee7ca,https://fts3-pilot.cern.ch:8449/fts3/ftsmon/#/...,2017-06-07 01:58:08
1,,Functional Test,1048576,d98f0cec,,CSCS-LCG2_DATADISK,DISK,srm://storage01.lcg.cscs.ch:8443/srm/managerv2...,11,,...,RAL-LCG2_DATADISK,DISK,srm://srm-atlas.gridpp.rl.ac.uk:8443/srm/manag...,2017-06-07 01:58:01,2017-06-07 01:57:55,rucio-conveyor,https://fts3.cern.ch:8446,b554dc28-cee8-5feb-b155-5895c91fa694,https://fts3.cern.ch:8449/fts3/ftsmon/#/job/b5...,2017-06-07 01:58:12
2,,Functional Test,1048576,41083680,,PRAGUELCG2_DATADISK,DISK,srm://golias100.farm.particle.cz:8446/srm/mana...,10,,...,BNL-OSG2_DATADISK,DISK,srm://dcsrm.usatlas.bnl.gov:8443/srm/managerv2...,2017-06-07 01:57:56,2017-06-07 01:57:54,rucio-conveyor,https://fts3-pilot.cern.ch:8446,092402df-2a59-53ce-ad42-21ff3293d4a5,https://fts3-pilot.cern.ch:8449/fts3/ftsmon/#/...,2017-06-07 01:58:06
3,,Functional Test,1048576,1243d9d6,,UKI-LT2-QMUL_DATADISK,DISK,srm://se03.esc.qmul.ac.uk:8444/srm/managerv2?S...,7,,...,NIKHEF-ELPROD_DATADISK,DISK,srm://tbn18.nikhef.nl:8446/srm/managerv2?SFN=/...,2017-06-07 01:58:03,2017-06-07 01:57:55,rucio-conveyor,https://fts3-pilot.cern.ch:8446,54628f49-72f9-5a50-bbd3-d60d01ff97ff,https://fts3-pilot.cern.ch:8449/fts3/ftsmon/#/...,2017-06-07 01:58:10
4,,Production Output,4603527,2891e488,,PRAGUELCG2_DATADISK,DISK,srm://golias100.farm.particle.cz:8446/srm/mana...,10,,...,INFN-ROMA1_DATADISK,DISK,srm://grid-cert-03.roma1.infn.it:8446/srm/mana...,2017-06-07 01:46:42,2017-06-07 01:46:40,rucio-conveyor,https://fts3-pilot.cern.ch:8446,34189ea8-8ed9-5432-b417-1ff02aea133e,https://fts3-pilot.cern.ch:8449/fts3/ftsmon/#/...,2017-06-07 01:46:52


In [56]:
rucio_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000000 entries, 0 to 2999999
Data columns (total 26 columns):
account                object
activity               object
bytes                  int64
checksum-adler         object
checksum-md5           object
dst-rse                object
dst-type               object
dst-url                object
duration               int64
guid                   object
name                   object
previous-request-id    object
protocol               object
reason                 object
request-id             object
scope                  object
src-rse                object
src-type               object
src-url                object
started_at             object
submitted_at           object
tool-id                object
transfer-endpoint      object
transfer-id            object
transfer-link          object
transferred_at         object
dtypes: int64(2), object(24)
memory usage: 595.1+ MB


In [75]:
duplicates = df.groupby(df.columns.tolist(),as_index=False).size()
duplicates

Series([], dtype: int64)

In [73]:
# reasons = rucio_data['reason'].unique()
# print(reasons,'\n',len(reasons))

df['reason']

0           
1           
2           
3           
4           
5           
6           
7           
8           
9           
10          
11          
12          
13          
14          
15          
16          
17          
18          
19          
20          
21          
22          
23          
24          
25          
26          
27          
28          
29          
          ..
2999970     
2999971     
2999972     
2999973     
2999974     
2999975     
2999976     
2999977     
2999978     
2999979     
2999980     
2999981     
2999982     
2999983     
2999984     
2999985     
2999986     
2999987     
2999988     
2999989     
2999990     
2999991     
2999992     
2999993     
2999994     
2999995     
2999996     
2999997     
2999998     
2999999     
Name: reason, dtype: object

In [57]:
src = rucio_data['src-rse'].unique()
print(src,'\n',len(src))

['RAL-LCG2_DATADISK' 'BNL-OSG2_DATADISK' 'NIKHEF-ELPROD_DATADISK'
 'INFN-ROMA1_DATADISK' 'IN2P3-CC_DATADISK' 'CERN-PROD-RUCIOTEST_DATADISK'
 'FZK-LCG2_DATADISK' 'NDGF-T1_DATADISK' 'TRIUMF-LCG2_DATADISK'
 'RRC-KI-T1_DATADISK' 'PIC_DATADISK' 'IN2P3-LPSC_DATADISK'
 'CERN-PROD_DATADISK' 'INFN-T1_DATADISK' 'PRAGUELCG2_DATADISK'
 'DESY-HH_DATADISK' 'UKI-SOUTHGRID-OX-HEP_DATADISK' 'GOEGRID_DATADISK'
 'INFN-FRASCATI_DATADISK' 'UAM-LCG2_DATADISK' 'AUSTRALIA-ATLAS_DATADISK'
 'UKI-LT2-RHUL_DATADISK' 'INFN-NAPOLI-ATLAS_DATADISK' 'LRZ-LMU_DATADISK'
 'IFIC-LCG2_DATADISK' 'INFN-BOLOGNA-T3_PRODDISK'
 'UKI-NORTHGRID-LANCS-HEP_DATADISK' 'GRIF-IRFU_DATADISK'
 'UTA_SWT2_DATADISK' 'UKI-LT2-QMUL_DATADISK' 'TOKYO-LCG2_DATADISK'
 'UKI-NORTHGRID-MAN-HEP_DATADISK' 'INFN-COSENZA_DATADISK'
 'UKI-SOUTHGRID-BHAM-HEP_DATADISK' 'CSCS-LCG2_DATADISK'
 'CA-VICTORIA-WESTGRID-T2_DATADISK' 'SWT2_CPB_DATADISK'
 'UKI-NORTHGRID-SHEF-HEP_DATADISK' 'TECHNION-HEP_DATADISK' 'MWT2_DATADISK'
 'RAL-LCG2-ECHO_DATADISK' 'RU-PROTVINO-I

In [58]:
dest = rucio_data['dst-rse'].unique()
print(dest,'\n',len(dest))

['GRIF-LPNHE_DATADISK' 'CSCS-LCG2_DATADISK' 'PRAGUELCG2_DATADISK'
 'UKI-LT2-QMUL_DATADISK' 'MWT2_DATADISK' 'IN2P3-LPSC_DATADISK'
 'RRC-KI-T1_DATADISK' 'IEPSAS-KOSICE_DATADISK' 'IN2P3-LAPP_DATADISK'
 'UKI-SOUTHGRID-BHAM-HEP_DATADISK' 'NET2_DATADISK' 'TOKYO-LCG2_DATADISK'
 'DESY-ZN_DATADISK' 'LUCILLE_DATADISK' 'TRIUMF-LCG2_DATADISK'
 'AGLT2_DATADISK' 'EELA-UTFSM_DATADISK' 'UKI-SOUTHGRID-OX-HEP_DATADISK'
 'UKI-SOUTHGRID-CAM-HEP_DATADISK' 'LRZ-LMU_DATADISK' 'SWT2_CPB_DATADISK'
 'UKI-NORTHGRID-LANCS-HEP_DATADISK' 'IN2P3-CC_DATADISK' 'RAL-LCG2_DATADISK'
 'CA-VICTORIA-WESTGRID-T2_DATADISK' 'PIC_DATADISK' 'PSNC_DATADISK'
 'BEIJING-LCG2_DATADISK' 'INFN-FRASCATI_DATADISK' 'IL-TAU-HEP_DATADISK'
 'NDGF-T1_DATADISK' 'INFN-T1_DATADISK' 'UKI-NORTHGRID-LIV-HEP_DATADISK'
 'RAL-LCG2-ECHO_DATADISK' 'UKI-SOUTHGRID-SUSX_DATADISK'
 'UNI-FREIBURG_DATADISK' 'CERN-PROD_DATADISK' 'FMPHI-UNIBA_DATADISK'
 'BNL-OSG2_DATADISK' 'MPPMU_DATADISK' 'UKI-LT2-RHUL_DATADISK'
 'SARA-MATRIX_DATADISK' 'UKI-NORTHGRID-MAN-HEP_D

In [59]:
scopes = rucio_data['scope'].unique()
print(scopes, '\n', len(scopes))

['tests' 'mc15_13TeV' 'mc16_13TeV' 'data16_13TeV' 'mc15_5TeV' 'mc15_valid'
 'user.cantel' 'group.det-indet' 'user.sche' 'user.atraeet' 'user.rjansky'
 'user.jojungge' 'user.aknue' 'user.sfracchi' 'data17_13TeV' 'mc15_14TeV'
 'group.phys-higgs' 'user.ysmirnov' 'data12_8TeV' 'user.morgens'
 'user.hesketh' 'user.moles' 'user.odartsi' 'user.sdjones' 'user.dengfeng'
 'user.pavel' 'user.ekourlit' 'user.mdelgaud' 'user.jbeacham'
 'user.kkarppin' 'user.rgugel' 'user.dkrauss' 'user.mcfayden' 'user.ckato'
 'user.abarton' 'user.pstaroba' 'user.wmccorma' 'data17_1beam'
 'group.perf-flavtag' 'user.cpandini' 'data17_calib' 'group.phys-top'
 'group.phys-exotics' 'user.pereira' 'user.cheny' 'user.manthony'
 'group.phys-gener' 'user.nproklov' 'user.ohaldik' 'user.sanmay'
 'user.aducourt' 'user.mleblanc' 'user.ereynold' 'user.rles' 'user.dkoeck'
 'user.amadsen' 'data17_comm' 'user.jcrane' 'user.rosati' 'user.sshirabe'
 'user.anramire' 'user.jbonilla' 'user.wollrath' 'user.lixia'
 'user.oricken' 'user.va

In [60]:
adlers = rucio_data['checksum-adler'].unique()
len(adlers)

2553256

In [61]:
tool_ids = rucio_data['tool-id'].unique()
tool_ids

array(['rucio-conveyor'], dtype=object)

In [62]:
transfer_links = rucio_data['transfer-link'].unique()
print(transfer_links, len(transfer_links)) 

[ 'https://fts3-pilot.cern.ch:8449/fts3/ftsmon/#/job/4a26928e-408e-57c0-881a-0f4b981ee7ca'
 'https://fts3.cern.ch:8449/fts3/ftsmon/#/job/b554dc28-cee8-5feb-b155-5895c91fa694'
 'https://fts3-pilot.cern.ch:8449/fts3/ftsmon/#/job/092402df-2a59-53ce-ad42-21ff3293d4a5'
 ...,
 'https://fts3-test.gridpp.rl.ac.uk:8449/fts3/ftsmon/#/job/54953e7d-0665-5475-ab3e-45fe288e4950'
 'https://fts3-test.gridpp.rl.ac.uk:8449/fts3/ftsmon/#/job/8b95a07b-8d42-5aae-a782-266ce8ded157'
 'https://fts3.cern.ch:8449/fts3/ftsmon/#/job/34c3c52a-705a-550d-bfeb-80cb6dc00499'] 1900657


In [63]:
t_end = rucio_data['transfer-endpoint'].unique()
t_end

array(['https://fts3-pilot.cern.ch:8446', 'https://fts3.cern.ch:8446',
       'https://fts.usatlas.bnl.gov:8446',
       'https://fts3-test.gridpp.rl.ac.uk:8446'], dtype=object)

In [64]:
activities = rucio_data['activity'].unique()
activities

array(['Functional Test', 'Production Output', 'Production Input',
       'Data Consolidation', 'User Subscriptions', 'Express', 'T0 Tape',
       'Staging', 'T0 Export', 'Data Rebalancing', 'Data Brokering',
       'Recovery'], dtype=object)

In [65]:
# rucio_data['reason']
protocols = rucio_data['protocol'].unique()
protocols


array(['srm', 'gsiftp', 'davs'], dtype=object)

In [66]:
dst_types = rucio_data['dst-type'].unique()
dst_types

array(['DISK', 'TAPE'], dtype=object)

In [67]:
src_types = rucio_data['src-type'].unique()
src_types

array(['DISK', 'TAPE'], dtype=object)

# atlas_rucio-events-*