In [1]:
import requests
import json
import pandas as pd
from datetime import datetime as dt
from datetime import timezone
from dateutil import parser

# Do not truncate the column list when a pandas DataFrame is displayed
pd.set_option("display.max_columns", None)

In [2]:
def read_fields(filename):
    """Create the list of fields to retrieve from a file with one field per line.

    Each non-blank line is stripped of surrounding whitespace (including the
    line terminator) and prefixed with "data.".

    Parameters:
        filename: path of the text file holding one field name per line.

    Returns:
        A list of strings of the form "data.<field>", in file order.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    fields = []
    # The context manager closes the file even if reading fails part-way.
    with open(filename) as fd:
        for line in fd:
            # strip() instead of line[:-1]: the last line of a file may have no
            # trailing newline, and slicing would then cut a real character.
            name = line.strip()
            # A blank line would otherwise produce the meaningless field "data."
            if name:
                fields.append("data." + name)
    return fields

def remove_dot_data(fields):
    """Return the field names with their leading "data." prefix removed.

    Only a prefix is removed: a "data." that appears later in the name (for
    example inside "data.metadata.x") is kept. Names that do not start with
    "data." are returned unchanged.

    Parameters:
        fields: iterable of field-name strings.

    Returns:
        A new list with one entry per input field, in the same order.
    """
    prefix = "data."
    return [
        field[len(prefix):] if field.startswith(prefix) else field
        for field in fields
    ]

def read_token(filename):
    """Read the first line of a file and return it without its line terminator.

    Parameters:
        filename: path of the file whose first line holds the token.

    Returns:
        The first line as a string; an empty string if the file is empty.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # The context manager closes the file even if reading fails.
    with open(filename) as fd:
        line = fd.readline()
    # rstrip is safe on an empty string, unlike indexing the last character.
    return line.rstrip("\r\n")

In [78]:
# Where to get the authorization token
file_token = "./token"

# Where to get the list of fields to retrieve
file_fields = "./fields"

# Read the authorization token
token = read_token(file_token)
headers = {'Authorization': 'Bearer ' + token}

# Set the start (min) and end (max) dates of the window to query
min_date_human_readable = "Jun 25 00:00:00 UTC 2019"
max_date_human_readable = "Jun 25 23:59:59 UTC 2019"

# Get timestamps in milliseconds
min_date = str(int(parser.parse(min_date_human_readable).timestamp() * 1000))
max_date = str(int(parser.parse(max_date_human_readable).timestamp() * 1000))

# The query, in Lucene syntax
query =               "data.Type:analysis"
query = query + " AND  data.Status:Completed"
query = query + " AND  data.JobUniverse:5"
query = query + " AND  data.CRAB_Workflow:190529_225118*gomber_crab_job_TTJets_FXFX"
# Optional extra clause to restrict the query to a single job:
#query = query + " AND  data.CRAB_Id:7716"

# Fields to retrieve. The explicit list below takes precedence; set
# fields_override to None to load the list from `file_fields` instead.
# (Previously the file was always read and its result discarded.)
fields_override = ['data.CRAB_Workflow', 'data.CRAB_Id', 'data.CRAB_Retry', 'data.ScheddName']
if fields_override is None:
    fields = read_fields(file_fields)
else:
    fields = list(fields_override)
fields_no_data = remove_dot_data(fields)

# Number of records to retrieve
num_records = 10000

In [79]:
# Echo the query and the bare field names so the request can be checked by eye
print("Query:", query, sep="\n")
print("Fields no data:", fields_no_data, sep="\n")

# Restrict the search to records whose RecordTime lies in [min_date, max_date]
time_window_filter = {
    "range": {
        "data.RecordTime": {
            "gte": min_date,
            "lte": max_date,
            "format": "epoch_millis",
        }
    }
}

# Apply the query string built in the configuration cell
query_string_filter = {
    "query_string": {
        "analyze_wildcard": "true",
        "query": query,
    }
}

# Request body: at most num_records hits, both filters applied, and only the
# requested fields returned in each hit's _source
data = {
    "size": num_records,
    "query": {"bool": {"filter": [time_window_filter, query_string_filter]}},
    "_source": fields,
}

# Serialized form that is sent as the request payload
data_string = json.dumps(data)

Query:
data.Type:analysis AND  data.Status:Completed AND  data.JobUniverse:5 AND  data.CRAB_Workflow:190529_225118*gomber_crab_job_TTJets_FXFX
Fields no data:
['CRAB_Workflow', 'CRAB_Id', 'CRAB_Retry', 'ScheddName']


In [80]:
# Send the query (it takes a few seconds).
# The timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(
    'https://monit-grafana.cern.ch/api/datasources/proxy/9014/_search',
    headers=headers,
    data=data_string,
    timeout=120,
)
# Stop here on an HTTP error (e.g. an expired token) instead of failing later
# with a confusing error while parsing the response body.
response.raise_for_status()

In [81]:
# Get the data from the response
d = response.json()

# Collect the 'data' payload of every hit; hits without one are only counted.
clean_records = []
no_data_count = 0
for record in d['hits']['hits']:
    try:
        clean_records.append(record['_source']['data'])
    except (KeyError, TypeError):
        # Only the errors a missing or malformed payload can raise are caught,
        # so unrelated failures (and Ctrl-C) are no longer silently swallowed.
        no_data_count += 1

# Create a pandas DataFrame with the data retrieved
df = pd.DataFrame(clean_records)

In [83]:
# Show the records ordered by job id, then by retry number
df.sort_values(by=['CRAB_Id', 'CRAB_Retry'])

Unnamed: 0,CRAB_Id,CRAB_Retry,CRAB_Workflow,ScheddName
281,100,7,190529_225118:gomber_crab_job_TTJets_FXFX,crab3@vocms0107.cern.ch
209,1012,7,190529_225118:gomber_crab_job_TTJets_FXFX,crab3@vocms0107.cern.ch
381,1012,8,190529_225118:gomber_crab_job_TTJets_FXFX,crab3@vocms0107.cern.ch
399,1012,9,190529_225118:gomber_crab_job_TTJets_FXFX,crab3@vocms0107.cern.ch
12,1024,3,190529_225118:gomber_crab_job_TTJets_FXFX,crab3@vocms0107.cern.ch
171,1026,3,190529_225118:gomber_crab_job_TTJets_FXFX,crab3@vocms0107.cern.ch
349,1080,7,190529_225118:gomber_crab_job_TTJets_FXFX,crab3@vocms0107.cern.ch
426,1081,6,190529_225118:gomber_crab_job_TTJets_FXFX,crab3@vocms0107.cern.ch
470,1081,7,190529_225118:gomber_crab_job_TTJets_FXFX,crab3@vocms0107.cern.ch
319,1081,8,190529_225118:gomber_crab_job_TTJets_FXFX,crab3@vocms0107.cern.ch


In [32]:
# Add an 'Exit' flag per record: 0 when Chirp_CRAB3_Job_ExitCode equals 0,
# 1 otherwise (a missing/NaN exit code also yields 1).
# NOTE(review): 'Chirp_CRAB3_Job_ExitCode' is not among the fields hard-coded in
# the configuration cell of this notebook, so it must be requested for this
# cell to work on a fresh run.
df['Exit'] = df['Chirp_CRAB3_Job_ExitCode'].ne(0).astype('int64')

In [52]:
# One row per (workflow, job id): 'CRAB_Retry' becomes the number of records
# with a non-null CRAB_Retry, 'Exit' becomes the sum of the Exit flags.
df_g = (
    df.groupby(by=['CRAB_Workflow', 'CRAB_Id'])
      .agg({'CRAB_Retry': 'count', 'Exit': 'sum'})
)

In [53]:
# Display the per-(workflow, job id) aggregation
df_g

Unnamed: 0_level_0,Unnamed: 1_level_0,CRAB_Retry,Exit
CRAB_Workflow,CRAB_Id,Unnamed: 2_level_1,Unnamed: 3_level_1
190527_220527:dwinterb_crab_VBFHToTauTau_M125_13TeV_powheg_pythia8_nospinner-filter-v2,6332,1,0
190527_220527:dwinterb_crab_VBFHToTauTau_M125_13TeV_powheg_pythia8_nospinner-filter-v2,8083,1,0
190527_220527:dwinterb_crab_VBFHToTauTau_M125_13TeV_powheg_pythia8_nospinner-filter-v2,8246,1,0
190527_220527:dwinterb_crab_VBFHToTauTau_M125_13TeV_powheg_pythia8_nospinner-filter-v2,8313,1,1
190527_220527:dwinterb_crab_VBFHToTauTau_M125_13TeV_powheg_pythia8_nospinner-filter-v2,8569,1,1
190527_220527:dwinterb_crab_VBFHToTauTau_M125_13TeV_powheg_pythia8_nospinner-filter-v2,8687,1,0
190604_044933:mdjordje_crab_test_SingleMuon_Run2017F_L,1912,1,1
190604_174157:dwinterb_crab_VBFHToTauTau_M125_13TeV_powheg_pythia8_PS-filter-v3,1110,1,0
190604_174157:dwinterb_crab_VBFHToTauTau_M125_13TeV_powheg_pythia8_PS-filter-v3,1117,1,0
190604_174157:dwinterb_crab_VBFHToTauTau_M125_13TeV_powheg_pythia8_PS-filter-v3,3473,1,0


In [54]:
# 'diff' = record count minus the summed Exit flags, per (workflow, job id)
df_g['diff'] = df_g['CRAB_Retry'].sub(df_g['Exit'])

In [55]:
# Number of (workflow, job id) groups whose 'diff' is greater than 1
df_g.loc[df_g['diff'].gt(1)].count()

CRAB_Retry    49
Exit          49
diff          49
dtype: int64

In [56]:
# Non-null count per column over all (workflow, job id) groups, for comparison
df_g.count()

CRAB_Retry    9315
Exit          9315
diff          9315
dtype: int64