# Used packages and general settings

In [None]:
import re
import datetime
import matplotlib.pyplot as plt
import numpy as np
from elasticsearch import Elasticsearch

%matplotlib inline

# Elasticsearch configuration

In [None]:
# Placeholder credentials for the Elasticsearch instance; replace them with
# real values before running the notebook.
username = "username"
password = "password"

# HTTPS connection with certificate verification against the local CA bundle.
es = Elasticsearch(
    [
        {
            "host": "es-cms.cern.ch",
            "port": 9203,
            "http_auth": ":".join((username, password)),
        }
    ],
    use_ssl=True,
    verify_certs=True,
    ca_certs="ca-bundle.trust.crt",
)

# Time filter

In [None]:
def time_filter(indices, days=0, until=0):
    """Return the names of the daily ``cms-YYYY-MM-DD`` indices in a time window.

    Args:
        indices: Ignored. The list of indices is always fetched from
            Elasticsearch; the parameter is kept only so that existing
            callers keep working.
        days: Length of the window in days. ``0`` disables filtering and
            selects every index through the ``cms-20*`` wildcard.
        until: Age, in days before today, of the most recent index that is
            still included (``0`` means the window ends today).

    Returns:
        A list of index names whose age ``a`` in days satisfies
        ``until <= a < days + until``, or ``["cms-20*"]`` when ``days`` is 0.

    Raises:
        ValueError: If an index name matching ``cms-20*`` does not end in a
            ``%Y-%m-%d`` date.
    """
    # Without a window there is nothing to filter, so skip the (slow) index
    # listing request entirely.
    if days == 0:
        return ["cms-20*"]

    listing = es.cat.indices(index="cms-20*", h="index", request_timeout=600)
    # Strip before dropping empties so whitespace-only lines cannot reach
    # strptime below.
    names = sorted(name for name in (line.strip() for line in listing.split("\n")) if name)

    today = datetime.date.today()
    datefmt = "%Y-%m-%d"
    filtered = []
    for name in names:
        day = datetime.datetime.strptime(re.sub(r"cms-", "", name), datefmt).date()
        age = (today - day).days
        if until <= age < days + until:
            filtered.append(name)
    return filtered

# Indices to be considered

In [None]:
# Length of the time window in days; 0 selects all indices ("cms-20*").
no_of_days = 0
# Age, in days before today, of the most recent index to include.
last_day = 0
# The first positional parameter of time_filter is `indices`, so the window
# has to be passed by keyword; passing it positionally would shift
# no_of_days into `indices` and last_day into `days`.
ind = ",".join(time_filter(None, days=no_of_days, until=last_day))

# Part 1

Classical (standard) mixing vs. premixing: CPU efficiency and event rate of DIGI jobs.

## Query for premixing

In [None]:
# Premixing sample: completed DIGI jobs with exit code 0 whose workflow name
# contains "premix" (any letter case), with strictly positive CPU time,
# committed core hours and event rate, recorded at or after the cutoff
# 1498860000000 ms since the epoch (2017-06-30 22:00:00 UTC).
premixing_must = [
    {"match": {"Status": "Completed"}},
    {"match": {"TaskType": "DIGI"}},
    {"regexp": {"Workflow": ".*[pP][rR][eE][mM][iI][xX].*"}},
    {"match": {"ExitCode": 0}},
]
premixing_must.extend(
    {"range": {positive_field: {"gt": 0}}}
    for positive_field in ("CpuTimeHr", "CommittedCoreHr", "EventRate")
)
premixing_must.append(
    {"range": {"RecordTime": {"gte": 1498860000000, "format": "epoch_millis"}}}
)

body_premixing = {
    "size": 10000,
    "_source": ["CpuTimeHr", "CommittedCoreHr", "EventRate", "MyEff", "Workflow"],
    "query": {"bool": {"must": premixing_must}},
    # CPU efficiency = CPU time / committed core time, evaluated as a script
    # field for every returned hit.
    "script_fields": {
        "MyEff": {"script": "doc['CpuTimeHr'].value / doc['CommittedCoreHr'].value"}
    },
}
res_premixing = es.search(index=ind, body=body_premixing, request_timeout=1200)

## Query for classical mixing

In [None]:
# Classical-mixing sample: same selection as the premixing query (completed
# DIGI jobs, exit code 0, positive CPU time / committed core hours / event
# rate, recorded at or after 1498860000000 ms since the epoch), but keeping
# only workflows whose name does NOT contain "premix" (any letter case).
mixing_selection = [
    {"match": {"Status": "Completed"}},
    {"match": {"TaskType": "DIGI"}},
    {"match": {"ExitCode": 0}},
    {"range": {"CpuTimeHr": {"gt": 0}}},
    {"range": {"CommittedCoreHr": {"gt": 0}}},
    {"range": {"EventRate": {"gt": 0}}},
    {"range": {"RecordTime": {"gte": 1498860000000, "format": "epoch_millis"}}},
]
mixing_exclusion = [
    {"regexp": {"Workflow": ".*[pP][rR][eE][mM][iI][xX].*"}},
]

body_mixing = {
    "size": 10000,
    "_source": ["CpuTimeHr", "CommittedCoreHr", "EventRate", "MyEff", "Workflow"],
    "query": {
        "bool": {
            "must": mixing_selection,
            "must_not": mixing_exclusion,
        }
    },
    # CPU efficiency = CPU time / committed core time, evaluated as a script
    # field for every returned hit.
    "script_fields": {
        "MyEff": {"script": "doc['CpuTimeHr'].value / doc['CommittedCoreHr'].value"}
    },
}
res_mixing = es.search(index=ind, body=body_mixing, request_timeout=1200)

## Function for listing the CPU efficiency and event rate

In [None]:
def listing_cpu_eff_and_event_rate(res):
    """Extract event rates and CPU efficiencies from a search response.

    Args:
        res: Search response dictionary; every hit under ``res["hits"]["hits"]``
            must carry ``_source.EventRate`` and the script field
            ``fields.MyEff`` (a list whose first element is used).

    Returns:
        A tuple ``(event_rate, cpu_eff)`` of two lists, in hit order.
    """
    pairs = [
        (hit["_source"]["EventRate"], hit["fields"]["MyEff"][0])
        for hit in res["hits"]["hits"]
    ]
    rates = [rate for rate, _ in pairs]
    efficiencies = [eff for _, eff in pairs]
    return rates, efficiencies

## Function for plotting a graph of CPU efficiency and event rate

In [None]:
def correlation_graph(cpu_eff, event_rate, title):
    """Draw two scatter plots of event rate against CPU efficiency.

    The first figure uses a linear y-axis that is capped at the 300th
    largest event rate, so that the largest values do not compress the rest
    of the points; the second figure uses a logarithmic y-axis with
    automatic limits. Both figures restrict the x-axis to ``[0, 1]``.

    Args:
        cpu_eff: Sequence of CPU efficiencies (x values).
        event_rate: Sequence of event rates (y values), paired with
            ``cpu_eff`` element by element.
        title: Base title; the name of the scale is appended per figure.
    """
    # Upper limit of the linear plot. With fewer than 300 points there is
    # nothing to clip, so fall back to the maximum instead of raising
    # IndexError; with no points at all the automatic limits are kept.
    ranked = sorted(event_rate)
    if len(ranked) >= 300:
        y_max = ranked[-300]
    elif ranked:
        y_max = ranked[-1]
    else:
        y_max = None

    # Figure settings are the same for both plots, so set them once.
    plt.rcParams['figure.figsize'] = (20, 4)
    plt.rcParams.update({"font.size": 15})

    for scale in ("linear", "log"):
        if scale == "log":
            plt.yscale("log")
            plt.ylabel("Event rate (log)")
            plt.title(title + " (logarithmic scale)")
        else:
            plt.ylabel("Event rate")
            plt.title(title + " (linear scale)")
            if y_max is not None and y_max > 0:
                plt.ylim([0, y_max])
        plt.scatter(cpu_eff, event_rate)
        plt.xlabel("CPU efficiency")
        plt.xlim([0.0, 1.0])
        plt.show()

## Function for listing the CPU efficiency and event rate and plotting a graph

In [None]:
def listing_and_graphing(res, title):
    """Plot event rate against CPU efficiency for one search response.

    Args:
        res: Search response passed to ``listing_cpu_eff_and_event_rate``.
        title: Base title forwarded to ``correlation_graph``.

    Returns:
        The list of CPU efficiencies extracted from ``res``.
    """
    rates, efficiencies = listing_cpu_eff_and_event_rate(res)
    correlation_graph(efficiencies, rates, title)
    return efficiencies

## Graphs of correlation

In [None]:
# Scatter plots for both samples; the CPU efficiencies are kept for the
# histograms further down.
cpu_eff_premixing = listing_and_graphing(res=res_premixing, title="DIGI premixing")
cpu_eff_mixing = listing_and_graphing(res=res_mixing, title="DIGI classical mixing")

## Function for histogram plotting

In [None]:
def histogram(data, title):
    """Show a 100-bin histogram of ``data`` over ``[0, 1]``.

    Args:
        data: Values to histogram (CPU efficiencies).
        title: Title of the figure.

    Returns:
        The per-bin counts returned by ``plt.hist``.
    """
    counts = plt.hist(data, bins=100, range=[0.0, 1.0])[0]
    plt.title(title)
    plt.xlabel("CPU efficiency")
    plt.ylabel("Frequency")
    plt.margins(0.0, 0.05)
    plt.show()
    return counts

## Histograms

In [None]:
# Per-bin counts of both samples, reused by the combined bar graph.
y_premixing = histogram(data=cpu_eff_premixing, title="DIGI premixing")
y_mixing = histogram(data=cpu_eff_mixing, title="DIGI mixing")

## Function for bar graph

In [None]:
def bar_graph(y_values1, y_values2, label1, label2):
    """Show two sets of histogram counts side by side in one bar chart.

    Bars are positioned by bin index; the second series is shifted to the
    right by one bar width. Note that the function changes the global
    ``figure.figsize`` and ``font.size`` matplotlib settings.

    Args:
        y_values1: Per-bin counts of the first series.
        y_values2: Per-bin counts of the second series; expected to have the
            same length as ``y_values1``.
        label1: Legend label of the first series.
        label2: Legend label of the second series.
    """
    plt.rcParams['figure.figsize'] = (40, 10)
    plt.rcParams.update({"font.size": 25})
    n_bins = len(y_values1)
    index = np.arange(n_bins)
    bar_width = 0.35
    opacity = 0.8
    plt.bar(index, y_values1, bar_width, alpha=opacity, align="center", color="b", label=label1)
    plt.bar(index + bar_width, y_values2, bar_width, alpha=opacity, align="center", color="g", label=label2)
    # Anchor the legend outside the axes, to the right of the plot area.
    plt.legend(loc=9, bbox_to_anchor=(1.07, 0.6))
    plt.margins(0.0, 0.05)
    plt.title("CPU efficiency")
    plt.ylabel("Frequency")
    # Follow the number of bins instead of assuming 100 of them.
    plt.xlim([0, n_bins])
    plt.show()

## Bar graph

In [None]:
# Side-by-side comparison of the two CPU-efficiency distributions.
bar_graph(
    y_values1=y_premixing,
    y_values2=y_mixing,
    label1="DIGI premixing",
    label2="DIGI mixing",
)

# Part 2

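Local vs. remote PU reading: premixing DIGI jobs that ran at CERN or FNAL sites (local) compared with those that ran at all other sites (remote).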

## Query for local

In [None]:
# Local sample: completed premixing DIGI jobs (exit code 0, positive CPU
# time / committed core hours / event rate, recorded at or after
# 1498860000000 ms since the epoch) that ran at a site whose name matches
# one of the two Site patterns below (CERN or FNAL, any letter case).
body_local = {
    "size": 10000,
    "_source": ["CpuTimeHr", "CommittedCoreHr", "EventRate", "MyEff", "Site"],
    "query": {
        "bool": {
            "must": [
                {"match": {"Status": "Completed"}},
                {"match": {"TaskType": "DIGI"}},
                {"regexp": {"Workflow": ".*[pP][rR][eE][mM][iI][xX].*"}},
                {"match": {"ExitCode": 0}},
                {"range": {"CpuTimeHr": {"gt": 0}}},
                {"range": {"CommittedCoreHr": {"gt": 0}}},
                {"range": {"EventRate": {"gt": 0}}},
                {"range": {"RecordTime": {"gte": 1498860000000, "format": "epoch_millis"}}},
            ],
            "should": [
                {"regexp": {"Site": ".*[cC][eE][rR][nN].*"}},
                {"regexp": {"Site": ".*[fF][nN][aA][lL]"}},
            ],
            # When a bool query also has "must" clauses, "should" clauses are
            # optional by default and only influence scoring. Requiring at
            # least one of them is what actually restricts the result to the
            # local sites.
            "minimum_should_match": 1,
        }
    },
    # CPU efficiency = CPU time / committed core time, evaluated as a script
    # field for every returned hit.
    "script_fields": {
        "MyEff": {"script": "doc['CpuTimeHr'].value / doc['CommittedCoreHr'].value"}
    },
}
res_local = es.search(index=ind, body=body_local, request_timeout=1200)

## Query for remote

In [None]:
# Remote sample: completed premixing DIGI jobs (exit code 0, positive CPU
# time / committed core hours / event rate, recorded at or after
# 1498860000000 ms since the epoch) at sites matching neither of the two
# Site patterns (CERN or FNAL, any letter case).
remote_job_selection = [
    {"match": {"Status": "Completed"}},
    {"match": {"TaskType": "DIGI"}},
    {"regexp": {"Workflow": ".*[pP][rR][eE][mM][iI][xX].*"}},
    {"match": {"ExitCode": 0}},
]
remote_value_ranges = [
    {"range": {"CpuTimeHr": {"gt": 0}}},
    {"range": {"CommittedCoreHr": {"gt": 0}}},
    {"range": {"EventRate": {"gt": 0}}},
    {"range": {"RecordTime": {"gte": 1498860000000, "format": "epoch_millis"}}},
]
remote_excluded_sites = [
    {"regexp": {"Site": site_pattern}}
    for site_pattern in (".*[cC][eE][rR][nN].*", ".*[fF][nN][aA][lL]")
]

body_remote = {
    "size": 10000,
    "_source": ["CpuTimeHr", "CommittedCoreHr", "EventRate", "MyEff", "Site"],
    "query": {
        "bool": {
            "must": remote_job_selection + remote_value_ranges,
            "must_not": remote_excluded_sites,
        }
    },
    # CPU efficiency = CPU time / committed core time, evaluated as a script
    # field for every returned hit.
    "script_fields": {
        "MyEff": {"script": "doc['CpuTimeHr'].value / doc['CommittedCoreHr'].value"}
    },
}
res_remote = es.search(index=ind, body=body_remote, request_timeout=1200)

## Graphs of correlation

In [None]:
# Scatter plots for the local and remote samples; the CPU efficiencies are
# kept for the histograms further down.
cpu_eff_local = listing_and_graphing(res=res_local, title="DIGI local")
cpu_eff_remote = listing_and_graphing(res=res_remote, title="DIGI remote")

## Histograms

In [None]:
# Per-bin counts of both samples, reused by the combined bar graph.
y_local = histogram(data=cpu_eff_local, title="DIGI local")
y_remote = histogram(data=cpu_eff_remote, title="DIGI remote")

## Bar graph

In [None]:
# Side-by-side comparison of the local and remote CPU-efficiency distributions.
bar_graph(
    y_values1=y_local,
    y_values2=y_remote,
    label1="DIGI local",
    label2="DIGI remote",
)