In [35]:
import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
from dotenv import load_dotenv
import os

# Read a local .env file (if present) into the process environment.
# NOTE(review): presumably this is where ELK_HOST, read below via os.getenv, is defined — confirm.
load_dotenv()

True

In [36]:
# Elasticsearch endpoint, taken from the environment so it is not hard-coded in the notebook.
ELK_HOST = os.getenv("ELK_HOST")
if not ELK_HOST:
    # Fail early with an actionable message instead of an obscure client-side error
    # caused by passing None (or an empty string) as a host.
    raise RuntimeError(
        "ELK_HOST is not set; define it in the environment or in a .env file."
    )

# Generous timeout/retry settings because the scroll queries below can be slow.
# NOTE(review): verify_certs=False disables TLS certificate validation and
# ssl_show_warn=False hides the related warning; acceptable only for a trusted
# host (e.g. a self-signed test cluster) — confirm before pointing elsewhere.
es = Elasticsearch(
    hosts=[ELK_HOST],
    timeout=300,
    max_retries=10,
    retry_on_timeout=True,
    verify_certs=False,
    ssl_show_warn=False,
)

In [37]:
# Per-run time windows: one row per run name, with "from" and "to" bounds.
start_end_times = pd.read_json("dfs_final/start-end-times.json", orient="index")

# Parse both bounds as timezone-aware (UTC) datetimes from their ISO 8601 text form.
for _bound in ("from", "to"):
    start_end_times[_bound] = pd.to_datetime(
        start_end_times[_bound], format="ISO8601", utc=True
    )


def get_max_time(index):
    """Return the end ("to") bound recorded for the run labelled ``index``.

    The value is read directly from the ``start_end_times`` table; it is not
    adjusted against any timestamps observed in the data.
    """
    window_end = start_end_times.loc[index, "to"]
    return window_end


def get_min_time(index):
    """Return the start ("from") bound recorded for the run labelled ``index``.

    The value is read directly from the ``start_end_times`` table; it is not
    adjusted against any timestamps observed in the data.
    """
    window_start = start_end_times.loc[index, "from"]
    return window_start

# Run names to export. Each name is used as a row label in start_end_times
# (to look up the run's time window) and as the prefix of the output CSV file.

# Kurento runs currently selected for export.
index_kurento_names = [
    "loadtest-webrtc-final-kurento-2p-t3medium-retry",
    "loadtest-webrtc-final-kurento-8p-t3medium-retry",
    "loadtest-webrtc-final-kurento-3p-10s-t3medium-retry",
    "loadtest-webrtc-final-kurento-3p-40s-t3medium-retry"
]

# Mediasoup runs: every entry is commented out, so this list is currently empty.
# Uncomment an entry to include that run in the export.
index_mediasoup_names = [
    # "loadtest-webrtc-final-mediasoup-2p-t3medium",
    # "loadtest-webrtc-final-mediasoup-2p-t3medium-2",
    # "loadtest-webrtc-final-mediasoup-2p-t3medium-wait",
    # "loadtest-webrtc-final-mediasoup-2p-t3medium-retry",
    # "loadtest-webrtc-final-mediasoup-5p-t3medium",
    # "loadtest-webrtc-final-mediasoup-5p-t3medium-2",
    # "loadtest-webrtc-final-mediasoup-5p-t3medium-wait",
    # "loadtest-webrtc-final-mediasoup-5p-t3medium-retry",
    # "loadtest-webrtc-final-mediasoup-8p-t3medium",
    # "loadtest-webrtc-final-mediasoup-8p-t3medium-2",
    # "loadtest-webrtc-final-mediasoup-8p-t3medium-wait",
    # "loadtest-webrtc-final-mediasoup-8p-t3medium-retry",
    # "loadtest-webrtc-final-mediasoup-3p-10s-t3medium",
    # "loadtest-webrtc-final-mediasoup-3p-10s-t3medium-2",
    # "loadtest-webrtc-final-mediasoup-3p-10s-t3medium-wait",
    # "loadtest-webrtc-final-mediasoup-3p-10s-t3medium-retry",
    # "loadtest-webrtc-final-mediasoup-3p-20s-t3medium",
    # "loadtest-webrtc-final-mediasoup-3p-20s-t3medium-2",
    # "loadtest-webrtc-final-mediasoup-3p-20s-t3medium-wait",
    # "loadtest-webrtc-final-mediasoup-3p-20s-t3medium-retry",
    # "loadtest-webrtc-final-mediasoup-3p-40s-t3medium",
    # "loadtest-webrtc-final-mediasoup-3p-40s-t3medium-2",
    # "loadtest-webrtc-final-mediasoup-3p-40s-t3medium-wait",
    # "loadtest-webrtc-final-mediasoup-3p-40s-t3medium-retry",
    # "loadtest-webrtc-final-mediasoup-2p-t3medium-retry-2"
]

# Combined list, Kurento first; its order determines the order of the exported frames.
index_list_names = index_kurento_names + index_mediasoup_names

In [38]:
def _to_epoch_millis(moment):
    """Convert a datetime-like ``moment`` to integer milliseconds since the Unix epoch."""
    # Scale to milliseconds BEFORE converting to int so the sub-second part is
    # kept; round() absorbs floating-point noise from the multiplication.
    return round(moment.timestamp() * 1000)


def generate_qoe_data_from_elastic(index_name):
    """Yield the ``_source`` of every ``webrtcStats`` document for one run.

    The run's time window is looked up with ``get_min_time`` / ``get_max_time``
    and applied as an inclusive range filter on the document ``timestamp``
    field, expressed in epoch milliseconds.

    Args:
        index_name: Run name used to look up the time window. Note that this
            is NOT the Elasticsearch index; the query always targets the
            ``openvidu`` index.

    Yields:
        dict: The ``_source`` payload of each matching hit.

    Raises:
        KeyError: If ``index_name`` has no entry in the time-window table.
        Exception: Errors from the Elasticsearch scroll are propagated
            (``raise_on_error=True``).
    """
    window_start_ms = _to_epoch_millis(get_min_time(index_name))
    window_end_ms = _to_epoch_millis(get_max_time(index_name))

    # Both clauses must hold: the document type and the inclusive time window.
    query = {
        "query": {
            "bool": {
                "must": [
                    {"match": {"elastic_type": "webrtcStats"}},
                    {
                        "range": {
                            "timestamp": {
                                "gte": window_start_ms,
                                "lte": window_end_ms,
                            }
                        }
                    },
                ]
            }
        }
    }

    # scan() pages through the complete result set with the scroll API.
    hits = scan(
        client=es,
        query=query,
        scroll="1m",
        index="openvidu",
        raise_on_error=True,
        preserve_order=False,
        clear_scroll=True,
    )

    # Only '_source' is needed; this drops Elasticsearch metadata such as
    # _id, _type and _index.
    for hit in hits:
        yield hit["_source"]

# Lazily describe one DataFrame per run name; nothing is fetched until iteration.
df_generators = (
    pd.DataFrame(generate_qoe_data_from_elastic(run_name))
    for run_name in index_list_names
)

# Materialise every frame now, in the same order as index_list_names.
df_list = [*df_generators]

In [40]:
# Write each non-empty result set to a CSV named after its run.
for run_name, df_user in zip(index_list_names, df_list):
    if df_user.empty:
        # No documents were returned for this run; no file is written.
        continue
    # Interpret the numeric timestamps as epoch milliseconds and store them
    # as timezone-aware (UTC) datetimes; this modifies the frame in place.
    df_user["timestamp"] = pd.to_datetime(df_user["timestamp"], unit="ms", utc=True)
    df_user.to_csv(f"dfs_final/{run_name}-mediaserver-stats.csv", index=False)