In [1]:
import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
from dotenv import load_dotenv
import os

# Load key=value pairs from a local .env file into the process environment so
# that the os.getenv("ELK_HOST") lookup further down can find its value.
load_dotenv()


True

In [2]:
# Elasticsearch endpoint, read from the environment (see load_dotenv() above).
ELK_HOST = os.getenv("ELK_HOST")
if not ELK_HOST:
    # Fail fast: passing hosts=[None] to the client produces an obscure error
    # that does not point at the missing configuration.
    raise RuntimeError(
        "ELK_HOST is not set; define it in the environment or in a .env file"
    )

# Shared client used by every cell below. Long timeout and retries are configured
# because the scan over the whole index can be slow.
# NOTE(review): verify_certs=False disables TLS certificate validation and
# ssl_show_warn=False hides the resulting warning -- only acceptable against a
# trusted/test cluster.
es = Elasticsearch(
    hosts=[ELK_HOST],
    timeout=300,
    max_retries=10,
    retry_on_timeout=True,
    verify_certs=False,
    ssl_show_warn=False,
)

In [11]:
# Target Elasticsearch index: read by find_user_data() and written to by the
# es.index() call in the users.log loop further down in this cell.
index = "loadtest-webrtc-stats-1712681258923"


def find_user_data(index):
    """Stream the ``_source`` of every document carrying both participant fields.

    Only documents in which ``new_participant_id`` and ``new_participant_session``
    both exist are selected. Results are pulled through the scroll-based ``scan``
    helper using the module-level ``es`` client.

    Args:
        index: Name of the Elasticsearch index to read.

    Yields:
        The ``_source`` of each matching hit; Elasticsearch metadata such as
        ``_id`` and ``_index`` is dropped.
    """
    required_fields = ("new_participant_id", "new_participant_session")
    # One "exists" clause per required field, all of which must match.
    search_body = {
        "query": {
            "bool": {
                "must": [{"exists": {"field": name}} for name in required_fields]
            }
        }
    }
    # scan() pages through the complete result set instead of a single page.
    hits = scan(
        client=es,
        index=index,
        query=search_body,
        scroll="1m",
        preserve_order=False,
        raise_on_error=True,
        clear_scroll=True,
    )
    # Only the document body is of interest to callers.
    yield from (hit["_source"] for hit in hits)

# Backfill: for every user listed in users.log that has no matching document in
# the index, index a new document carrying the user's id, session and timestamp.
times = []  # NOTE(review): unused in this cell; kept in case a later cell relies on it


def _parse_log_time(raw_time):
    """Convert a users.log timestamp into a timezone-aware UTC ``pd.Timestamp``.

    The expected layout is ``Tue Aug 01 13:48:59 CEST 2023``. Both ``CEST``
    (summer time) and ``CET`` (winter time) are accepted; the wall-clock time is
    interpreted in ``Europe/Madrid`` and the abbreviation is used to resolve the
    ambiguous hour when clocks go back.

    Raises:
        ValueError: If the string carries neither abbreviation or does not match
            the expected layout.
    """
    for tz_abbrev, is_dst in (("CEST", True), ("CET", False)):
        marker = " " + tz_abbrev + " "
        if marker in raw_time:
            # Drop the abbreviation and parse the remainder as a naive timestamp.
            naive = pd.to_datetime(
                raw_time.replace(marker, " "), format="%a %b %d %H:%M:%S %Y"
            )
            localized = naive.tz_localize("Europe/Madrid", ambiguous=is_dst)
            return localized.tz_convert("UTC")
    raise ValueError("unsupported timestamp in users.log: %r" % (raw_time,))


df = pd.DataFrame(find_user_data(index))
if df.empty:
    # No matching documents yet: the frame has no columns, so the lookups below
    # would raise KeyError. Give it the two key columns explicitly.
    df = pd.DataFrame(columns=["new_participant_id", "new_participant_session"])

# Read the log up front so the file is closed before talking to Elasticsearch.
with open("users.log", "r") as f:
    users = f.readlines()

# Each line is "<time> | <session> | <user id>".
for user in users:
    fields = [field.strip() for field in user.split("|")]
    if len(fields) < 3:
        # Blank or malformed line (e.g. a trailing newline): nothing to backfill.
        continue
    user_time, user_session, user_id = fields[:3]

    # Rows already present in the index for this user and session.
    df_filtered = df[
        (df["new_participant_id"] == user_id)
        & (df["new_participant_session"] == user_session)
    ]
    if not df_filtered.empty:
        continue

    # TODO(review): the original notes mention looking up a file in qoe_files/
    # (e.g. v-LoadTestSession1-User1-User2_cuts.json) and skipping the user when
    # it is missing; that check is not implemented here.
    # NOTE(review): df is not refreshed after indexing, so a user/session that
    # appears on several log lines is indexed once per line.
    new_data = {
        "new_participant_id": user_id,
        "new_participant_session": user_session,
        "@timestamp": _parse_log_time(user_time),
    }
    # Add the missing document to the index.
    es.index(index=index, body=new_data)
