In [1]:
import os

json_file_name = "flash_state_traces.json"

In [None]:
import json

# Read the JSON trace file. The encoding is pinned to UTF-8 so the result
# does not depend on the platform's default locale encoding.
with open(json_file_name, 'r', encoding='utf-8') as file:
    data = json.load(file)

data

In [None]:
## The JSON has key/value pairs of the form: {'0': {'guid': '0', 'messages': '2020-01-29 18:42:18 \tscreen_off\n2020-01-29 18:42:18 \tscreen_lock\n2020-01-29 18:42:18 \t100.0%\n2020-01-29 18:42:18 \twifi\n2020-01-29 18:43:56 \tscreen_on\n2020-01-29 18:44:06 \tscreen_unlock', 'model': 'CPH1909'}}
## Create a dictionary per trainer
## Write a function to get a list of tuples per trainer
## Each element in the tuple: (timestamp, event)
def parse_messages(messages):
    """Parse a newline-separated trace string into ``(timestamp, event)`` tuples.

    Every non-blank line is split on its first tab character; the text before
    the tab is the timestamp and the text after it is the event. Surrounding
    whitespace (including a trailing ``\\r``) is removed from both parts.

    Args:
        messages: Trace text with one ``"<timestamp>\\t<event>"`` entry per line.

    Returns:
        A list of ``(timestamp, event)`` string tuples in input order. Blank
        lines are skipped, so an empty string yields an empty list.

    Raises:
        ValueError: If a non-blank line contains no tab separator.
    """
    events = []
    for line in messages.split('\n'):
        # A trailing newline or an empty 'messages' field produces blank
        # lines; they carry no event and must not break the unpacking below.
        if not line.strip():
            continue
        # maxsplit=1 keeps any additional tab inside the event text instead
        # of failing the two-value unpacking.
        timestamp, event = line.split('\t', 1)
        events.append((timestamp.strip(), event.strip()))
    return events

# Index the parsed event lists by each record's own 'guid' field; the outer
# JSON keys are not used.
trainers_data = {
    record['guid']: parse_messages(record['messages'])
    for record in data.values()
}

trainers_data

In [None]:
import random

## Trainers_data has many trainers, give the count.

## Then randomly select N trainers from it where N is configurable. E.g. N = 300. 

## create a subset of trainers data from original trainers data
# Count the number of trainers
trainer_count = len(trainers_data)
print(f"Total number of trainers: {trainer_count}")

# Keep only trainers whose trace contains at least one 'wifi' event and at
# least one 'battery_charged_on' event.
selected_trainers = [
    trainer for trainer in trainers_data.keys()
    if any(event == 'wifi' for _, event in trainers_data[trainer]) and
       any(event == 'battery_charged_on' for _, event in trainers_data[trainer])
]

# Configurable upper bound on the number of trainers to sample. The actual
# subset can be smaller when fewer trainers pass the filter above.
N = 850

# Seed for the sampling below. A dedicated generator makes the draw
# reproducible across "Restart & Run All" without touching the global
# `random` module state.
SAMPLE_SEED = 0
sample_rng = random.Random(SAMPLE_SEED)

# Randomly select up to N trainers from the filtered list
selected_trainers = sample_rng.sample(selected_trainers, min(N, len(selected_trainers)))

# Create a subset of trainers data
subset_trainers_data = {trainer: trainers_data[trainer] for trainer in selected_trainers}

print(f"Total subset of trainers: {len(subset_trainers_data)}")

subset_trainers_data

In [None]:
## Use the tuple list to create sub-lists of events: WiFi, battery level, charging
## The dict has keys like {'0': [('2020-01-29 18:42:18', 'screen_off'), ('2020-01-29 18:42:18', 'screen_lock'), ('2020-01-29 18:42:18', '100.0%')]}
## I want to create a new dict with keys like {'0': {'screen_off': ['2020-01-29 18:42:18'], 'screen_lock': ['2020-01-29 18:42:18'], 'battery': [('2020-01-29 18:42:18', '100.0%')], 'network': [('2020-02-04 07:49:41', '2G')], 'battery_charged_on': ['2020-02-03 21:00:54'], 'battery_charged_off': ['2020-02-03 21:00:54']}}
## can discard the timestamps with event = 'Unknown'
## Write a function to create the new dictionary for each trainer using trainers_data and the above desired format
def create_trainer_dict(trainers_data):
    """Group each trainer's ``(timestamp, event)`` list by event category.

    Matching is by substring, checked in this order: the four screen events,
    battery readings (anything containing ``'%'``), network types
    (``'wifi'``, ``'2G'``, ``'3G'``, ``'4G'``), then the two charging events.
    Events equal to ``'Unknown'`` and events matching nothing are dropped.

    Args:
        trainers_data: Mapping of guid to a list of ``(timestamp, event)``.

    Returns:
        Mapping of guid to a dict with the keys ``screen_off``,
        ``screen_lock``, ``screen_on``, ``screen_unlock``, ``battery``,
        ``network``, ``battery_charged_on`` and ``battery_charged_off``.
        ``battery`` and ``network`` hold ``(timestamp, event)`` tuples; the
        other keys hold bare timestamps.
    """
    screen_keys = ('screen_off', 'screen_lock', 'screen_on', 'screen_unlock')
    network_markers = ('wifi', '2G', '3G', '4G')
    charge_keys = ('battery_charged_on', 'battery_charged_off')
    bucket_names = screen_keys + ('battery', 'network') + charge_keys

    grouped = {}
    for guid, events in trainers_data.items():
        buckets = {name: [] for name in bucket_names}
        for timestamp, event in events:
            if event == 'Unknown':
                continue

            # First screen key contained in the event text, if any.
            screen_key = next((key for key in screen_keys if key in event), None)
            if screen_key is not None:
                buckets[screen_key].append(timestamp)
            elif '%' in event:
                buckets['battery'].append((timestamp, event))
            elif any(marker in event for marker in network_markers):
                buckets['network'].append((timestamp, event))
            else:
                charge_key = next((key for key in charge_keys if key in event), None)
                if charge_key is not None:
                    buckets[charge_key].append(timestamp)
        grouped[guid] = buckets
    return grouped

# Regroup the sampled trainers' event lists into per-category buckets.
new_trainers_data = create_trainer_dict(subset_trainers_data)
new_trainers_data

In [None]:
# Inspect one trainer. .get() avoids a KeyError when guid '946' is not part
# of the randomly sampled subset.
new_trainers_data.get('946', {}).keys()

In [7]:
## Availability criteria for trainers is connected to wifi and charging. Using the new_trainers_data, write a function to get the list of trainers who satisfy the availability criteria and for the durations of time that they are available and unavailable.

In [None]:
from datetime import datetime

# Get favourable network conditions for all trainers, i.e. the periods
# when a trainer is on wifi. The network events in trainer_data are
# 'wifi', '2G', '3G' and '4G'. At the timestamp of a 'wifi' event the
# trainer has switched to wifi and remains on wifi until the next
# network event that is not wifi. For each trainer, build a list of
# tuples of the form [(start_timestamp, duration_of_wifi_seconds)] and
# store it under a new key called 'wifi_on' in the trainer's dictionary.

def calculate_wifi_durations(trainers_data):
    """Add a ``'wifi_on'`` list of WiFi sessions to every trainer, in place.

    A session starts at the first ``'wifi'`` network event and ends at the
    next ``'2G'``, ``'3G'`` or ``'4G'`` event. A further ``'wifi'`` event seen
    while a session is already open continues that session instead of
    restarting it, so no WiFi time is lost. A session still open after the
    last network event is closed at that last event's timestamp.

    Args:
        trainers_data: Mapping of guid to a telemetry dict whose ``'network'``
            entry is a list of ``(timestamp, event)`` tuples with timestamps
            formatted as ``'%Y-%m-%d %H:%M:%S'``.

    Returns:
        None. Each telemetry dict gains ``'wifi_on'``: a list of
        ``(start_timestamp, duration_seconds)`` tuples.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    for guid, all_device_telemetry in trainers_data.items():
        network_events = all_device_telemetry['network']
        wifi_durations = []
        wifi_start = None

        for timestamp, event in network_events:
            if event == 'wifi':
                # Only the first 'wifi' of a run opens the session; repeats
                # must not move the start forward.
                if wifi_start is None:
                    wifi_start = datetime.strptime(timestamp, time_format)
            elif wifi_start is not None and event in ('2G', '3G', '4G'):
                wifi_end = datetime.strptime(timestamp, time_format)
                duration = (wifi_end - wifi_start).total_seconds()
                wifi_durations.append((wifi_start.strftime(time_format), duration))
                wifi_start = None

        # If still on WiFi at the end of the events list
        if wifi_start is not None:
            wifi_end = datetime.strptime(network_events[-1][0], time_format)
            duration = (wifi_end - wifi_start).total_seconds()
            wifi_durations.append((wifi_start.strftime(time_format), duration))

        trainers_data[guid]['wifi_on'] = wifi_durations

# Mutates new_trainers_data in place: every trainer gains a 'wifi_on' key.
calculate_wifi_durations(new_trainers_data)
new_trainers_data

In [None]:
# .get() avoids a KeyError when guid '946' is not in the sampled subset.
new_trainers_data.get('946', {}).get('wifi_on', [])

In [10]:
# rename the battery property to battery_level for all trainers in the
# new_trainers_data dictionary

for trainer in new_trainers_data.values():
    # Guarded so that re-running this cell is a no-op instead of a KeyError
    # once 'battery' has already been renamed.
    if 'battery' in trainer:
        trainer['battery_level'] = trainer.pop('battery')

In [11]:
# combine all battery_charged_on and battery_charged_off events for
# each trainer into a single list of events for the trainer as
# battery_charging_state and update the dictionary for each trainer.
# for all battery_charged_on values, it should append (timestamp,
# 'charging_on'). Similarly for battery_charged_off values, it should
# append (timestamp, 'charging_off'). Sort the list of events by the timestamp.

for trainer in new_trainers_data.values():
    # Tag every on/off timestamp with its state and order the merged list.
    # Tuples sort by timestamp first, then by label, so at an identical
    # timestamp 'charging_off' sorts before 'charging_on'.
    trainer['battery_charging_state'] = sorted(
        [(started, 'charging_on') for started in trainer['battery_charged_on']]
        + [(stopped, 'charging_off') for stopped in trainer['battery_charged_off']]
    )

In [None]:
# get favourable charging conditions for all trainers i.e when
# trainers are charging. The current charging events in trainer_data
# are 'battery_charged_on' and 'battery_charged_off'. At a timestamp
# of 'battery_charged_on' event, the trainer has started charging and
# remains charging until the next 'battery_charged_off' event. For
# each trainer, give a list of tuples of the form: [(start_timestamp,
# duration_of_charging_seconds)] and store it in a new key called
# 'charging' in the trainer's dictionary. If the trainer is still on
# charging at the end of the events list, then the end time is the
# last timestamp in the list.

def calculate_charging_durations(trainers_data):
    """Add a ``'charging'`` list of charging sessions to every trainer, in place.

    A session starts at the first ``'charging_on'`` event and ends at the next
    ``'charging_off'`` event. A repeated ``'charging_on'`` while a session is
    open continues that session rather than restarting it. A session still
    open after the last event is closed at the timestamp of the last
    ``'battery_level'`` sample; if there is no sample, or the last sample
    precedes the session start, the session gets a duration of 0 seconds.

    Args:
        trainers_data: Mapping of guid to a telemetry dict holding
            ``'battery_charging_state'`` (list of ``(timestamp, state)``) and
            ``'battery_level'`` (list of ``(timestamp, level)``), with
            timestamps formatted as ``'%Y-%m-%d %H:%M:%S'``.

    Returns:
        None. Each telemetry dict gains ``'charging'``: a list of
        ``(start_timestamp, duration_seconds)`` tuples.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    for guid, all_device_telemetry in trainers_data.items():
        charging_durations = []
        charging_start = None

        for timestamp, event in all_device_telemetry['battery_charging_state']:
            if event == 'charging_on':
                # Only the first 'charging_on' of a run opens the session.
                if charging_start is None:
                    charging_start = datetime.strptime(timestamp, time_format)
            elif charging_start is not None and event == 'charging_off':
                charging_end = datetime.strptime(timestamp, time_format)
                duration = (charging_end - charging_start).total_seconds()
                charging_durations.append((charging_start.strftime(time_format), duration))
                charging_start = None

        # If still charging at the end of the events list
        if charging_start is not None:
            battery_samples = all_device_telemetry['battery_level']
            charging_end = charging_start
            if battery_samples:
                last_sample = datetime.strptime(battery_samples[-1][0], time_format)
                # Never let the session end before it started.
                charging_end = max(charging_start, last_sample)
            duration = (charging_end - charging_start).total_seconds()
            charging_durations.append((charging_start.strftime(time_format), duration))

        trainers_data[guid]['charging'] = charging_durations

# Mutates new_trainers_data in place: every trainer gains a 'charging' key.
calculate_charging_durations(new_trainers_data)
new_trainers_data

In [None]:
# .get() avoids a KeyError when guid '946' is not in the sampled subset.
new_trainers_data.get('946', {}).get('battery_charged_off', [])

In [None]:
# .get() avoids a KeyError when guid '946' is not in the sampled subset.
new_trainers_data.get('946', {}).keys()

In [None]:
# .get() avoids a KeyError when guid '946' is not in the sampled subset.
new_trainers_data.get('946', {}).get('charging', [])

In [None]:
# For each trainer, the availability criteria is that the trainer is
# on wifi and charging. Write a function to add a property to each
# trainer in the dictionary called 'availability' which is a list of
# tuples of the form: [(start_timestamp, duration)] where the trainer
# is available for the duration of time in seconds starting at the
# start_timestamp. The availability is the intersection of the wifi on
# time and charging time. If the trainer is charging and on wifi at
# the same time, then the availability is the duration of time that is
# common.

from datetime import timedelta


def calculate_availability(trainers_data):
    """Add an ``'availability'`` list to every trainer, in place.

    Availability is the pairwise intersection of each ``'wifi_on'`` interval
    with each ``'charging'`` interval. Only overlaps of positive length are
    kept, in wifi-major order.

    Args:
        trainers_data: Mapping of guid to a telemetry dict whose
            ``'wifi_on'`` and ``'charging'`` entries are lists of
            ``(start_timestamp, duration_seconds)`` tuples.

    Returns:
        None. Each telemetry dict gains ``'availability'``: a list of
        ``(start_timestamp, duration_seconds)`` tuples.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'

    def to_span(start_text, seconds):
        # Convert (start, duration) into concrete (begin, finish) datetimes.
        begin = datetime.strptime(start_text, stamp_format)
        return begin, begin + timedelta(seconds=seconds)

    for guid, telemetry in trainers_data.items():
        wifi_spans = telemetry['wifi_on']
        charge_spans = telemetry['charging']
        overlaps = []

        for wifi_span in wifi_spans:
            wifi_begin, wifi_finish = to_span(*wifi_span)

            for charge_span in charge_spans:
                charge_begin, charge_finish = to_span(*charge_span)

                # The overlap runs from the later start to the earlier end.
                overlap_begin = max(wifi_begin, charge_begin)
                overlap_finish = min(wifi_finish, charge_finish)
                if overlap_finish <= overlap_begin:
                    continue

                overlaps.append((
                    overlap_begin.strftime(stamp_format),
                    (overlap_finish - overlap_begin).total_seconds(),
                ))

        trainers_data[guid]['availability'] = overlaps

# Mutates new_trainers_data in place: every trainer gains an 'availability' key.
calculate_availability(new_trainers_data)
new_trainers_data

In [None]:
# .get() avoids a KeyError when guid '946' is not in the sampled subset.
new_trainers_data.get('946', {}).get('availability', [])

In [None]:
from collections import defaultdict

# Using the trainer availability data, write a function to get the
# count of trainers who satisfy the availability criteria for each
# second. Return this list of tuples: [(timestamp, count)].
# Note: the result only contains seconds covered by at least one
# availability interval; it is not clipped or zero-filled to the span
# between the earliest and the last screen_on event.

def count_available_trainers(trainers_data):
    """Count, for every second, how many trainers are available.

    Each ``(start, duration)`` entry in a trainer's ``'availability'`` list
    contributes one count to each of the ``int(duration)`` whole seconds
    beginning at ``start``.

    The scan over ``'screen_on'`` events that used to precede the counting
    has been removed: its result was never used, so it only cost time and
    forced every trainer to carry a ``'screen_on'`` key. The output is
    therefore not clipped or zero-filled to any screen_on window.

    Args:
        trainers_data: Mapping of guid to a telemetry dict whose
            ``'availability'`` entry is a list of
            ``(start_timestamp, duration_seconds)`` tuples.

    Returns:
        A list of ``(timestamp, count)`` tuples sorted by time, containing
        only seconds in which at least one trainer is available.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    # Tally of available trainers keyed by second.
    availability_count = defaultdict(int)

    for telemetry in trainers_data.values():
        for start, duration in telemetry['availability']:
            start_dt = datetime.strptime(start, time_format)
            for second in range(int(duration)):
                availability_count[start_dt + timedelta(seconds=second)] += 1

    # Sort chronologically, then format the keys back into strings.
    return [
        (moment.strftime(time_format), count)
        for moment, count in sorted(availability_count.items())
    ]

# Per-second availability tally across the sampled trainers.
availability_counts = count_available_trainers(new_trainers_data)
availability_counts

In [None]:
# Write a function to plot the availability counts for each second
# using availability_counts. The x-axis should be the timestamp and
# the y-axis should be the count of available trainers as a percentage
# based on the total number of trainers in the trainers_data.

import matplotlib.pyplot as plt

def plot_availability_counts(availability_counts, total_trainers):
    """Plot the percentage of available trainers over time.

    Args:
        availability_counts: Sequence of ``(timestamp, count)`` pairs with
            timestamps formatted as ``'%Y-%m-%d %H:%M:%S'``.
        total_trainers: Denominator used to convert counts to percentages.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'

    # x series: parsed timestamps; y series: counts as a share of the total.
    x_times = [datetime.strptime(stamp, stamp_format) for stamp, _ in availability_counts]
    y_percent = [tally / total_trainers * 100 for _, tally in availability_counts]

    plt.figure(figsize=(12, 6))
    plt.plot(x_times, y_percent, label='Availability (%)')
    plt.xlabel('Timestamp')
    plt.ylabel('Available Trainers (%)')
    plt.title('Availability of Trainers Over Time')
    plt.legend()
    plt.grid(True)
    plt.show()

# Use the number of trainers actually analysed as the denominator: the
# sample holds fewer than N trainers when the filter leaves fewer than N.
plot_availability_counts(availability_counts, len(new_trainers_data))

In [None]:
# Get favourable battery conditions for all trainers, i.e. the periods
# when the battery level is at or above a configurable threshold. The
# battery_level events in trainer_data are a list of tuples of the
# form: [(timestamp, battery_level)]. For each trainer, build a list of
# tuples of the form: [(start_timestamp,
# duration_of_battery_level_seconds)] for the durations where the
# battery level is greater than or equal to the configured base
# threshold (the function below defaults to 50%; pass threshold=80 for
# an 80% cut-off) and store it in a new key called
# 'eligible_battery_level' in the trainer's dictionary. If the trainer
# is still at an eligible battery level at the end of the events list,
# then the end time is the last timestamp in the list.

def calculate_eligible_battery_level(trainers_data, threshold=50):
    """Add an ``'eligible_battery_level'`` list to every trainer, in place.

    An eligible span opens at the first battery sample whose level is at or
    above ``threshold`` and closes at the next sample below it. A span that
    is still open after the last sample is closed at that last sample's
    timestamp.

    Args:
        trainers_data: Mapping of guid to a telemetry dict whose
            ``'battery_level'`` entry is a list of ``(timestamp, level)``
            tuples, the level being a string such as ``'100.0%'``.
        threshold: Minimum battery percentage counted as eligible.

    Returns:
        None. Each telemetry dict gains ``'eligible_battery_level'``: a list
        of ``(start_timestamp, duration_seconds)`` tuples.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'

    for guid, telemetry in trainers_data.items():
        samples = telemetry['battery_level']
        spans = []
        span_begin = None

        for stamp, reading in samples:
            is_eligible = float(reading.strip('%')) >= threshold

            if is_eligible and not span_begin:
                # First eligible sample of a run opens a span.
                span_begin = datetime.strptime(stamp, stamp_format)
            elif not is_eligible and span_begin:
                # First ineligible sample after a run closes the span.
                span_finish = datetime.strptime(stamp, stamp_format)
                spans.append((
                    span_begin.strftime(stamp_format),
                    (span_finish - span_begin).total_seconds(),
                ))
                span_begin = None

        # A span left open is closed at the final sample.
        if span_begin:
            span_finish = datetime.strptime(samples[-1][0], stamp_format)
            spans.append((
                span_begin.strftime(stamp_format),
                (span_finish - span_begin).total_seconds(),
            ))

        trainers_data[guid]['eligible_battery_level'] = spans

# Mutates new_trainers_data in place, adding 'eligible_battery_level'.
# No threshold is passed, so the function's default of 50 applies.
calculate_eligible_battery_level(new_trainers_data)
new_trainers_data

In [None]:
from datetime import timedelta

# For each trainer, the basic eligibility criterion is that the trainer
# is on wifi and meets the eligible battery level. Write a function to
# add a property to each trainer in the dictionary called
# 'basic_eligibility_met', which is a list of tuples of the form:
# [(start_timestamp, duration)] where the trainer is eligible for the
# duration of time in seconds starting at the start_timestamp. The
# eligibility is the intersection of the wifi-on time and the eligible
# battery level time: if the trainer has an eligible battery level and
# is on wifi at the same time, then the eligibility is the duration of
# time that is common to both.

def calculate_basic_eligibility(trainers_data):
    """Add a ``'basic_eligibility_met'`` list to every trainer, in place.

    Basic eligibility is the pairwise intersection of each ``'wifi_on'``
    interval with each ``'eligible_battery_level'`` interval. Only overlaps
    of positive length are kept, in wifi-major order.

    Args:
        trainers_data: Mapping of guid to a telemetry dict whose
            ``'wifi_on'`` and ``'eligible_battery_level'`` entries are lists
            of ``(start_timestamp, duration_seconds)`` tuples.

    Returns:
        None. Each telemetry dict gains ``'basic_eligibility_met'``: a list
        of ``(start_timestamp, duration_seconds)`` tuples.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'

    def bounds(start_text, seconds):
        # Turn (start, duration) into (begin, finish) datetimes.
        opened = datetime.strptime(start_text, stamp_format)
        return opened, opened + timedelta(seconds=seconds)

    for guid, telemetry in trainers_data.items():
        wifi_spans = telemetry['wifi_on']
        battery_spans = telemetry['eligible_battery_level']
        common = []

        for wifi_span in wifi_spans:
            wifi_opened, wifi_closed = bounds(*wifi_span)

            for battery_span in battery_spans:
                battery_opened, battery_closed = bounds(*battery_span)

                # Shared window: later of the starts to earlier of the ends.
                shared_opened = max(wifi_opened, battery_opened)
                shared_closed = min(wifi_closed, battery_closed)
                if shared_closed <= shared_opened:
                    continue

                common.append((
                    shared_opened.strftime(stamp_format),
                    (shared_closed - shared_opened).total_seconds(),
                ))

        trainers_data[guid]['basic_eligibility_met'] = common

# Mutates new_trainers_data in place, adding 'basic_eligibility_met'.
calculate_basic_eligibility(new_trainers_data)
new_trainers_data

In [None]:
# using the trainer eligibility data, write a function to get the
# count of trainers who satisfy the eligibility criteria for each
# second starting from the earliest screen_on event to the last
# screen_on event. Return this list of tuple: [(timestamp, count)].

def count_eligible_trainers(trainers_data):
    """Count, for every second, how many trainers meet basic eligibility.

    Each ``(start, duration)`` entry in a trainer's
    ``'basic_eligibility_met'`` list contributes one count to each of the
    ``int(duration)`` whole seconds beginning at ``start``.

    The scan over ``'screen_on'`` events that used to precede the counting
    has been removed: its result was never used, so it only cost time and
    forced every trainer to carry a ``'screen_on'`` key. The output is
    therefore not clipped or zero-filled to any screen_on window.

    Args:
        trainers_data: Mapping of guid to a telemetry dict whose
            ``'basic_eligibility_met'`` entry is a list of
            ``(start_timestamp, duration_seconds)`` tuples.

    Returns:
        A list of ``(timestamp, count)`` tuples sorted by time, containing
        only seconds in which at least one trainer is eligible.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    # Tally of eligible trainers keyed by second.
    eligibility_count = defaultdict(int)

    for telemetry in trainers_data.values():
        for start, duration in telemetry['basic_eligibility_met']:
            start_dt = datetime.strptime(start, time_format)
            for second in range(int(duration)):
                eligibility_count[start_dt + timedelta(seconds=second)] += 1

    # Sort chronologically, then format the keys back into strings.
    return [
        (moment.strftime(time_format), count)
        for moment, count in sorted(eligibility_count.items())
    ]

# Per-second basic-eligibility tally across the sampled trainers.
eligibility_counts = count_eligible_trainers(new_trainers_data)
eligibility_counts

In [None]:
# Write a function to plot the eligibility counts for each second
# using eligibility_counts. The x-axis should be the timestamp and
# the y-axis should be the count of eligible trainers as a percentage
# based on the total number of trainers in the trainers_data.

def plot_eligibility_counts(eligibility_counts, total_trainers):
    """Plot the percentage of eligible trainers over time.

    Args:
        eligibility_counts: Sequence of ``(timestamp, count)`` pairs with
            timestamps formatted as ``'%Y-%m-%d %H:%M:%S'``.
        total_trainers: Denominator used to convert counts to percentages.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'

    # x series: parsed timestamps; y series: counts as a share of the total.
    x_times = [datetime.strptime(stamp, stamp_format) for stamp, _ in eligibility_counts]
    y_percent = [tally / total_trainers * 100 for _, tally in eligibility_counts]

    plt.figure(figsize=(12, 6))
    plt.plot(x_times, y_percent, label='Eligibility (%)')
    plt.xlabel('Timestamp')
    plt.ylabel('Eligible Trainers (%)')
    plt.title('Eligibility of Trainers Over Time')
    plt.legend()
    plt.grid(True)
    plt.show()

# Use the number of trainers actually analysed as the denominator: the
# sample holds fewer than N trainers when the filter leaves fewer than N.
plot_eligibility_counts(eligibility_counts, len(new_trainers_data))

In [None]:
# Overlay the availability and eligibility plots on the same graph to
# compare the two. with X axis as the timestamp and Y axis as the
# count of available and eligible trainers as a percentage based on
# the total number of trainers in the trainers_data.

def plot_availability_and_eligibility(availability_counts, eligibility_counts, total_trainers):
    """Overlay the availability and eligibility percentages on one figure.

    Args:
        availability_counts: Sequence of ``(timestamp, count)`` pairs.
        eligibility_counts: Sequence of ``(timestamp, count)`` pairs.
        total_trainers: Denominator used to convert counts to percentages.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'

    # Availability series (blue line).
    avail_times = [datetime.strptime(stamp, stamp_format) for stamp, _ in availability_counts]
    avail_percent = [tally / total_trainers * 100 for _, tally in availability_counts]

    # Eligibility series (green line).
    elig_times = [datetime.strptime(stamp, stamp_format) for stamp, _ in eligibility_counts]
    elig_percent = [tally / total_trainers * 100 for _, tally in eligibility_counts]

    plt.figure(figsize=(12, 6))
    plt.plot(avail_times, avail_percent, label='Availability (%)', color='blue')
    plt.plot(elig_times, elig_percent, label='Eligibility (%)', color='green')
    plt.xlabel('Timestamp')
    plt.ylabel('Trainers (%)')
    plt.title('Availability and Eligibility of Trainers Over Time')
    plt.legend()
    plt.grid(True)
    plt.show()

# Use the number of trainers actually analysed as the denominator: the
# sample holds fewer than N trainers when the filter leaves fewer than N.
plot_availability_and_eligibility(availability_counts, eligibility_counts, len(new_trainers_data))

In [None]:
# Create a timeline plot using the 'charging' field of the trainer data
# to show the percentage of devices charging at any given point in
# time, i.e. every 1 second. Take this as the only availability criterion
# and plot the timeline. Cut the timeline before 2020-01-27.

def count_charging_trainers(trainers_data, cutoff=datetime(2020, 1, 27)):
    """Count, for every second at or after ``cutoff``, how many trainers charge.

    Each ``(start, duration)`` entry in a trainer's ``'charging'`` list
    covers the ``int(duration)`` whole seconds beginning at ``start``.
    Seconds before ``cutoff`` are discarded. An interval that begins before
    the cutoff but extends past it is clipped rather than dropped, so its
    post-cutoff seconds still count.

    Args:
        trainers_data: Mapping of guid to a telemetry dict whose
            ``'charging'`` entry is a list of
            ``(start_timestamp, duration_seconds)`` tuples.
        cutoff: Earliest datetime to include, or ``None`` to keep everything.

    Returns:
        A list of ``(timestamp, count)`` tuples sorted by time.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    one_second = timedelta(seconds=1)

    # Initialize a dictionary to count charging per second
    charging_count = defaultdict(int)

    # Iterate over each trainer's charging intervals and count the seconds
    for telemetry in trainers_data.values():
        for start, duration in telemetry['charging']:
            start_dt = datetime.strptime(start, time_format)
            # Number of leading seconds before the cutoff (ceiling division
            # done on timedeltas); 0 when the interval starts at or after it.
            if cutoff is None:
                skipped = 0
            else:
                skipped = max(0, -((start_dt - cutoff) // one_second))
            for second in range(skipped, int(duration)):
                charging_count[start_dt + timedelta(seconds=second)] += 1

    # Sort chronologically, then format the keys back into strings.
    return [
        (moment.strftime(time_format), count)
        for moment, count in sorted(charging_count.items())
    ]

# Per-second tally of charging trainers across the sampled trainers.
charging_counts = count_charging_trainers(new_trainers_data)

def plot_charging_counts(charging_counts, total_trainers):
    """Plot the percentage of charging trainers over time.

    Args:
        charging_counts: Sequence of ``(timestamp, count)`` pairs with
            timestamps formatted as ``'%Y-%m-%d %H:%M:%S'``.
        total_trainers: Denominator used to convert counts to percentages.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'

    # x series: parsed timestamps; y series: counts as a share of the total.
    x_times = [datetime.strptime(stamp, stamp_format) for stamp, _ in charging_counts]
    y_percent = [tally / total_trainers * 100 for _, tally in charging_counts]

    plt.figure(figsize=(12, 6))
    plt.plot(x_times, y_percent, label='Charging (%)')
    plt.xlabel('Timestamp')
    plt.ylabel('Charging Trainers (%)')
    plt.title('Charging Trainers Over Time')
    plt.legend()
    plt.grid(True)
    plt.show()

# Use the number of trainers actually analysed as the denominator: the
# sample holds fewer than N trainers when the filter leaves fewer than N.
plot_charging_counts(charging_counts, len(new_trainers_data))

In [None]:
# Create a timeline plot using the 'charging' and 'battery level' fields
# of the trainer data to show the percentage of devices that are charging
# or have a battery level of at least 50% at any given point in time,
# i.e. every 1 second. Take this as the only availability criterion
# and plot the timeline. Cut the timeline before 2020-01-27.

# A trainer that is charging AND above the battery threshold in the same
# second must contribute 1, not 2. The seconds covered by either criterion
# are therefore collected into a per-trainer set before being tallied.

def count_charging_or_battery_trainers(trainers_data, threshold=50, cutoff=datetime(2020, 1, 27)):
    """Count, per second, the trainers that are charging or battery-eligible.

    For every trainer, the whole seconds covered by its ``'charging'``
    intervals and by its ``'eligible_battery_level'`` intervals are merged
    into one set, so a trainer adds at most 1 to any second. Seconds before
    ``cutoff`` are discarded; an interval that straddles the cutoff is
    clipped rather than dropped.

    Args:
        trainers_data: Mapping of guid to a telemetry dict whose
            ``'charging'`` and ``'eligible_battery_level'`` entries are
            lists of ``(start_timestamp, duration_seconds)`` tuples.
        threshold: Unused here; kept for backward compatibility. The
            threshold in effect is whatever was used to build
            ``'eligible_battery_level'``.
        cutoff: Earliest datetime to include, or ``None`` to keep everything.

    Returns:
        A list of ``(timestamp, count)`` tuples sorted by time.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    one_second = timedelta(seconds=1)

    # Initialize a dictionary to count charging or battery level per second
    charging_or_battery_count = defaultdict(int)

    for telemetry in trainers_data.values():
        # Distinct seconds in which this trainer satisfies either criterion.
        covered_seconds = set()

        for key in ('charging', 'eligible_battery_level'):
            for start, duration in telemetry[key]:
                start_dt = datetime.strptime(start, time_format)
                # Leading seconds before the cutoff (ceiling on timedeltas).
                if cutoff is None:
                    skipped = 0
                else:
                    skipped = max(0, -((start_dt - cutoff) // one_second))
                covered_seconds.update(
                    start_dt + timedelta(seconds=second)
                    for second in range(skipped, int(duration))
                )

        for moment in covered_seconds:
            charging_or_battery_count[moment] += 1

    # Sort chronologically, then format the keys back into strings.
    return [
        (moment.strftime(time_format), count)
        for moment, count in sorted(charging_or_battery_count.items())
    ]

# Per-second tally of trainers that are charging or battery-eligible.
charging_or_battery_counts = count_charging_or_battery_trainers(new_trainers_data)

def plot_charging_or_battery_counts(charging_or_battery_counts, total_trainers):
    """Plot the percentage of trainers that are charging or battery-eligible.

    Args:
        charging_or_battery_counts: Sequence of ``(timestamp, count)`` pairs
            with timestamps formatted as ``'%Y-%m-%d %H:%M:%S'``.
        total_trainers: Denominator used to convert counts to percentages.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'

    # x series: parsed timestamps; y series: counts as a share of the total.
    x_times = [datetime.strptime(stamp, stamp_format) for stamp, _ in charging_or_battery_counts]
    y_percent = [tally / total_trainers * 100 for _, tally in charging_or_battery_counts]

    plt.figure(figsize=(12, 6))
    plt.plot(x_times, y_percent, label='Charging or Battery > 50% (%)')
    plt.xlabel('Timestamp')
    plt.ylabel('Trainers (%)')
    plt.title('Trainers Charging or Battery > 50% Over Time')
    plt.legend()
    plt.grid(True)
    plt.show()

# Use the number of trainers actually analysed as the denominator: the
# sample holds fewer than N trainers when the filter leaves fewer than N.
plot_charging_or_battery_counts(charging_or_battery_counts, len(new_trainers_data))

In [None]:
from scipy.integrate import simpson as simps

# plot charging_or_battery_counts and charging_counts on the same
# graph. Also calculate and print the ratio of the area under the
# curves. Annotate the plot with the ratio.

def plot_charging_and_battery_counts(charging_counts, charging_or_battery_counts, total_trainers):
    """Overlay the two percentage curves and annotate their area ratio.

    The area under each curve is computed with ``simps`` using ``dx=1``
    over the percentage samples; the annotation shows
    ``area(charging or battery) / area(charging)``.

    Args:
        charging_counts: Sequence of ``(timestamp, count)`` pairs.
        charging_or_battery_counts: Sequence of ``(timestamp, count)`` pairs.
        total_trainers: Denominator used to convert counts to percentages.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'

    # Charging-only series (blue line).
    charge_times = [datetime.strptime(stamp, stamp_format) for stamp, _ in charging_counts]
    charge_percent = [tally / total_trainers * 100 for _, tally in charging_counts]

    # Charging-or-battery series (green line).
    either_times = [datetime.strptime(stamp, stamp_format) for stamp, _ in charging_or_battery_counts]
    either_percent = [tally / total_trainers * 100 for _, tally in charging_or_battery_counts]

    # Ratio of the areas under the two curves.
    charge_area = simps(charge_percent, dx=1)
    either_area = simps(either_percent, dx=1)
    ratio = either_area / charge_area

    plt.figure(figsize=(12, 6))
    plt.plot(charge_times, charge_percent, label='Charging (%)', color='blue')
    plt.plot(either_times, either_percent, label='Charging or Battery > 50% (%)', color='green')
    plt.xlabel('Timestamp')
    plt.ylabel('Trainers (%)')
    plt.title('Charging and Charging or Battery > 50% Over Time')
    plt.legend()
    plt.grid(True)

    # Ratio label in the upper-left corner, in axes-fraction coordinates.
    plt.annotate(
        f'Ratio: {ratio:.2f}',
        xy=(0.05, 0.95),
        xycoords='axes fraction',
        fontsize=12,
        color='red',
        bbox=dict(facecolor='white', alpha=0.8),
    )

    plt.show()

# Use the number of trainers actually analysed as the denominator: the
# sample holds fewer than N trainers when the filter leaves fewer than N.
plot_charging_and_battery_counts(charging_counts, charging_or_battery_counts, len(new_trainers_data))

In [None]:
# I need to plot a small section of the above timeline plot from 2020-01-30
# to 2020-02-01. These are the rcparams to use: # Set rcParams for
# text size 'axes.labelsize': 16, 'axes.titlesize': 16,
# 'xtick.labelsize': 15, 'ytick.labelsize': 15, 'figure.figsize': [6,
# 4]. Use the same data as the previous plot. The y axis should be
# percentage of available trainers. The x axis should be the
# timestamp. the legend should have green line from above as AVL_EVAL
# and blue line from above as AVL_TRAIN. There should be no title for
# the plot.

import matplotlib.pyplot as plt

# Set rcParams
plt.rcParams.update({
    'axes.labelsize': 16,
    'axes.titlesize': 16,
    'xtick.labelsize': 15,
    'ytick.labelsize': 15,
    'figure.figsize': [6, 4]
})

# Filter the data for the specified date range
# NOTE(review): the request above asks for 2020-01-30 to 2020-02-01 while
# this window is 2020-01-29 to 2020-02-02; confirm which range is intended.
start_date = datetime.strptime('2020-01-29', '%Y-%m-%d')
end_date = datetime.strptime('2020-02-02', '%Y-%m-%d')

filtered_charging_counts = [
    (ts, count) for ts, count in charging_counts
    if start_date <= datetime.strptime(ts, '%Y-%m-%d %H:%M:%S') <= end_date
]

filtered_charging_or_battery_counts = [
    (ts, count) for ts, count in charging_or_battery_counts
    if start_date <= datetime.strptime(ts, '%Y-%m-%d %H:%M:%S') <= end_date
]

# Percentages are relative to the trainers actually analysed; the sample
# holds fewer than N trainers when the filter leaves fewer than N.
analysed_trainer_count = len(new_trainers_data)

# Extract timestamps and counts for the filtered data
charging_timestamps = [datetime.strptime(ts, '%Y-%m-%d %H:%M:%S') for ts, _ in filtered_charging_counts]
fin_charging_counts = [count / analysed_trainer_count * 100 for _, count in filtered_charging_counts]

charging_or_battery_timestamps = [datetime.strptime(ts, '%Y-%m-%d %H:%M:%S') for ts, _ in filtered_charging_or_battery_counts]
fin_charging_or_battery_counts = [count / analysed_trainer_count * 100 for _, count in filtered_charging_or_battery_counts]

# Plot the data
plt.plot(charging_timestamps, fin_charging_counts, label='AVL_TRAIN', color='blue')
plt.plot(charging_or_battery_timestamps, fin_charging_or_battery_counts, label='AVL_EVAL', color='green')
plt.xlabel('Timestamp')
plt.ylabel('Trainers (%)')
plt.legend()
plt.grid(True)

# Fix the x-axis ticks to just DD-MM and avoid crowding of labels

plt.gca().xaxis.set_major_locator(plt.matplotlib.dates.DayLocator())
plt.gca().xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%d-%m'))

# Set rotation to 0 to keep labels horizontal
plt.gca().tick_params(axis='x', rotation=0)

# Save the plot as a PDF
plt.tight_layout()
plt.savefig('new_availability.pdf', format='pdf', bbox_inches='tight')
plt.show()

In [None]:
!pip install scipy

In [None]:
from scipy.integrate import simpson as simps

# plot charging_or_battery_counts and charging_counts on the same
# graph. Also calculate and print the ratio of the area under the
# curves. Annotate the plot with the ratio.

def plot_charging_and_battery_counts(charging_counts, charging_or_battery_counts, total_trainers):
    """Overlay the two percentage curves and annotate their area ratio.

    NOTE(review): this redefines ``plot_charging_and_battery_counts``, which
    an earlier cell of this notebook already defines with the same body; the
    later definition shadows the earlier one once this cell runs.

    Args:
        charging_counts: Sequence of ``(timestamp, count)`` pairs.
        charging_or_battery_counts: Sequence of ``(timestamp, count)`` pairs.
        total_trainers: Denominator used to convert counts to percentages.
    """
    # Extract timestamps and counts for charging
    charging_timestamps = [datetime.strptime(ts, '%Y-%m-%d %H:%M:%S') for ts, _ in charging_counts]
    charging_counts = [count / total_trainers * 100 for _, count in charging_counts]

    # Extract timestamps and counts for charging or battery
    charging_or_battery_timestamps = [datetime.strptime(ts, '%Y-%m-%d %H:%M:%S') for ts, _ in charging_or_battery_counts]
    charging_or_battery_counts = [count / total_trainers * 100 for _, count in charging_or_battery_counts]

    # Calculate the area under the curves (unit spacing between samples)
    area_charging = simps(charging_counts, dx=1)
    area_charging_or_battery = simps(charging_or_battery_counts, dx=1)
    ratio = area_charging_or_battery / area_charging

    # Plot the data
    plt.figure(figsize=(12, 6))
    plt.plot(charging_timestamps, charging_counts, label='Charging (%)', color='blue')
    plt.plot(charging_or_battery_timestamps, charging_or_battery_counts, label='Charging or Battery > 50% (%)', color='green')
    plt.xlabel('Timestamp')
    plt.ylabel('Trainers (%)')
    plt.title('Charging and Charging or Battery > 50% Over Time')
    plt.legend()
    plt.grid(True)

    # Annotate the plot with the ratio (upper-left, axes-fraction coordinates)
    plt.annotate(f'Ratio: {ratio:.2f}', xy=(0.05, 0.95), xycoords='axes fraction', fontsize=12, color='red', 
                 bbox=dict(facecolor='white', alpha=0.8))

    plt.show()

# Use the number of trainers actually analysed as the denominator: the
# sample holds fewer than N trainers when the filter leaves fewer than N.
plot_charging_and_battery_counts(charging_counts, charging_or_battery_counts, len(new_trainers_data))

In [None]:
# .get() avoids a KeyError when guid '946' is not in the sampled subset.
new_trainers_data.get('946', {}).keys()

In [31]:
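# Descriptions of important keys:
# battery_level: list of (timestamp, level) readings, e.g. ('2020-01-29 18:42:18', '100.0%').
# charging: list of (start_timestamp, duration_seconds) charging sessions.
# availability: list of (start_timestamp, duration_seconds) spans where the trainer is on wifi and charging.
# eligible_battery_level: list of (start_timestamp, duration_seconds) spans where the battery level is at or above the threshold.
# basic_eligibility_met: list of (start_timestamp, duration_seconds) spans where the trainer is on wifi and has an eligible battery level.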


In [None]:
# .get() avoids a KeyError when guid '946' is not in the sampled subset.
new_trainers_data.get('946', {}).get('eligible_battery_level', [])

In [None]:
# .get() avoids a KeyError when guid '946' is not in the sampled subset.
new_trainers_data.get('946', {}).get('charging', [])

In [None]:
# .get() avoids a KeyError when guid '946' is not in the sampled subset.
new_trainers_data.get('946', {}).get('battery_level', [])


In [35]:
from datetime import datetime, timedelta

# The trainer can exist in one of the three states: AVL_TRAIN,
# AVL_EVAL, UN_AVL. The trainer is in AVL_TRAIN state if the trainer
# is charging. It is AVL_EVAL if either the trainer is charging or
# meets the 
# eligible battery level. It is UN_AVL if the trainer is neither
# charging nor has the eligible battery level. 
# Write a function to generate timestamped events in the property
# "TRN_AVL_EVENTS_TS" for each trainer in the new_trainers_data when
# the trainer transitions between the states. The events should be of
# the form: (timestamp, state). Initial state can be UN_AVL. The timestamp should be the start of
# the new state from the above three states based on the criteria.
# Note that the charging and eligible_battery_level events are
# (timestamp, duration) tuples. So their durations need to be
# considered while switching events. There could be overlaps in the
# charging and eligible_battery_level events.

def generate_trainer_availability_events(trainers_data):
    """Derive availability-state transitions for every trainer, in place.

    For each trainer, the (timestamp, duration) intervals in
    telemetry['charging'] and telemetry['eligible_battery_level'] are merged
    into one timeline and the resulting state changes are stored in
    telemetry['TRN_AVL_EVENTS_TS'] as a list of (timestamp, state) tuples,
    with timestamps formatted as '%Y-%m-%d %H:%M:%S'.

    States, in priority order:
        AVL_TRAIN: at least one charging interval is open.
        AVL_EVAL:  not charging, but an eligible-battery interval is open.
        UN_AVL:    neither (also the implicit initial state, so no event is
                   emitted until the trainer first leaves it).

    Args:
        trainers_data: dict mapping trainer id to a telemetry dict holding the
            two interval lists above; durations are in seconds.

    Returns:
        None. Each telemetry dict is mutated.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    sources = ('charging', 'eligible_battery_level')

    for telemetry in trainers_data.values():
        # Net change in the number of open intervals per source at each
        # instant. Aggregating per timestamp makes the result independent of
        # how "on"/"off" markers would sort against each other, so a
        # zero-duration interval cancels out instead of staying "on" forever.
        changes = {}
        for source in sources:
            for timestamp, duration in telemetry[source]:
                start_dt = datetime.strptime(timestamp, time_format)
                end_dt = start_dt + timedelta(seconds=duration)
                changes.setdefault(start_dt, dict.fromkeys(sources, 0))[source] += 1
                changes.setdefault(end_dt, dict.fromkeys(sources, 0))[source] -= 1

        # Counters (not booleans) so that overlapping intervals of the same
        # source only end the state when the last one closes.
        open_intervals = dict.fromkeys(sources, 0)
        events = []
        current_state = 'UN_AVL'

        for event_time in sorted(changes):
            for source, delta in changes[event_time].items():
                open_intervals[source] += delta

            if open_intervals['charging'] > 0:
                new_state = 'AVL_TRAIN'
            elif open_intervals['eligible_battery_level'] > 0:
                new_state = 'AVL_EVAL'
            else:
                new_state = 'UN_AVL'

            # The state is evaluated once per distinct timestamp, so at most
            # one transition is recorded per instant.
            if new_state != current_state:
                events.append((event_time.strftime(time_format), new_state))
                current_state = new_state

        telemetry['TRN_AVL_EVENTS_TS'] = events

generate_trainer_availability_events(new_trainers_data)


In [41]:
# The trainers indicate 2 or 3 levels of events. In the first case of
# 2-level-events, the trainer can exist in either AVL_TRAIN or UN_AVL.
# In the case of 3-level-events, the trainer can exist in AVL_TRAIN, AVL_EVAL, UN_AVL. The trainer is in AVL_TRAIN state if the trainer
# is charging. It is AVL_EVAL if either the trainer is charging or
# meets or exceeds the 
# threshold battery level. It is UN_AVL if the trainer is neither
# charging nor has the battery level threshold. In the case of 2-level
# events, only the charging events are considered. In the case of
# 3-level events, both charging and eligible_battery_level events are
# considered.
# Write a function to generate timestamped events in a property that 
# specifies the event-level type and the battery_threshold
# for each trainer in the new_trainers_data when
# the trainer transitions between the states. The event-level type and
# battery threshold should be taken as input and be used in naming the
# property. The corresponding events in the property should be of
# the form: (timestamp, state). Initial state can be UN_AVL. The timestamp should be the start of
# the new state from the above three states based on the criteria.
# Note that the charging and eligible_battery_level events are
# (timestamp, duration) tuples. So their durations need to be
# considered while switching events. There could be overlaps in the
# charging and eligible_battery_level events.

def generate_trainer_events(trainers_data, event_level, battery_threshold):
    """Derive 2- or 3-level availability transitions for every trainer, in place.

    The (timestamp, duration) intervals in telemetry['charging'] and, when
    event_level == 3, telemetry['eligible_battery_level'] are merged into one
    timeline. The resulting state changes are stored as a list of
    (timestamp, state) tuples ('%Y-%m-%d %H:%M:%S' timestamps) under:

        'TRN_AVL_EVENTS_2_LEVEL'                                if event_level == 2
        f'TRN_AVL_EVENTS_{event_level}_LEVEL_{battery_threshold}' otherwise

    States, in priority order:
        AVL_TRAIN: at least one charging interval is open.
        AVL_EVAL:  event_level == 3, not charging, and an eligible-battery
                   interval is open.
        UN_AVL:    neither (also the implicit initial state).

    Args:
        trainers_data: dict mapping trainer id to a telemetry dict; durations
            are in seconds.
        event_level: 2 uses charging intervals only; 3 also uses
            eligible_battery_level. Any other value behaves like 2 for the
            state logic but is named like 3.
        battery_threshold: used ONLY to name the stored property. The events
            come from telemetry['eligible_battery_level'] as it currently is.
            NOTE(review): presumably that property must already have been
            computed for this threshold — confirm against the upstream cell.

    Returns:
        None. Each telemetry dict is mutated.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    if event_level == 3:
        sources = ('charging', 'eligible_battery_level')
    else:
        sources = ('charging',)

    if event_level == 2:
        property_name = f'TRN_AVL_EVENTS_{event_level}_LEVEL'
    else:
        property_name = f'TRN_AVL_EVENTS_{event_level}_LEVEL_{battery_threshold}'

    for telemetry in trainers_data.values():
        # Net change in the number of open intervals per source at each
        # instant. Aggregating per timestamp makes the result independent of
        # how "on"/"off" markers would sort against each other, so a
        # zero-duration interval cancels out instead of staying "on" forever.
        changes = {}
        for source in sources:
            for timestamp, duration in telemetry[source]:
                start_dt = datetime.strptime(timestamp, time_format)
                end_dt = start_dt + timedelta(seconds=duration)
                changes.setdefault(start_dt, dict.fromkeys(sources, 0))[source] += 1
                changes.setdefault(end_dt, dict.fromkeys(sources, 0))[source] -= 1

        # Counters (not booleans) so that overlapping intervals of the same
        # source only end the state when the last one closes. The battery
        # counter stays at 0 unless event_level == 3.
        open_intervals = {'charging': 0, 'eligible_battery_level': 0}
        events = []
        current_state = 'UN_AVL'

        for event_time in sorted(changes):
            for source, delta in changes[event_time].items():
                open_intervals[source] += delta

            if open_intervals['charging'] > 0:
                new_state = 'AVL_TRAIN'
            elif open_intervals['eligible_battery_level'] > 0:
                new_state = 'AVL_EVAL'
            else:
                new_state = 'UN_AVL'

            # The state is evaluated once per distinct timestamp, so at most
            # one transition is recorded per instant.
            if new_state != current_state:
                events.append((event_time.strftime(time_format), new_state))
                current_state = new_state

        telemetry[property_name] = events

# NOTE(review): battery_threshold only changes the name of the stored property;
# the events themselves are built from telemetry['eligible_battery_level'] as it
# exists when the call runs. Presumably that property is recomputed upstream for
# each threshold before the matching call is enabled and this cell re-run —
# TODO confirm.
# Later cells read TRN_AVL_EVENTS_2_LEVEL and TRN_AVL_EVENTS_3_LEVEL_50, which
# this cell creates only when the commented-out calls are enabled.
# generate_trainer_events(new_trainers_data, 3, 50)
# generate_trainer_events(new_trainers_data, 2, 0)
generate_trainer_events(new_trainers_data, 3, 75)

In [None]:
# new_trainers_data['946']['TRN_AVL_EVENTS_3_LEVEL_50']
new_trainers_data['946']['TRN_AVL_EVENTS_2_LEVEL']

In [None]:
# Using the TRN_AVL_EVENTS_TS property, write a function to get the
# count of trainers in each state for each second. A trainer in
# train state is also counted in eval at the same time; but a trainer
# in eval state is just counted in eval. After getting
# the counts per second of the trainers, plot it on the time line as
# time vs percentage of trainers in (a) AVL_TRAIN state, (b) AVL_EVAL
# state. Also get the ratio of the area under the curves of the two
# states. Annotate the plot with the ratio.

def count_trainers_in_states(trainers_data, events_key='TRN_AVL_EVENTS_3_LEVEL_75'):
    """Count, for every second, how many trainers are in AVL_TRAIN / AVL_EVAL.

    Each trainer's transition list is walked pairwise: the state set by one
    event is counted for every whole second from that event's timestamp up to
    (but excluding) the next event's timestamp. A trainer in AVL_TRAIN is
    counted under both 'AVL_TRAIN' and 'AVL_EVAL'; a trainer in AVL_EVAL only
    under 'AVL_EVAL'.

    Caveats that follow from this scheme:
      * the state set by a trainer's last event is never counted, because
        there is no following event to bound it;
      * seconds during which no trainer is in AVL_TRAIN or AVL_EVAL get no
        entry in the result at all.

    Args:
        trainers_data: dict mapping trainer id to a telemetry dict.
        events_key: name of the telemetry property holding the
            (timestamp, state) transitions, with '%Y-%m-%d %H:%M:%S'
            timestamps. Defaults to the property this function originally
            read.

    Returns:
        defaultdict mapping datetime -> {'AVL_TRAIN': int, 'AVL_EVAL': int}.
    """
    state_counts = defaultdict(lambda: {'AVL_TRAIN': 0, 'AVL_EVAL': 0})

    for telemetry in trainers_data.values():
        prev_dt = None
        prev_state = None
        for timestamp, state in telemetry[events_key]:
            dt = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
            # Only the two available states contribute, so the per-second
            # walk is skipped entirely for UN_AVL stretches.
            if prev_state in ('AVL_TRAIN', 'AVL_EVAL'):
                for offset in range(int((dt - prev_dt).total_seconds())):
                    bucket = state_counts[prev_dt + timedelta(seconds=offset)]
                    bucket['AVL_EVAL'] += 1
                    if prev_state == 'AVL_TRAIN':
                        bucket['AVL_TRAIN'] += 1
            prev_dt, prev_state = dt, state

    return state_counts

state_counts = count_trainers_in_states(new_trainers_data)

def plot_state_counts(state_counts, total_trainers):
    """Plot the share of trainers in AVL_TRAIN and AVL_EVAL over time.

    Args:
        state_counts: mapping of timestamp -> {'AVL_TRAIN': n, 'AVL_EVAL': n};
            its keys are sorted to form the x axis.
        total_trainers: denominator used to turn counts into percentages.

    The plot is annotated with area(AVL_EVAL) / area(AVL_TRAIN). Both areas are
    integrated with dx=1, i.e. consecutive keys are treated as one unit apart
    even when timestamps are missing from state_counts.
    NOTE(review): scipy.integrate.simps was removed in SciPy 1.14 in favour of
    scipy.integrate.simpson — confirm against the SciPy version in use.
    """
    moments = sorted(state_counts.keys())

    def share_of(state):
        # Percentage of trainers in `state` at each sorted timestamp.
        return [state_counts[moment][state] / total_trainers * 100 for moment in moments]

    train_pct = share_of('AVL_TRAIN')
    eval_pct = share_of('AVL_EVAL')

    train_area = simps(train_pct, dx=1)
    eval_area = simps(eval_pct, dx=1)
    ratio = eval_area / train_area

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(moments, train_pct, label='AVL_TRAIN (%)', color='blue')
    ax.plot(moments, eval_pct, label='AVL_EVAL (%)', color='green')
    ax.set_xlabel('Timestamp')
    ax.set_ylabel('Trainers (%)')
    ax.set_title('Trainers in AVL_TRAIN and AVL_EVAL States Over Time')
    ax.legend()
    ax.grid(True)
    # Ratio label in the top-left corner, in axes-relative coordinates.
    ax.annotate(f'Ratio: {ratio:.2f}', xy=(0.05, 0.95), xycoords='axes fraction',
                fontsize=12, color='red', bbox=dict(facecolor='white', alpha=0.8))
    plt.show()

plot_state_counts(state_counts, N)

In [None]:
new_trainers_data['962'].keys()

In [60]:
# Store the new trainers data in a JSON file

# NOTE: Commented out for safety, should not get overwritten
# with open('new_trainers_data.json', 'w') as outfile:
#     json.dump(new_trainers_data, outfile, indent=4)

In [50]:
# Read the JSON file back into a dictionary.
# This rebinds new_trainers_data to the contents of new_trainers_data.json,
# replacing whatever was built in memory by the cells above. The cell that
# writes this file is commented out, so the file must already exist on disk.
# JSON has no tuple type: pairs that were stored as tuples come back as
# two-element lists.

with open('new_trainers_data.json', 'r') as infile:
    new_trainers_data = json.load(infile)

In [51]:
from datetime import datetime, timedelta

# Process all events in TRN_AVL_EVENTS_3_LEVEL_50,
# TRN_AVL_EVENTS_3_LEVEL_75 and TRN_AVL_EVENTS_2_LEVEL for all
# trainers such that the timestamps are adjusted to reflect the number
# of seconds elapsed since 2020-01-29 18:00:00. Discard all but
# the last event
# before this timestamp. Add an event at the
# start so that the trainer is AVL_TRAIN for the first 5 minutes. It
# should go back to the last state before 2020-01-29 18:00:00 after
# the first 5 minutes.
# After that it should follow the events as per the original data.

def adjust_events(trainers_data,
                  base_time=datetime(2020, 1, 29, 18, 0, 0),
                  warmup=timedelta(minutes=5),
                  keys=('TRN_AVL_EVENTS_3_LEVEL_50',
                        'TRN_AVL_EVENTS_3_LEVEL_75',
                        'TRN_AVL_EVENTS_2_LEVEL')):
    """Rebase availability events to seconds since base_time, in place.

    For every trainer and every property in `keys` that the trainer has:
      * events before base_time are dropped, remembering only the state of
        the last one seen (default 'UN_AVL' when there is none);
      * remaining events become (elapsed_seconds, state) with integer seconds
        measured from base_time;
      * (0, 'AVL_TRAIN') is prepended so each trainer starts available;
      * if no kept event falls inside the warm-up window,
        (warmup_seconds, <remembered state>) is inserted right after it.

    When a kept event does fall inside the warm-up window, no revert event is
    added and that event is kept as-is, so the initial AVL_TRAIN stretch ends
    at that event rather than at the end of the window.

    The input timestamps must be '%Y-%m-%d %H:%M:%S' strings. Because the
    stored values are replaced by integers, calling this a second time on the
    same data raises TypeError from strptime instead of rebasing twice.

    Args:
        trainers_data: dict mapping trainer id to a telemetry dict.
        base_time: datetime that becomes second 0.
        warmup: timedelta length of the initial AVL_TRAIN window.
        keys: names of the telemetry properties to rewrite.

    Returns:
        None. Each telemetry dict is mutated.
    """
    warmup_seconds = int(warmup.total_seconds())

    for telemetry in trainers_data.values():
        for key in keys:
            if key not in telemetry:
                continue

            state_before_base = 'UN_AVL'
            needs_revert_event = True
            rebased = []

            for timestamp, state in telemetry[key]:
                event_time = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
                if event_time < base_time:
                    # Only the most recent pre-base state is remembered.
                    state_before_base = state
                    continue

                elapsed_seconds = int((event_time - base_time).total_seconds())
                rebased.append((elapsed_seconds, state))
                if elapsed_seconds < warmup_seconds:
                    # Inserting a revert at warmup_seconds would land after
                    # this event and break ascending order, so it is skipped.
                    needs_revert_event = False

            prefix = [(0, 'AVL_TRAIN')]
            if needs_revert_event:
                prefix.append((warmup_seconds, state_before_base))

            telemetry[key] = prefix + rebased

adjust_events(new_trainers_data)

In [52]:
# Sanity check. All elements in the TRN_AVL_EVENTS_3_LEVEL_50,
# TRN_AVL_EVENTS_3_LEVEL_75 and TRN_AVL_EVENTS_2_LEVEL should have the
# timestamps in the ascending order. if it fails, print the trainer
# id. Write a function to perform this check.

def check_timestamps_order(trainers_data):
    """Report trainers whose event timestamps are not in ascending order.

    Looks at the three availability properties a trainer may carry
    (missing ones are skipped), takes the first element of every event as
    its timestamp, and prints one line per property whose timestamps differ
    from their sorted order. Nothing is returned.
    """
    event_keys = (
        'TRN_AVL_EVENTS_3_LEVEL_50',
        'TRN_AVL_EVENTS_3_LEVEL_75',
        'TRN_AVL_EVENTS_2_LEVEL',
    )

    for guid, telemetry in trainers_data.items():
        for key in event_keys:
            if key in telemetry:
                observed = [entry[0] for entry in telemetry[key]]
                if sorted(observed) == observed:
                    continue
                print(f"Trainer {guid} has unordered timestamps in {key}")

check_timestamps_order(new_trainers_data)


In [None]:
new_trainers_data['802']['TRN_AVL_EVENTS_3_LEVEL_50']

In [None]:
# Create a timeline plot using the adjusted events in the property
# TRN_AVL_EVENTS_3_LEVEL_75 for all trainers. The x-axis should be the
# number of seconds elapsed since 0 and the y-axis
# should be the percentage of trainers in each state. Note that when a
# trainer is in AVL_TRAIN state, it should also be
# counted in AVL_EVAL state for that timestamp. The plot should
# show the percentage of trainers in each state at any given point in
# time i.e. every 1 second. Overlay this plot with the plot of the
# property TRN_AVL_EVENTS_3_LEVEL_50 and TRN_AVL_EVENTS_2_LEVEL. 

def plot_trainer_availability(trainers_data, total_trainers):
    """Overlay per-second availability percentages for the three event sets.

    Reads the rebased (elapsed_seconds, state) events stored under
    TRN_AVL_EVENTS_3_LEVEL_75, TRN_AVL_EVENTS_3_LEVEL_50 and
    TRN_AVL_EVENTS_2_LEVEL for every trainer, counts trainers per state for
    each second, and plots:
        AVL_TRAIN and AVL_EVAL for the 75 and 50 properties,
        AVL_TRAIN for the 2-level property.

    For the 3-level properties a trainer in AVL_TRAIN is also counted under
    AVL_EVAL. A state is counted from its event up to (excluding) the next
    event, so the state set by a trainer's last event is never counted.

    Args:
        trainers_data: dict mapping trainer id to a telemetry dict holding all
            three properties (a missing one raises KeyError).
        total_trainers: denominator used to turn counts into percentages.
    """
    def count_per_second(events_key, tracked_states):
        # Build {second: {state: number of trainers}} for one events property.
        counts = defaultdict(lambda: dict.fromkeys(tracked_states, 0))
        train_counts_as_eval = 'AVL_EVAL' in tracked_states
        for telemetry in trainers_data.values():
            prev_second = None
            prev_state = None
            for timestamp, state in telemetry[events_key]:
                second_now = int(timestamp)
                if prev_second is not None:
                    for second in range(prev_second, second_now):
                        counts[second][prev_state] += 1
                        if train_counts_as_eval and prev_state == 'AVL_TRAIN':
                            counts[second]['AVL_EVAL'] += 1
                prev_second, prev_state = second_now, state
        return counts

    def percent_series(counts, state):
        # Sorted seconds and the matching percentage of trainers in `state`.
        seconds = sorted(counts)
        return seconds, [counts[s][state] / total_trainers * 100 for s in seconds]

    three_level_states = ('AVL_TRAIN', 'AVL_EVAL', 'UN_AVL')
    counts_75 = count_per_second('TRN_AVL_EVENTS_3_LEVEL_75', three_level_states)
    counts_50 = count_per_second('TRN_AVL_EVENTS_3_LEVEL_50', three_level_states)
    counts_2 = count_per_second('TRN_AVL_EVENTS_2_LEVEL', ('AVL_TRAIN', 'UN_AVL'))

    # All series are computed before any figure is created.
    curves = [
        (*percent_series(counts_75, 'AVL_TRAIN'), 'AVL_TRAIN 75% (%)', 'blue'),
        (*percent_series(counts_75, 'AVL_EVAL'), 'AVL_EVAL 75% (%)', 'green'),
        (*percent_series(counts_50, 'AVL_TRAIN'), 'AVL_TRAIN 50% (%)', 'cyan'),
        (*percent_series(counts_50, 'AVL_EVAL'), 'AVL_EVAL 50% (%)', 'magenta'),
        (*percent_series(counts_2, 'AVL_TRAIN'), 'AVL_TRAIN 2_LEVEL (%)', 'purple'),
    ]

    plt.figure(figsize=(12, 6))
    for seconds, percentages, label, color in curves:
        plt.plot(seconds, percentages, label=label, color=color)

    plt.xlabel('Seconds Elapsed')
    plt.ylabel('Trainers (%)')
    plt.title('Trainer Availability Over Time')
    plt.legend()
    plt.grid(True)
    plt.show()

plot_trainer_availability(new_trainers_data, N)

In [None]:
len(new_trainers_data)

In [None]:
new_trainers_data['802']['TRN_AVL_EVENTS_3_LEVEL_75']

### Create 6Day jsons with new availability events

In [59]:
import json
import random

# Add the newly generated properties to the json files specified on
# the path:
# ./async_cifar10/trainer/config_dir100_num300_traceFail_6d_oort/
# which has 300 trainer files named trainer_X.json where X is the
# trainer number. First seed the random number generator with 42 and
# select 300 trainer ids from the new_trainers_data. For each trainer
# among the selected 300 trainers from new_trainers_data, update one
# json file i.e. trainer_X.json and
# add 3 properties within the hyperparameters key:
# TRN_AVL_EVENTS_3_LEVEL_50, TRN_AVL_EVENTS_3_LEVEL_75, and
# TRN_AVL_EVENTS_2_LEVEL with the chosen trainer's values as string.
# this updated json file should be saved in the path specified above
# with the same name.

# Fix the RNG seed so the same trainers are drawn on every run.
random.seed(42)

# Draw 300 distinct trainer ids from new_trainers_data.
selected_trainers = random.sample(list(new_trainers_data), 300)

# Directory holding the per-trainer config files (trainer_<n>.json).
path = '../async_cifar10/trainer/config_dir100_num300_traceFail_6d_oort/'

# Source property in new_trainers_data -> hyperparameter name in the config.
# The order here is the order in which new keys are added to the file.
avl_property_to_hyperparameter = {
    'TRN_AVL_EVENTS_3_LEVEL_50': 'avl_events_3_state_50',
    'TRN_AVL_EVENTS_3_LEVEL_75': 'avl_events_3_state_75',
    'TRN_AVL_EVENTS_2_LEVEL': 'avl_events_2_state',
}

# The i-th selected trainer (0-based) feeds trainer_<i+1>.json.
for i, trainer_id in enumerate(selected_trainers):
    file_path = f'{path}trainer_{i+1}.json'
    with open(file_path, 'r') as file:
        trainer_data = json.load(file)

    # Create the hyperparameters section if the config does not have one yet.
    hyperparams = trainer_data.setdefault('hyperparameters', {})

    # Availability events are stored as their string representation.
    for source_key, target_key in avl_property_to_hyperparameter.items():
        hyperparams[target_key] = str(new_trainers_data[trainer_id][source_key])

    # Drop the two obsolete properties when present.
    hyperparams.pop('two_state_unavl_durations_s', None)
    hyperparams.pop('client_avail_aware_notify', None)

    # client_notify replaces client_avail_aware_notify.
    hyperparams['client_notify'] = {"enabled": "True", "type": "three_state"}

    # Flag values are written as the string "True", not a JSON boolean.
    hyperparams['wait_until_next_avl'] = "True"

    # Overwrite the config file in place.
    with open(file_path, 'w') as file:
        json.dump(trainer_data, file, indent=4)

In [None]:
new_trainers_data['300']['TRN_AVL_EVENTS_3_LEVEL_50']