In [1]:
import json
import os
import glob
import pandas as pd

def summarize_records(records):
    """Average the benchmark metrics over the records of one JSON file.

    Parameters
    ----------
    records : list of dict
        Parsed content of one JSON file. Each dict may carry the keys
        "total_cpu_time (seconds)", "total_process_memory (MB)",
        "execution_time (seconds/batch)" and "throughput_bps".

    Returns
    -------
    dict
        Keys "avg_cpu_time", "avg_process_memory", "avg_execution_time",
        "avg_throughput_bps" and "record_count". A record that lacks a key
        contributes 0 to that metric's sum but is still counted in the
        divisor (the divisor is always ``len(records)``). For an empty list
        every average is 0.
    """
    count = len(records)

    cpu_time_sum = 0
    process_memory_sum = 0
    execution_time_sum = 0
    throughput_bps_sum = 0
    for obj in records:
        cpu_time_sum += obj.get("total_cpu_time (seconds)", 0)
        process_memory_sum += obj.get("total_process_memory (MB)", 0)
        execution_time_sum += obj.get("execution_time (seconds/batch)", 0)
        throughput_bps_sum += obj.get("throughput_bps", 0)

    if count > 0:
        avg_cpu_time = cpu_time_sum / count
        avg_process_memory = process_memory_sum / count
        avg_execution_time = execution_time_sum / count
        avg_throughput_bps = throughput_bps_sum / count
    else:
        # Nothing to average. Every metric is set explicitly here so that an
        # empty file can neither raise NameError nor inherit the previous
        # file's value.
        avg_cpu_time = avg_process_memory = avg_execution_time = avg_throughput_bps = 0

    return {
        "avg_cpu_time": avg_cpu_time,
        "avg_process_memory": avg_process_memory,
        "avg_execution_time": avg_execution_time,
        "avg_throughput_bps": avg_throughput_bps,
        "record_count": count,
    }


# Directory the notebook kernel is currently running in
current_dir = os.getcwd()

# Find all JSON files in the current directory
json_files = glob.glob(os.path.join(current_dir, "*.json"))

# List to collect summary stats for each file
summary_data = []

# Loop through each JSON file
for file_path in json_files:
    with open(file_path, "r") as f:
        data = json.load(f)  # List of objects

    stats = summarize_records(data)

    # Append summary for this file. The execution-time average is computed by
    # the helper but is not part of this table, so its columns stay unchanged.
    summary_data.append({
        "filename": os.path.basename(file_path),
        "avg_cpu_time": stats["avg_cpu_time"],
        "avg_process_memory": stats["avg_process_memory"],
        "avg_throughput_bps": stats["avg_throughput_bps"],
        "record_count": stats["record_count"]
    })

# Create a DataFrame from the summary data
summary_df = pd.DataFrame(summary_data)

# Display the summary DataFrame
print(summary_df)

                filename  avg_cpu_time  avg_process_memory  \
0   100MB-1file-128.json     25.759324         2138.394531   
1   100MB-1file-256.json     20.604544         4361.402344   
2    100MB-1File-32.json    225.481818         2738.230469   
3   100MB-1file-512.json     21.341026         4517.609375   
4    100MB-1file-64.json     22.514245         1681.699219   
5     100MB-1GB-256.json     24.066571         4313.574219   
6      10MB-1GB-256.json      3.328900         1186.802790   
7      10MB-2GB-256.json      3.092958         1190.260110   
8      10MB-4GB-256.json      2.751213         1180.918400   
9       10MB-4GB-64.json      4.244186          743.794338   
10     10MB-6GB-256.json      2.799368         1186.847075   
11     10MB-8GB-256.json      3.601294         1187.793519   
12    25MB-10GB-256.json      6.459697         1737.696618   
13    25MB-12GB-256.json      6.391909         1730.849077   
14     25MB-1GB-256.json      6.465970         1720.837426   
15     2

In [25]:
import json
import os
import glob
import pandas as pd

# Collect every JSON file that sits in the kernel's working directory.
current_dir = os.getcwd()
json_files = glob.glob(os.path.join(current_dir, "*.json"))
summary_data = []

# Short metric name -> key under which that metric is stored in each record.
METRIC_FIELDS = {
    "cpu_time": "total_cpu_time (seconds)",
    "process_memory": "total_process_memory (MB)",
    "execution_time": "execution_time (seconds/batch)",
    "throughput_bps": "throughput_bps",
}

for file_path in json_files:
    with open(file_path, "r") as f:
        records = json.load(f)

    # Running sum and number of contributing records, tracked per metric so
    # that a record missing one key does not dilute that metric's average.
    totals = dict.fromkeys(METRIC_FIELDS, 0)
    counts = dict.fromkeys(METRIC_FIELDS, 0)

    for record in records:
        for metric, field in METRIC_FIELDS.items():
            if field in record:
                totals[metric] += record[field]
                counts[metric] += 1

    # A metric that never appeared in the file is reported as 0.
    averages = {}
    for metric in METRIC_FIELDS:
        if counts[metric] > 0:
            averages[metric] = totals[metric] / counts[metric]
        else:
            averages[metric] = 0

    # One summary row per file; only the CPU-time and memory counts are kept.
    summary_data.append({
        "filename": os.path.basename(file_path),
        "avg_cpu_time": averages["cpu_time"],
        "avg_process_memory": averages["process_memory"],
        "avg_execution_time": averages["execution_time"],
        "avg_throughput_bps": averages["throughput_bps"],
        "cpu_time_count": counts["cpu_time"],
        "process_memory_count": counts["process_memory"]})

In [14]:

# Turn the accumulated summary rows into a table.
summary_df = pd.DataFrame(data=summary_data)

# Print the table as plain text.
print(summary_df)


                filename  avg_cpu_time  avg_process_memory  avg_throughput_bps
0   100MB-1file-128.json     25.759324         2138.394531        3.258432e+07
1   100MB-1file-256.json     20.604544         4361.402344        4.073616e+07
2    100MB-1File-32.json    225.481818         2738.230469        3.722473e+06
3   100MB-1file-512.json     21.341026         4517.609375        3.933035e+07
4    100MB-1file-64.json     22.514245         1681.699219        3.728084e+07
5     100MB-1GB-256.json     24.066571         4313.574219        3.535165e+07
6      10MB-1GB-256.json      3.328900         1186.802790        2.607668e+07
7      10MB-2GB-256.json      3.092958         1190.260110        2.886155e+07
8      10MB-4GB-256.json      2.751213         1180.918400        3.301102e+07
9      10MB-6GB-256.json      2.799368         1186.847075        3.219871e+07
10     10MB-8GB-256.json      3.601294         1187.793519        2.493032e+07
11    25MB-10GB-256.json      6.937246          190.

In [11]:
# Workload being priced
num_requests = 110
duration = 7.1  # seconds per request
memory_allocated = 1.78  # GB
storage = 2  # GB

# Rates, all in USD
compute_per_GB_s = 0.0000166667  # per GB-second of allocated memory
charges_per_request = 2e-7  # per request
storage_per_GB_s = 3.09e-8  # per GB-second of storage beyond the first 0.5 GB

# Run time summed over all requests, in seconds
total_seconds = num_requests * duration

# Compute: allocated memory multiplied by total run time, in GB-seconds
GB_s = total_seconds * memory_allocated
compute_charges = GB_s * compute_per_GB_s

# Requests: flat fee for each one
request_charges = num_requests * charges_per_request

# Storage: only the part above 0.5 GB is charged
if storage > 0.5:
    storage_charges = (storage - 0.5) * total_seconds * storage_per_GB_s
else:
    storage_charges = 0

total_charges = compute_charges + request_charges + storage_charges

print(f'Compute charges: ${compute_charges:.3g}, Request charges: ${request_charges:.3g}, Storage charges: ${storage_charges:.3g}')
print(f'Total charges: ${total_charges:.3g}.')

Compute charges: $0.0232, Request charges: $2.2e-05, Storage charges: $3.62e-05
Total charges: $0.0232.


In [4]:
# Bare dict literal: it is the cell's only expression, so the notebook echoes
# it back as the cell output. Nothing is assigned or sent anywhere here.
# NOTE(review): looks like a job/invocation payload (bucket, script path,
# result path) — confirm which consumer expects this shape.
# NOTE(review): "file_limit" is a string while "batch_size" is an integer —
# confirm the mixed types are intended.
{
  "bucket": "cosmicai-data",
  "file_limit": "106",
  "batch_size": 265,
  "object_type": "folder",
  "S3_object_name": "Anomaly Detection",
  "script": "/tmp/Anomaly Detection/Inference/inference.py",
  "result_path": "scaling/result-partition-75MB/8GB/2",
  "data_bucket": "cosmicai-data",
  "data_prefix": "75MB"
}

{'bucket': 'cosmicai-data',
 'file_limit': '106',
 'batch_size': 265,
 'object_type': 'folder',
 'S3_object_name': 'Anomaly Detection',
 'script': '/tmp/Anomaly Detection/Inference/inference.py',
 'result_path': 'scaling/result-partition-75MB/8GB/2',
 'data_bucket': 'cosmicai-data',
 'data_prefix': '75MB'}