In [24]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [25]:
# Load the per-job timing logs for the four runs:
# job_times_{baseline, cap, danish, decima}.csv
def _read_job_times(csv_path):
    """Read one job-times CSV and return it without incomplete rows.

    The 'start_time' and 'end_time' columns are parsed as datetimes while
    reading. Any row that contains a missing value is then discarded
    (per the original note, these are a few empty rows at the end of
    each file).
    """
    frame = pd.read_csv(csv_path, parse_dates=['start_time', 'end_time'])
    return frame.dropna()


baseline = _read_job_times('job_times_baseline.csv')
cap = _read_job_times('job_times_cap.csv')
danish = _read_job_times('job_times_danish.csv')
decima = _read_job_times('job_times_decima.csv')

# Quick sanity check: show the first rows of every run, in load order.
for job_times in (baseline, cap, danish, decima):
    print(job_times.head())

   job_id                 start_time                   end_time  \
0       0 2024-12-01 16:02:58.054007 2024-12-01 16:06:24.486653   
1       1 2024-12-01 16:03:00.063833 2024-12-01 16:06:24.486720   
2       2 2024-12-01 16:03:02.073445 2024-12-01 16:06:24.486744   
3       3 2024-12-01 16:06:24.493353 2024-12-01 16:09:51.503236   
4       4 2024-12-01 16:06:26.501406 2024-12-01 16:09:51.503267   

   carbon_footprint  
0          1703.164  
1          1803.340  
2          1803.340  
3          1779.782  
4          1779.782  
   job_id                 start_time                   end_time  \
0       0 2024-11-30 20:31:00.361572 2024-11-30 20:35:43.636349   
1       1 2024-11-30 20:31:02.370081 2024-11-30 20:35:43.636510   
2       2 2024-11-30 20:31:04.380137 2024-11-30 20:35:43.636570   
3       3 2024-11-30 20:35:43.643893 2024-11-30 20:40:26.781255   
4       4 2024-11-30 20:35:45.650031 2024-11-30 20:40:26.781363   

   carbon_footprint  
0           4007.58  
1           4007.5

In [26]:
# Carbon footprint of each run, summed over its first 20 rows.
# .iloc[:20] selects rows by position, so a frame with fewer than 20 rows
# simply contributes all of its rows.
total_carbon_baseline = baseline['carbon_footprint'].iloc[:20].sum()
total_carbon_cap = cap['carbon_footprint'].iloc[:20].sum()
total_carbon_danish = danish['carbon_footprint'].iloc[:20].sum()
total_carbon_decima = decima['carbon_footprint'].iloc[:20].sum()

# Report the four totals, one line per run.
for message, total in (
    ('Total Carbon Footprint for baseline:', total_carbon_baseline),
    ('Total Carbon Footprint for cap:', total_carbon_cap),
    ('Total Carbon Footprint for danish:', total_carbon_danish),
    ('Total Carbon Footprint for decima:', total_carbon_decima),
):
    print(message, total)


Total Carbon Footprint for baseline: 40031.06200000001
Total Carbon Footprint for cap: 23556.36
Total Carbon Footprint for danish: 24248.949999999997
Total Carbon Footprint for decima: 10990.52


In [27]:
# Total wall-clock time of each run: from the earliest start_time to the
# latest end_time. Both columns were parsed as datetimes when the CSVs were
# loaded, so the difference is a pandas Timedelta.
#
# min()/max() are used instead of "first row / last row": the result is the
# same when rows are in chronological order, but it stays correct if a job
# listed earlier happens to finish last, and it does not raise on an empty
# frame (the span is NaT in that case).
def _run_span(job_times):
    """Return latest 'end_time' minus earliest 'start_time' for one run."""
    return job_times['end_time'].max() - job_times['start_time'].min()


def _format_elapsed(delta):
    """Render a Timedelta as HH:MM:SS.ffffff without a 'N days' prefix.

    Whole days, if any, are folded into the hours field so no elapsed time
    is lost. Returns 'n/a' when the span is missing (NaT).
    """
    if pd.isna(delta):
        return 'n/a'
    # Work in integer microseconds to avoid float rounding (e.g. "60.000000").
    total_us = delta // pd.Timedelta(microseconds=1)
    total_seconds, micros = divmod(total_us, 1_000_000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f'{hours:02d}:{minutes:02d}:{seconds:02d}.{micros:06d}'


total_time_baseline = _run_span(baseline)
total_time_cap = _run_span(cap)
total_time_danish = _run_span(danish)
total_time_decima = _run_span(decima)

# print the results (just the hours, minutes, etc -- days are irrelevant)
print('Total Time for baseline:   ', _format_elapsed(total_time_baseline))
print('Total Time for cap:        ', _format_elapsed(total_time_cap))
print('Total Time for danish:     ', _format_elapsed(total_time_danish))
print('Total Time for decima:     ', _format_elapsed(total_time_decima))

Total Time for baseline:    0 days 00:29:58.018670
Total Time for cap:         0 days 00:09:26.419806
Total Time for danish:      0 days 00:32:49.572784
Total Time for decima:      0 days 00:35:00.341596
