In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import matplotlib.dates as mdates
plt.style.use('ggplot')
sns.set(font_scale=1.2)
sns.set_style("whitegrid")

def load_data(file_path):
    columns = ['timestamp', 'node_name', 'status', 'labels', 'current_job']
    df = pd.read_csv(file_path, names=columns, header=0)
    df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S')
    df['current_job'] = df['current_job'].replace('None', np.nan)
    # Replace the labels using the logic: if docker and gpu-reliable exists in labels use linux-gpu
    def apply_label_rules(labels):
        if 'docker' in labels and 'gpu-reliable' in labels:
            return 'linux-gpu'
        elif 'docker' in labels and 'gpu-reliable' not in labels:
            return 'linux'
        elif 'win' in labels and 'gpu-reliable' in labels:
            return 'windows-gpu'
        elif 'win' in labels and 'gpu-reliable' not in labels:
            return 'windows'        
        elif 'osx' in labels:
            for label in labels.split(', '):
                if label.startswith('osx_'):
                    return label
        elif 'linux-arm64' in labels:
            return 'linux-arm64'          
        else:
            return labels

    df['labels'] = df['labels'].apply(apply_label_rules)     
    return df

def calculate_node_stats(df):
    nodes = df['node_name'].unique()
    node_stats = {}
    
    for node in nodes:
        node_df = df[df['node_name'] == node].sort_values('timestamp')
        first_seen = node_df['timestamp'].min()
        last_seen = node_df['timestamp'].max()
        time_alive = (last_seen - first_seen).total_seconds() / 60
        
        busy_records = node_df[node_df['status'] == 'Busy']
        busy_periods = []
        if not busy_records.empty:
            node_df['status_changed'] = node_df['status'] != node_df['status'].shift(1)
            status_changes = node_df[node_df['status_changed']].copy()
            last_records = node_df.groupby('status').last().reset_index()
            status_change_points = pd.concat([status_changes, last_records]).sort_values('timestamp')
            
            current_status = None
            start_time = None
            
            for _, row in status_change_points.iterrows():
                if current_status == 'Busy' and start_time is not None:
                    duration = (row['timestamp'] - start_time).total_seconds() / 60
                    busy_periods.append({
                        'start': start_time,
                        'end': row['timestamp'],
                        'duration': duration,
                        'job': row['current_job']
                    })
                
                current_status = row['status']
                start_time = row['timestamp']
        
        total_busy_time = sum(period['duration'] for period in busy_periods)
        busy_percentage = (total_busy_time / time_alive * 100) if time_alive > 0 else 0
        
        node_stats[node] = {
            'first_seen': first_seen,
            'last_seen': last_seen,
            'time_alive': time_alive,
            'busy_time': total_busy_time,
            'idle_time': time_alive - total_busy_time,
            'busy_percentage': busy_percentage,
            'busy_periods': busy_periods,
            'status_history': node_df[['timestamp', 'status', 'current_job']].to_dict('records'),
            'labels': node_df['labels'].iloc[0]
        }
    
    return node_stats

def generate_node_report(node_stats):
    sorted_nodes = sorted(node_stats.items(), key=lambda x: x[1]['busy_percentage'], reverse=True)
    report_data = []
    for node, stats in sorted_nodes:
        report_data.append({
            'Node Name': node,
            'Time Alive (min)': round(stats['time_alive'], 2),
            'Busy Time (min)': round(stats['busy_time'], 2),
            'Idle Time (min)': round(stats['idle_time'], 2),
            'Busy (%)': round(stats['busy_percentage'], 2),
            'First Seen': stats['first_seen'],
            'Last Seen': stats['last_seen'],
            'Number of Jobs': len(stats['busy_periods']),
            'Labels': stats['labels']
        })
    
    report_df = pd.DataFrame(report_data)
    return report_df

def analyze_jenkins_nodes(df, title='Jenkins Nodes Activity'):
    node_stats = calculate_node_stats(df)
    report = generate_node_report(node_stats)
    display(report.style.set_caption(title).set_table_styles(
        [{'selector': 'thead th', 'props': [('background-color', '#f7f7f9'), ('color', '#333'), ('border', '1px solid #ddd')]},
         {'selector': 'tbody tr:nth-child(even)', 'props': [('background-color', '#f9f9f9')]},
         {'selector': 'tbody tr:nth-child(odd)', 'props': [('background-color', '#fff')]},
         {'selector': 'tbody td', 'props': [('border', '1px solid #ddd'), ('color', '#000')]}]
    ).set_properties(**{'text-align': 'center'}))
    
    print("Node Activity Report:")
    print("===================")
    print(report)
    

    
    return df, node_stats, report


df = load_data("agent_data.csv")
# Absolute
# df, node_stats, report = analyze_jenkins_nodes(df)
# filter data to show only workdays
df_workdays = df[df['timestamp'].dt.dayofweek < 5]
df_workdays, node_stats_workdays, report = analyze_jenkins_nodes(df_workdays, title='Jenkins Nodes Activity (Workdays)')
# filter data to show only weekends
df_weekends = df[df['timestamp'].dt.dayofweek >= 5]
df_weekends, node_stats_weekends, report = analyze_jenkins_nodes(df_weekends, title='Jenkins Nodes Activity (Weekends)')

# Generate daily hourly activity report. Group the data by day and hour and calculate the busy/idle times
def calculate_hourly_stats(df):
    hourly_stats = {}
    for _, row in df.iterrows():
        day = row['timestamp'].date()
        hour = row['timestamp'].hour
        if day not in hourly_stats:
            hourly_stats[day] = {}
        
        if hour not in hourly_stats[day]:
            hourly_stats[day][hour] = {
                'busy_time': 0,
                'idle_time': 0
            }
        
        if row['status'] == 'Busy':
            hourly_stats[day][hour]['busy_time'] += 1
        else:
            hourly_stats[day][hour]['idle_time'] += 1
    
    return hourly_stats

hourly_stats_workdays = calculate_hourly_stats(df_workdays)
hourly_stats_weekends = calculate_hourly_stats(df_weekends)

# With the hourly stats calculate the percentage of busy time for each hour of the day
#f from all days
def calculate_hourly_percentage(hourly_stats, title='Jenkins Nodes Hourly Activity'):
    hourly_percentage = {}
    for day, stats in hourly_stats.items():
        for hour, values in stats.items():
            if hour not in hourly_percentage:
                hourly_percentage[hour] = {
                    'busy_time': 0,
                    'idle_time': 0
                }
            
            hourly_percentage[hour]['busy_time'] += values['busy_time']
            hourly_percentage[hour]['idle_time'] += values['idle_time']
    
    for hour, values in hourly_percentage.items():
        total_time = values['busy_time'] + values['idle_time']
        values['busy_percentage'] = (values['busy_time'] / total_time * 100) if total_time > 0 else 0
    
    # print the hourly percentage in a nice table
    hourly_percentage_df = pd.DataFrame(hourly_percentage).T
    hourly_percentage_df = hourly_percentage_df.rename_axis('Hour').reset_index()
    hourly_percentage_df = hourly_percentage_df[['Hour', 'busy_time', 'idle_time', 'busy_percentage']]
    display(hourly_percentage_df.style.set_caption(title).set_table_styles(
        [{'selector': 'thead th', 'props': [('background-color', '#f7f7f9'), ('color', '#333'), ('border', '1px solid #ddd')]},
         {'selector': 'tbody tr:nth-child(even)', 'props': [('background-color', '#f9f9f9')]},
         {'selector': 'tbody tr:nth-child(odd)', 'props': [('background-color', '#fff')]},
         {'selector': 'tbody td', 'props': [('border', '1px solid #ddd'), ('color', '#000')]}]
    ).set_properties(**{'text-align': 'center'}))


    
    return hourly_percentage


hourly_percentage_workdays = calculate_hourly_percentage(hourly_stats_workdays, title='Jenkins Nodes Hourly Activity (Workdays)')
hourly_percentage_weekends = calculate_hourly_percentage(hourly_stats_weekends, title='Jenkins Nodes Hourly Activity (Weekends)')
    
# Group by labels and recalculate the busy/idle times proportionally the time alive of each node
def calculate_label_stats(node_stats):
    label_stats = {}
    for node, stats in node_stats.items():
        label = stats['labels']
        if label not in label_stats:
            label_stats[label] = {
                'time_alive': 0,
                'busy_time': 0,
                'idle_time': 0
            }
        
        label_stats[label]['time_alive'] += stats['time_alive']
        label_stats[label]['busy_time'] += stats['busy_time']
        label_stats[label]['idle_time'] += stats['idle_time']
    
    for label, stats in label_stats.items():
        stats['busy_percentage'] = (stats['busy_time'] / stats['time_alive'] * 100) if stats['time_alive'] > 0 else 0
    
    return label_stats

def generate_label_report(node_stats, title='Jenkins Nodes Activity by Labels'):
    label_stats = calculate_label_stats(node_stats)
    label_report = pd.DataFrame(label_stats).T
    label_report = label_report.sort_values('busy_percentage', ascending=False)
    label_report = label_report.rename_axis('Labels').reset_index()
    label_report = label_report[['Labels', 'time_alive', 'busy_time', 'idle_time', 'busy_percentage']]
    display(label_report.style.set_caption(title).set_table_styles(
        [{'selector': 'thead th', 'props': [('background-color', '#f7f7f9'), ('color', '#333'), ('border', '1px solid #ddd')]},
         {'selector': 'tbody tr:nth-child(even)', 'props': [('background-color', '#f9f9f9')]},
         {'selector': 'tbody tr:nth-child(odd)', 'props': [('background-color', '#fff')]},
         {'selector': 'tbody td', 'props': [('border', '1px solid #ddd'), ('color', '#000')]}]
    ).set_properties(**{'text-align': 'center'}))
    return label_report

label_report_workdays = generate_label_report(node_stats_workdays, title='Jenkins Nodes Activity by Labels (workdays)')
label_report_weekends = generate_label_report(node_stats_weekends, title='Jenkins Nodes Activity by Labels (weekends)')


Unnamed: 0,Node Name,Time Alive (min),Busy Time (min),Idle Time (min),Busy (%),First Seen,Last Seen,Number of Jobs,Labels
0,linux-ec2.nv.focal-3865fe66,104.83,104.83,0.0,100.0,2025-03-14 17:50:10,2025-03-14 19:35:00,1,linux-gpu
1,linux-ec2.nv.focal-be0e29be,725.0,565.57,159.43,78.01,2025-03-14 05:35:01,2025-03-14 17:40:01,4,linux-gpu
2,linux-ec2.nv.focal-e2106c45,7170.03,5073.0,2097.03,70.75,2025-03-14 19:40:00,2025-03-19 19:10:02,45,linux-gpu
3,linux-ec2.nv.focal-5ae870bd,1715.0,1125.1,589.9,65.6,2025-03-13 00:45:00,2025-03-14 05:20:00,13,linux-gpu
4,linux-ec2.nv.focal-cbfb18c1,1940.17,1197.33,742.83,61.71,2025-03-12 21:30:02,2025-03-14 05:50:12,25,linux-gpu
5,linux-ec2.nv.focal-96fbd432,340.0,184.92,155.08,54.39,2025-03-17 12:10:00,2025-03-17 17:50:00,3,linux-gpu
6,linux-ec2.nv.focal-0cffe680,4095.0,2022.07,2072.93,49.38,2025-03-17 20:25:00,2025-03-20 16:40:00,59,linux-gpu
7,linux-ec2.nv.focal-994d8826,1274.78,549.18,725.6,43.08,2025-03-19 19:25:13,2025-03-20 16:40:00,24,linux-gpu
8,win-ec2.nv.WinS22-c738539f,16969.97,7188.83,9781.13,42.36,2025-03-12 18:55:02,2025-03-24 13:45:00,112,windows-gpu
9,linux-ip-172-30-1-34.focal-3bd74cfe,99.98,26.22,73.77,26.22,2025-03-12 18:55:02,2025-03-12 20:35:01,2,linux


Node Activity Report:
                               Node Name  Time Alive (min)  Busy Time (min)  \
0            linux-ec2.nv.focal-3865fe66            104.83           104.83   
1            linux-ec2.nv.focal-be0e29be            725.00           565.57   
2            linux-ec2.nv.focal-e2106c45           7170.03          5073.00   
3            linux-ec2.nv.focal-5ae870bd           1715.00          1125.10   
4            linux-ec2.nv.focal-cbfb18c1           1940.17          1197.33   
5            linux-ec2.nv.focal-96fbd432            340.00           184.92   
6            linux-ec2.nv.focal-0cffe680           4095.00          2022.07   
7            linux-ec2.nv.focal-994d8826           1274.78           549.18   
8             win-ec2.nv.WinS22-c738539f          16969.97          7188.83   
9    linux-ip-172-30-1-34.focal-3bd74cfe             99.98            26.22   
10           linux-ec2.nv.focal-28173d4a            344.97            70.02   
11           linux-ec2.nv.foca

Unnamed: 0,Node Name,Time Alive (min),Busy Time (min),Idle Time (min),Busy (%),First Seen,Last Seen,Number of Jobs,Labels
0,linux-ec2.nv.focal-078bcd6c,2875.0,1935.87,939.13,67.33,2025-03-22 00:00:00,2025-03-23 23:55:00,19,linux-gpu
1,linux-ec2.nv.focal-e2106c45,2871.97,1611.1,1260.87,56.1,2025-03-15 00:03:02,2025-03-16 23:55:00,25,linux-gpu
2,linux-ip-172-30-1-34.focal-2805dcaf,135.0,25.0,110.0,18.52,2025-03-22 00:00:00,2025-03-22 02:15:00,5,linux
3,win-ec2.nv.WinS22-c738539f,12951.97,1220.82,11731.15,9.43,2025-03-15 00:03:02,2025-03-23 23:55:00,37,windows-gpu
4,linux-ip-172-30-1-34.focal-cb0d5d3d,581.97,50.0,531.97,8.59,2025-03-15 00:03:02,2025-03-15 09:45:00,3,linux
5,linux-ip-172-30-1-34.focal-5dd9d5be,2735.0,225.07,2509.93,8.23,2025-03-22 02:20:00,2025-03-23 23:55:00,32,linux
6,linux-ip-172-30-1-34.focal-a1c847a0,2871.97,208.5,2663.47,7.26,2025-03-15 00:03:02,2025-03-16 23:55:00,28,linux
7,linux-ip-172-30-1-34.focal-93247462,2875.0,193.3,2681.7,6.72,2025-03-22 00:00:00,2025-03-23 23:55:00,23,linux
8,linux-ip-172-30-1-34.focal-bf09b343,2284.98,152.52,2132.47,6.67,2025-03-15 09:50:01,2025-03-16 23:55:00,24,linux
9,mac-six.ventura,12951.97,750.52,12201.45,5.79,2025-03-15 00:03:02,2025-03-23 23:55:00,83,osx_ventura


Node Activity Report:
                               Node Name  Time Alive (min)  Busy Time (min)  \
0            linux-ec2.nv.focal-078bcd6c           2875.00          1935.87   
1            linux-ec2.nv.focal-e2106c45           2871.97          1611.10   
2    linux-ip-172-30-1-34.focal-2805dcaf            135.00            25.00   
3             win-ec2.nv.WinS22-c738539f          12951.97          1220.82   
4    linux-ip-172-30-1-34.focal-cb0d5d3d            581.97            50.00   
5    linux-ip-172-30-1-34.focal-5dd9d5be           2735.00           225.07   
6    linux-ip-172-30-1-34.focal-a1c847a0           2871.97           208.50   
7    linux-ip-172-30-1-34.focal-93247462           2875.00           193.30   
8    linux-ip-172-30-1-34.focal-bf09b343           2284.98           152.52   
9                        mac-six.ventura          12951.97           750.52   
10                       mac-five.sonoma          12951.97           695.03   
11                      mac-fo

Unnamed: 0,Hour,busy_time,idle_time,busy_percentage
0,18,174.0,702.0,19.863014
1,19,245.0,876.0,21.855486
2,20,207.0,941.0,18.031359
3,21,193.0,954.0,16.826504
4,22,202.0,950.0,17.534722
5,23,179.0,973.0,15.538194
6,0,145.0,1007.0,12.586806
7,1,215.0,920.0,18.942731
8,2,158.0,994.0,13.715278
9,3,173.0,978.0,15.030408


Unnamed: 0,Hour,busy_time,idle_time,busy_percentage
0,0,41.0,535.0,7.118056
1,1,57.0,519.0,9.895833
2,2,36.0,540.0,6.25
3,3,76.0,500.0,13.194444
4,4,41.0,535.0,7.118056
5,5,36.0,540.0,6.25
6,6,51.0,525.0,8.854167
7,7,30.0,546.0,5.208333
8,8,51.0,509.0,9.107143
9,9,76.0,500.0,13.194444


Unnamed: 0,Labels,time_alive,busy_time,idle_time,busy_percentage
0,windows-gpu,16969.966667,7188.833333,9781.133333,42.362095
1,linux-gpu,33829.766667,12849.883333,20979.883333,37.983955
2,windows,16969.966667,1561.95,15408.016667,9.204202
3,osx_sonoma,16969.966667,1554.75,15415.216667,9.161774
4,osx_ventura,50909.9,4498.2,46411.7,8.83561
5,linux,26817.866667,1802.616667,25015.25,6.7217
6,"core-packages-dfe10fd3, packages, swarm",16969.966667,78.083333,16891.883333,0.460127
7,linux-arm64,16969.966667,0.0,16969.966667,0.0


Unnamed: 0,Labels,time_alive,busy_time,idle_time,busy_percentage
0,linux-gpu,11493.933333,3590.75,7903.183333,31.240393
1,windows-gpu,12951.966667,1220.816667,11731.15,9.425724
2,linux,11483.916667,854.383333,10629.533333,7.439825
3,osx_sonoma,12951.966667,695.033333,12256.933333,5.366238
4,osx_ventura,38855.9,1781.7,37074.2,4.585404
5,windows,12951.966667,302.116667,12649.85,2.332593
6,"core-packages-dfe10fd3, packages, swarm",12951.966667,19.716667,12932.25,0.152229
7,linux-arm64,12951.966667,0.0,12951.966667,0.0


In [7]:
# generate statistics for the busy periods
def generate_busy_periods_report(node_stats, title='Jenkins Nodes Busy Periods'):
    busy_periods_data = []
    for node, stats in node_stats.items():
        for period in stats['busy_periods']:
            busy_periods_data.append({
                'Node Name': node,
                'Start Time': period['start'],
                'End Time': period['end'],
                'Duration (min)': round(period['duration'], 2),
                'Job': period['job']
            })
    
    busy_periods_report = pd.DataFrame(busy_periods_data)
    busy_periods_report = busy_periods_report.sort_values('Start Time')
    display(busy_periods_report.style.set_caption(title).set_table_styles(
        [{'selector': 'thead th', 'props': [('background-color', '#f7f7f9'), ('color', '#333'), ('border', '1px solid #ddd')]},
         {'selector': 'tbody tr:nth-child(even)', 'props': [('background-color', '#f9f9f9')]},
         {'selector': 'tbody tr:nth-child(odd)', 'props': [('background-color', '#fff')]},
         {'selector': 'tbody td', 'props': [('border', '1px solid #ddd'), ('color', '#000')]}]
    ).set_properties(**{'text-align': 'center'}))
    return busy_periods_report

generate_busy_periods_report(node_stats_workdays, title='Jenkins Nodes Busy Periods (workdays)')

Unnamed: 0,Node Name,Start Time,End Time,Duration (min),Job
325,mac-four.ventura,2025-03-12 18:55:02,2025-03-12 19:05:02,10.0,
702,win-ec2.nv.WinS22-c738539f,2025-03-12 19:00:01,2025-03-12 20:00:01,60.0,
25,linux-ip-172-30-1-34.focal-12a7a400,2025-03-12 19:00:01,2025-03-12 19:05:02,5.02,
24,linux-ip-172-30-1-34.focal-12a7a400,2025-03-12 19:00:01,2025-03-12 19:00:01,0.0,gz_utils4-install-pkg-noble-amd64 #56
19,linux-ec2.nv.focal-28173d4a,2025-03-12 19:10:01,2025-03-12 19:45:02,35.02,
26,linux-ip-172-30-1-34.focal-3bd74cfe,2025-03-12 19:15:01,2025-03-12 19:35:02,20.02,gz_transport-ci-ign-transport11-focal-amd64 #125
27,linux-ip-172-30-1-34.focal-3bd74cfe,2025-03-12 19:35:02,2025-03-12 19:41:14,6.2,
607,win-ec2.nv.WinS22-5a866937,2025-03-12 19:55:01,2025-03-12 20:05:01,10.0,
448,mac-six.ventura,2025-03-12 20:10:01,2025-03-12 20:15:01,5.0,
28,mac-eight.ventura,2025-03-12 20:10:01,2025-03-12 20:40:01,30.0,


Unnamed: 0,Node Name,Start Time,End Time,Duration (min),Job
325,mac-four.ventura,2025-03-12 18:55:02,2025-03-12 19:05:02,10.00,
702,win-ec2.nv.WinS22-c738539f,2025-03-12 19:00:01,2025-03-12 20:00:01,60.00,
25,linux-ip-172-30-1-34.focal-12a7a400,2025-03-12 19:00:01,2025-03-12 19:05:02,5.02,
24,linux-ip-172-30-1-34.focal-12a7a400,2025-03-12 19:00:01,2025-03-12 19:00:01,0.00,gz_utils4-install-pkg-noble-amd64 #56
19,linux-ec2.nv.focal-28173d4a,2025-03-12 19:10:01,2025-03-12 19:45:02,35.02,
...,...,...,...,...,...
813,win-ec2.nv.WinS22-c738539f,2025-03-24 13:20:00,2025-03-24 13:25:10,5.17,
1296,linux-ip-172-30-1-34.focal-93247462,2025-03-24 13:20:00,2025-03-24 13:25:10,5.17,
1311,linux-ip-172-30-1-34.focal-5dd9d5be,2025-03-24 13:30:00,2025-03-24 13:35:00,5.00,
1310,linux-ip-172-30-1-34.focal-5dd9d5be,2025-03-24 13:30:00,2025-03-24 13:30:00,0.00,gz_launch7-install-pkg-jammy-amd64 #133
