In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import matplotlib.dates as mdates
plt.style.use('ggplot')
sns.set(font_scale=1.2)
sns.set_style("whitegrid")

def load_data(file_path):
    columns = ['timestamp', 'node_name', 'status', 'labels', 'current_job']
    df = pd.read_csv(file_path, names=columns, header=0)
    df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S')
    df['current_job'] = df['current_job'].replace('None', np.nan)
    # Replace the labels using the logic: if docker and gpu-reliable exists in labels use linux-gpu
    def apply_label_rules(labels):
        if 'docker' in labels and 'gpu-reliable' in labels:
            return 'linux-gpu'
        elif 'docker' in labels and 'gpu-reliable' not in labels:
            return 'linux'
        elif 'win' in labels and 'gpu-reliable' in labels:
            return 'windows-gpu'
        elif 'win' in labels and 'gpu-reliable' not in labels:
            return 'windows'        
        elif 'osx' in labels:
            for label in labels.split(', '):
                if label.startswith('osx_'):
                    return label
        elif 'linux-arm64' in labels:
            return 'linux-arm64'          
        else:
            return labels

    df['labels'] = df['labels'].apply(apply_label_rules)     
    return df

def calculate_node_stats(df):
    nodes = df['node_name'].unique()
    node_stats = {}
    
    for node in nodes:
        node_df = df[df['node_name'] == node].sort_values('timestamp')
        first_seen = node_df['timestamp'].min()
        last_seen = node_df['timestamp'].max()
        time_alive = (last_seen - first_seen).total_seconds() / 60
        
        busy_records = node_df[node_df['status'] == 'Busy']
        busy_periods = []
        if not busy_records.empty:
            node_df['status_changed'] = node_df['status'] != node_df['status'].shift(1)
            status_changes = node_df[node_df['status_changed']].copy()
            last_records = node_df.groupby('status').last().reset_index()
            status_change_points = pd.concat([status_changes, last_records]).sort_values('timestamp')
            
            current_status = None
            start_time = None
            
            for _, row in status_change_points.iterrows():
                if current_status == 'Busy' and start_time is not None:
                    duration = (row['timestamp'] - start_time).total_seconds() / 60
                    busy_periods.append({
                        'start': start_time,
                        'end': row['timestamp'],
                        'duration': duration,
                        'job': row['current_job']
                    })
                
                current_status = row['status']
                start_time = row['timestamp']
        
        total_busy_time = sum(period['duration'] for period in busy_periods)
        busy_percentage = (total_busy_time / time_alive * 100) if time_alive > 0 else 0
        
        node_stats[node] = {
            'first_seen': first_seen,
            'last_seen': last_seen,
            'time_alive': time_alive,
            'busy_time': total_busy_time,
            'idle_time': time_alive - total_busy_time,
            'busy_percentage': busy_percentage,
            'busy_periods': busy_periods,
            'status_history': node_df[['timestamp', 'status', 'current_job']].to_dict('records'),
            'labels': node_df['labels'].iloc[0]
        }
    
    return node_stats

def generate_node_report(node_stats):
    sorted_nodes = sorted(node_stats.items(), key=lambda x: x[1]['busy_percentage'], reverse=True)
    report_data = []
    for node, stats in sorted_nodes:
        report_data.append({
            'Node Name': node,
            'Time Alive (min)': round(stats['time_alive'], 2),
            'Busy Time (min)': round(stats['busy_time'], 2),
            'Idle Time (min)': round(stats['idle_time'], 2),
            'Busy (%)': round(stats['busy_percentage'], 2),
            'First Seen': stats['first_seen'],
            'Last Seen': stats['last_seen'],
            'Number of Jobs': len(stats['busy_periods']),
            'Labels': stats['labels']
        })
    
    report_df = pd.DataFrame(report_data)
    return report_df

def analyze_jenkins_nodes(df, title='Jenkins Nodes Activity'):
    node_stats = calculate_node_stats(df)
    report = generate_node_report(node_stats)
    display(report.style.set_caption(title).set_table_styles(
        [{'selector': 'thead th', 'props': [('background-color', '#f7f7f9'), ('color', '#333'), ('border', '1px solid #ddd')]},
         {'selector': 'tbody tr:nth-child(even)', 'props': [('background-color', '#f9f9f9')]},
         {'selector': 'tbody tr:nth-child(odd)', 'props': [('background-color', '#fff')]},
         {'selector': 'tbody td', 'props': [('border', '1px solid #ddd'), ('color', '#000')]}]
    ).set_properties(**{'text-align': 'center'}))
    
    print("Node Activity Report:")
    print("===================")
    print(report)
    

    
    return df, node_stats, report


df = load_data("agent_data.csv")
# Absolute
df, node_stats, report = analyze_jenkins_nodes(df)
# filter data to show only workdays
df_workdays = df[df['timestamp'].dt.dayofweek < 5]
df_workdays, node_stats, report = analyze_jenkins_nodes(df_workdays, title='Jenkins Nodes Activity (Workdays)')
# filter data to show only weekends
df_weekends = df[df['timestamp'].dt.dayofweek >= 5]
df_weekends, node_stats, report = analyze_jenkins_nodes(df_weekends, title='Jenkins Nodes Activity (Weekends)')






Unnamed: 0,Node Name,Time Alive (min),Busy Time (min),Idle Time (min),Busy (%),First Seen,Last Seen,Number of Jobs,Labels
0,linux-ec2.nv.focal-3865fe66,104.83,104.83,0.0,100.0,2025-03-14 17:50:10,2025-03-14 19:35:00,1,linux-gpu
1,linux-ec2.nv.focal-be0e29be,725.0,565.57,159.43,78.01,2025-03-14 05:35:01,2025-03-14 17:40:01,4,linux-gpu
2,linux-ec2.nv.focal-5ae870bd,1715.0,1125.1,589.9,65.6,2025-03-13 00:45:00,2025-03-14 05:20:00,13,linux-gpu
3,linux-ec2.nv.focal-cbfb18c1,1940.17,1197.33,742.83,61.71,2025-03-12 21:30:02,2025-03-14 05:50:12,25,linux-gpu
4,linux-ec2.nv.focal-e2106c45,3805.02,2144.07,1660.95,56.35,2025-03-14 19:40:00,2025-03-17 11:05:01,34,linux-gpu
5,win-ec2.nv.WinS22-c738539f,6729.98,1783.07,4946.92,26.49,2025-03-12 18:55:02,2025-03-17 11:05:01,52,windows-gpu
6,linux-ip-172-30-1-34.focal-3bd74cfe,99.98,26.22,73.77,26.22,2025-03-12 18:55:02,2025-03-12 20:35:01,2,linux
7,linux-ec2.nv.focal-28173d4a,344.97,70.02,274.95,20.3,2025-03-12 18:55:02,2025-03-13 00:40:00,5,linux-gpu
8,linux-ip-172-30-1-34.focal-78f9f008,64.98,10.0,54.98,15.39,2025-03-13 19:10:01,2025-03-13 20:15:00,2,linux
9,mac-eight.ventura,6729.98,792.43,5937.55,11.77,2025-03-12 18:55:02,2025-03-17 11:05:01,78,osx_ventura


Node Activity Report:
                               Node Name  Time Alive (min)  Busy Time (min)  \
0            linux-ec2.nv.focal-3865fe66            104.83           104.83   
1            linux-ec2.nv.focal-be0e29be            725.00           565.57   
2            linux-ec2.nv.focal-5ae870bd           1715.00          1125.10   
3            linux-ec2.nv.focal-cbfb18c1           1940.17          1197.33   
4            linux-ec2.nv.focal-e2106c45           3805.02          2144.07   
5             win-ec2.nv.WinS22-c738539f           6729.98          1783.07   
6    linux-ip-172-30-1-34.focal-3bd74cfe             99.98            26.22   
7            linux-ec2.nv.focal-28173d4a            344.97            70.02   
8    linux-ip-172-30-1-34.focal-78f9f008             64.98            10.00   
9                      mac-eight.ventura           6729.98           792.43   
10                       mac-five.sonoma           6729.98           711.35   
11                       mac-s

Unnamed: 0,Node Name,Time Alive (min),Busy Time (min),Idle Time (min),Busy (%),First Seen,Last Seen,Number of Jobs,Labels
0,linux-ec2.nv.focal-3865fe66,104.83,104.83,0.0,100.0,2025-03-14 17:50:10,2025-03-14 19:35:00,1,linux-gpu
1,linux-ec2.nv.focal-e2106c45,3805.02,3409.93,395.08,89.62,2025-03-14 19:40:00,2025-03-17 11:05:01,11,linux-gpu
2,linux-ec2.nv.focal-be0e29be,725.0,565.57,159.43,78.01,2025-03-14 05:35:01,2025-03-14 17:40:01,4,linux-gpu
3,linux-ec2.nv.focal-5ae870bd,1715.0,1125.1,589.9,65.6,2025-03-13 00:45:00,2025-03-14 05:20:00,13,linux-gpu
4,linux-ec2.nv.focal-cbfb18c1,1940.17,1197.33,742.83,61.71,2025-03-12 21:30:02,2025-03-14 05:50:12,25,linux-gpu
5,win-ec2.nv.WinS22-c738539f,6729.98,4153.27,2576.72,61.71,2025-03-12 18:55:02,2025-03-17 11:05:01,33,windows-gpu
6,linux-ip-172-30-1-34.focal-3bd74cfe,99.98,26.22,73.77,26.22,2025-03-12 18:55:02,2025-03-12 20:35:01,2,linux
7,linux-ec2.nv.focal-28173d4a,344.97,70.02,274.95,20.3,2025-03-12 18:55:02,2025-03-13 00:40:00,5,linux-gpu
8,linux-ip-172-30-1-34.focal-78f9f008,64.98,10.0,54.98,15.39,2025-03-13 19:10:01,2025-03-13 20:15:00,2,linux
9,linux-ip-172-30-1-34.focal-cb0d5d3d,1725.0,164.98,1560.02,9.56,2025-03-13 19:10:01,2025-03-14 23:55:01,22,linux


Node Activity Report:
                               Node Name  Time Alive (min)  Busy Time (min)  \
0            linux-ec2.nv.focal-3865fe66            104.83           104.83   
1            linux-ec2.nv.focal-e2106c45           3805.02          3409.93   
2            linux-ec2.nv.focal-be0e29be            725.00           565.57   
3            linux-ec2.nv.focal-5ae870bd           1715.00          1125.10   
4            linux-ec2.nv.focal-cbfb18c1           1940.17          1197.33   
5             win-ec2.nv.WinS22-c738539f           6729.98          4153.27   
6    linux-ip-172-30-1-34.focal-3bd74cfe             99.98            26.22   
7            linux-ec2.nv.focal-28173d4a            344.97            70.02   
8    linux-ip-172-30-1-34.focal-78f9f008             64.98            10.00   
9    linux-ip-172-30-1-34.focal-cb0d5d3d           1725.00           164.98   
10                     mac-eight.ventura           6729.98           552.65   
11   linux-ip-172-30-1-34.foca

Unnamed: 0,Node Name,Time Alive (min),Busy Time (min),Idle Time (min),Busy (%),First Seen,Last Seen,Number of Jobs,Labels
0,linux-ec2.nv.focal-e2106c45,2871.97,1611.1,1260.87,56.1,2025-03-15 00:03:02,2025-03-16 23:55:00,25,linux-gpu
1,win-ec2.nv.WinS22-c738539f,2871.97,506.77,2365.2,17.65,2025-03-15 00:03:02,2025-03-16 23:55:00,21,windows-gpu
2,mac-five.sonoma,2871.97,340.42,2531.55,11.85,2025-03-15 00:03:02,2025-03-16 23:55:00,32,osx_sonoma
3,mac-six.ventura,2871.97,324.43,2547.53,11.3,2025-03-15 00:03:02,2025-03-16 23:55:00,41,osx_ventura
4,mac-four.ventura,2871.97,268.15,2603.82,9.34,2025-03-15 00:03:02,2025-03-16 23:55:00,31,osx_ventura
5,linux-ip-172-30-1-34.focal-cb0d5d3d,581.97,50.0,531.97,8.59,2025-03-15 00:03:02,2025-03-15 09:45:00,3,linux
6,mac-eight.ventura,2871.97,239.78,2632.18,8.35,2025-03-15 00:03:02,2025-03-16 23:55:00,30,osx_ventura
7,linux-ip-172-30-1-34.focal-a1c847a0,2871.97,208.5,2663.47,7.26,2025-03-15 00:03:02,2025-03-16 23:55:00,28,linux
8,linux-ip-172-30-1-34.focal-bf09b343,2284.98,152.52,2132.47,6.67,2025-03-15 09:50:01,2025-03-16 23:55:00,24,linux
9,win-ec2.nv.WinS22-5a866937,2871.97,170.28,2701.68,5.93,2025-03-15 00:03:02,2025-03-16 23:55:00,13,windows


Node Activity Report:
                               Node Name  Time Alive (min)  Busy Time (min)  \
0            linux-ec2.nv.focal-e2106c45           2871.97          1611.10   
1             win-ec2.nv.WinS22-c738539f           2871.97           506.77   
2                        mac-five.sonoma           2871.97           340.42   
3                        mac-six.ventura           2871.97           324.43   
4                       mac-four.ventura           2871.97           268.15   
5    linux-ip-172-30-1-34.focal-cb0d5d3d            581.97            50.00   
6                      mac-eight.ventura           2871.97           239.78   
7    linux-ip-172-30-1-34.focal-a1c847a0           2871.97           208.50   
8    linux-ip-172-30-1-34.focal-bf09b343           2284.98           152.52   
9             win-ec2.nv.WinS22-5a866937           2871.97           170.28   
10                core-packages-dfe10fd3           2871.97            14.97   
11           linux-ec2.nv.foca