# Measuring and logging the CPU usage on `hpc05`

A measurement is taken **every 15 minutes**, after which this website is updated.

Want to know something? Ask/e-mail Bas at [basnijholt@gmail.com](mailto:basnijholt@gmail.com).

_You can see the code by clicking on this button:_

In [None]:
from better_logger import *
# Show when this notebook was last executed (`now` is presumably provided by
# the star import of `better_logger` — TODO confirm).
last_run_message = 'Last time this script ran is at {}'.format(now)
print(last_run_message)

# Current usage on `hpc05`

In [None]:
# IPython shell escape: over ssh, run stat.py on hpc05 with the Python
# interpreter of the `dev` miniconda environment; its output is shown below.
!ssh hpc05 'bash -c "~/miniconda3/envs/dev/bin/python /home/basnijholt/Work/cluster_log/stat.py"'

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# `now` and `load_processes` are not defined in this notebook; presumably they
# come from the star import of `better_logger` — TODO confirm.
today = str(now.date())
month = now.strftime("%B")  # month name, used in the plot titles below

# Load the logged samples into a frame indexed by sample timestamp (UTC);
# `current_time` is interpreted as seconds since the epoch.
processes = load_processes('database.p')
df = pd.DataFrame(processes)
df.index = pd.to_datetime(df.current_time, unit='s', utc=True)

# Per job, express CPU time and elapsed time relative to the smallest value
# logged for that job, i.e. relative to its first logged sample.
gb = df.groupby('Job ID', as_index=False)
df['cpu_time'] = gb['cpu_time'].transform(lambda x: x-x.min())
df['reserved_time'] = gb['current_time'].transform(lambda x: x-x.min())
# CPU time as a percentage of the reserved time multiplied by the core count.
# NOTE(review): a job's first sample has reserved_time == 0, so this division
# yields NaN/inf for that row — confirm this is acceptable downstream.
df['activity'] = df['cpu_time'] / df['reserved_time'] / df['num_cores'] * 100
# Last row of every job; this is the input of `get_user_df`.
lasts = gb.last()

def get_user_df(lasts, only_today=False):
    """Summarise CPU usage per user.

    Parameters
    ----------
    lasts : pandas.DataFrame
        One row per job with the columns ``Username``, ``cpu_time``,
        ``reserved_time``, ``num_cores`` and ``activity``.  Times are in
        seconds.  ``current_time`` (seconds since the epoch) is required as
        well when ``only_today`` is set.
    only_today : bool, optional
        If true, keep only the rows whose ``current_time`` falls on the date
        given by the module-level string ``today``.

    Returns
    -------
    pandas.DataFrame
        Indexed by username, with the columns ``'CPU time (days)'``,
        ``'Reserved time (days)'``, ``'IDLE time (days)'`` and
        ``'Activity (%)'`` (the mean of the per-job activity).
    """
    # Work on a copy: a helper column is added and `current_time` may be popped.
    lasts = lasts.copy()
    # Core-seconds reserved by each job.  Summing this per user is exact,
    # whereas sum(reserved_time) * mean(num_cores) is wrong as soon as a
    # user's jobs differ in core count.
    lasts['reserved_core_time'] = lasts['reserved_time'] * lasts['num_cores']
    if only_today:
        # Index by timestamp so that a date string selects that day's rows.
        lasts.index = pd.to_datetime(lasts.pop('current_time'), unit='s', utc=True)
        lasts = lasts.loc[today]

    by_user = lasts.groupby('Username')
    reserved_days = by_user.reserved_core_time.sum() / 86400  # 86400 s per day
    cpu_days = by_user.cpu_time.sum() / 86400
    idle_days = reserved_days - cpu_days
    user_df = pd.DataFrame([cpu_days, reserved_days, idle_days, by_user.activity.mean()],
                           index=['CPU time (days)', 'Reserved time (days)', 'IDLE time (days)', 'Activity (%)']).T
    return user_df

# Display the per-user summary restricted to today's rows.
get_user_df(lasts, only_today=True)

# Data from the last 60 days

In [None]:
# Per-user summary over all logged jobs, largest idle time first.
user_df = get_user_df(lasts)
user_df.sort_values(by='IDLE time (days)', ascending=False)

In [None]:
# Reserved vs. used CPU time per user; bars are ordered by ascending activity.
users_by_activity = user_df.sort_values(by='Activity (%)')
ax = users_by_activity.plot.bar(y=['Reserved time (days)', 'CPU time (days)'])
ax.set_ylabel('CPU time in days')
ax.set_title('CPU time used per user for the last 60 days')

In [None]:
# Summed `cpu_time` per weekday, converted from seconds to years.
# `DatetimeIndex.weekday_name` was removed in pandas 1.0; `day_name()` is its
# replacement, so the old attribute is only used when `day_name` is missing.
weekdays = df.index.day_name() if hasattr(df.index, 'day_name') else df.index.weekday_name
# NOTE(review): `cpu_time` looks cumulative per job, so summing it over every
# sample may overcount — confirm whether per-sample differences are intended.
ax = df.groupby(weekdays, sort=False).cpu_time.sum().divide(86400 * 365).plot.bar()
ax.set_ylabel('CPU time in years')
ax.set_title('CPU time per weekday in {}'.format(month))

In [None]:
# Summed `cpu_time` per hour of the day, converted from seconds to years.
# The index is in UTC; `tz_offset` (presumably an hour offset provided by
# `better_logger` — TODO confirm) shifts it. Wrap the result back into 0-23 so
# that e.g. 23 + 2 is shown as hour 1 instead of hour 25.
local_hour = (df.index.hour + tz_offset) % 24
ax = df.groupby(local_hour, sort=False).cpu_time.sum().divide(86400 * 365).plot.bar()
ax.set_ylabel('CPU time in years')
ax.set_title('CPU time per hour in {}'.format(month))

# Only today

In [None]:
# Per-user summary for today's rows only, largest idle time first.
user_df_today = get_user_df(lasts, only_today=True)
user_df_today.sort_values(by='IDLE time (days)', ascending=False)

In [None]:
# Reserved vs. used CPU time per user for today; bars ordered by ascending activity.
today_by_activity = user_df_today.sort_values(by='Activity (%)')
ax = today_by_activity.plot.bar(y=['Reserved time (days)', 'CPU time (days)'])
ax.set_ylabel('CPU time in days')
ax.set_title('CPU time per user today ({})'.format(today))