# RESTfulSwarm


In [None]:
%matplotlib inline
import pymongo as mg
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns


def get_client(usr, pwd, address, port='27017', auth_db='RESTfulSwarmDB'):
    """Create a MongoDB client for the server at ``address:port``.

    Args:
        usr: user name to authenticate with.
        pwd: password of that user.
        address: host name or IP address of the MongoDB server.
        port: TCP port of the server.
        auth_db: database named in the path component of the connection URI.

    Returns:
        The ``MongoClient`` built from the assembled connection URI.
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import quote_plus

    # Reserved characters such as '@', ':' or '/' in the credentials would
    # otherwise be parsed as URI delimiters, so both parts are percent-escaped.
    url = 'mongodb://%s:%s@%s:%s/%s' % (
        quote_plus(str(usr)), quote_plus(str(pwd)), address, port, auth_db)
    return mg.MongoClient(url)


def get_db(client, db_name):
    """Return the entry called *db_name* obtained by subscripting *client*."""
    database = client[db_name]
    return database


def get_col(db, col_name):
    """Return the entry called *col_name* obtained by subscripting *db*."""
    collection = db[col_name]
    return collection


def query_col(col):
    """Return the first document stored in the collection *col*.

    ``find_one`` fetches a single document, so the collection is not read
    into memory just to take its first element.

    Raises:
        IndexError: if the collection holds no documents.
    """
    first = col.find_one({})
    if first is None:
        # find_one signals "no match" with None; callers of this helper
        # expect an IndexError for an empty collection.
        raise IndexError('collection contains no documents')
    return first


def get_all_cols(db):
    """Return the names of all collections in *db*.

    No filtering happens here; callers that only want job collections have
    to select them by name themselves.
    """
    # Database.collection_names() is deprecated and no longer available in
    # PyMongo 4; list_collection_names() is its replacement.
    return db.list_collection_names()

In [None]:
def get_time(data):
    """Derive the timing figures of one job record.

    Args:
        data: mapping with ``submit_time``, ``start_time`` and ``end_time``
            entries whose values support subtraction.

    Returns:
        A dict with the keys ``waiting_time`` (start - submit),
        ``execution_time`` (end - start) and ``total_time`` (end - submit),
        or ``None`` when *data* is empty or ``None``.

    Raises:
        KeyError: if one of the three time entries is missing.
    """
    # Truthiness covers both an empty record and a missing one (None).
    if not data:
        return None

    submit_time = data['submit_time']
    start_time = data['start_time']
    end_time = data['end_time']

    return {
        'waiting_time': start_time - submit_time,
        'execution_time': end_time - start_time,
        'total_time': end_time - submit_time,
    }


In [None]:
def plot_graph(db):
    """Collect the timing figures of every job collection in *db*.

    Despite its name this function draws nothing: it gathers the data that
    the plotting helpers of this notebook read.

    Args:
        db: database whose collections are scanned.

    Returns:
        A tuple ``(waiting_time, execution_time, total_time, jobs)`` of four
        parallel lists; ``jobs`` holds the collection names and the other
        three the matching values computed by ``get_time``.
    """
    waiting_time = []
    execution_time = []
    total_time = []
    jobs = []
    for col in get_all_cols(db):
        # Only collections whose name contains 'job' are treated as jobs.
        if 'job' not in col:
            continue
        result = get_time(query_col(get_col(db, col)))
        if result is None:
            # get_time reports an empty record as None; skip that job
            # instead of failing on the subscripts below.
            continue
        waiting_time.append(result['waiting_time'])
        execution_time.append(result['execution_time'])
        total_time.append(result['total_time'])
        jobs.append(col)
    return waiting_time, execution_time, total_time, jobs


In [None]:
# Connection settings can be overridden through environment variables so that
# credentials do not have to be edited into the notebook itself.
# NOTE(review): the fallback values are the ones that were hard-coded in this
# cell; consider rotating that password and removing the defaults.
import os

client = get_client(
    os.environ.get('RESTFULSWARM_DB_USER', 'admin'),
    os.environ.get('RESTFULSWARM_DB_PWD', 'kzw'),
    os.environ.get('RESTFULSWARM_DB_HOST', '129.59.107.139'),
)
db = get_db(client, 'RESTfulSwarmDB')


In [None]:
# plot_graph returns four parallel lists; keep the tuple and also bind each
# list to its own notebook-level name for the plotting cells.
graph_data = plot_graph(db)
waiting_time, execution_time, total_time, jobs = graph_data


In [None]:
def plot_waiting_time():
    """Draw a bar chart of the waiting time of every job.

    Reads the notebook-level ``waiting_time`` and ``jobs`` lists; the job
    names are used as the index of the plotted series.
    """
    per_job = pd.Series(data=waiting_time, index=jobs)
    per_job.plot(kind='bar', figsize=(10, 5))
    plt.title('Job Waiting Time')
    plt.xlabel('Job')
    plt.ylabel('Waiting Time(s)')

In [None]:
def plot_execution_time():
    """Draw a bar chart of the execution time of every job.

    Reads the notebook-level ``execution_time`` and ``jobs`` lists; the job
    names are used as the index of the plotted series.
    """
    per_job = pd.Series(data=execution_time, index=jobs)
    per_job.plot(kind='bar', figsize=(10, 5))
    plt.title('Job Execution Time')
    plt.xlabel('Job')
    plt.ylabel('Execution Time(s)')


In [None]:
def plot_total_time():
    """Draw a bar chart of the total time of every job.

    Reads the notebook-level ``total_time`` and ``jobs`` lists; the job names
    are used as the index of the plotted series.
    """
    per_job = pd.Series(data=total_time, index=jobs)
    per_job.plot(kind='bar', figsize=(10, 5))
    plt.title('Job Total Time')
    plt.xlabel('Job')
    plt.ylabel('Total Time(s)')


In [None]:
def plot_resource_graph():
    """Draw a heat map of worker utilization over time.

    Reads every document of the 'WorkersResourceInfo' collection of the
    notebook-level ``db``.

    NOTE(review): assumes each document describes one worker, with a
    'hostname' entry and a 'details' list of (time stamp, utilization rate)
    pairs — confirm against the code that writes this collection.

    Returns:
        The object returned by ``sns.heatmap``.
    """
    workers_resource_col = get_col(db, 'WorkersResourceInfo')
    # All worker documents are needed here, not only the first one.
    workers = list(workers_resource_col.find({}))

    # hostname -> {time stamp: utilization rate}
    utilization = {
        worker['hostname']: {detail[0]: detail[1] for detail in worker['details']}
        for worker in workers
    }

    # heatmap needs a numeric matrix: one row per worker, one column per time
    # stamp. The constructor puts the outer keys on the columns, hence the
    # transpose after ordering the time stamps.
    df = pd.DataFrame(utilization).sort_index().T
    df.index.name = 'Worker'
    df.columns.name = 'Time Stamp'

    return sns.heatmap(data=df, annot=True, linewidths=0.5)

### WaitingTime vs Job


In [None]:
# Draw the bar chart of per-job waiting times.
plot_waiting_time()

### ExecutionTime vs Job


In [None]:
# Draw the bar chart of per-job execution times.
plot_execution_time()


### TotalTime vs Job


In [None]:
# Draw the bar chart of per-job total times.
plot_total_time()


### Workers Resource Utilization Graph


In [None]:
# Draw the heat map built from the 'WorkersResourceInfo' collection.
plot_resource_graph()