# MATLAB License Server Log File Analysis

In [None]:
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt

In [None]:
def read_log_df(filename, from_date = None, to_date = None):
    """Parse a license manager log into a DataFrame of checkout sessions.

    The log is scanned line by line for three kinds of entries:

    * ``TIMESTAMP <date>`` lines, which set the date applied to all
      following events (event lines themselves only carry a time of day);
    * ``<time> (MLM) OUT: "<license>" <user>`` lines (license checked out);
    * ``<time> (MLM) IN: "<license>" <user>`` lines (license returned).

    Each IN is paired with the most recent still-open OUT of the same
    license by the same user, producing one output row per session.

    Parameters
    ----------
    filename : str
        Path of the log file to read.
    from_date, to_date : optional
        Accepted for interface compatibility; not currently used by this
        function (no date filtering is applied).

    Returns
    -------
    pandas.DataFrame
        Columns ``User``, ``License``, ``Start_time`` and ``End_time``.
        Both time columns are strings of the form ``"<date> <time>"``.

    Notes
    -----
    Problems in the log are reported with ``print`` and the offending line
    is skipped: an IN without a matching open OUT, or an OUT/IN event that
    appears before the first TIMESTAMP line (its date is unknown).
    """
    re_timestamp = re.compile(r"TIMESTAMP ([\d/]+)")
    # One pattern for both event kinds. Leading whitespace is tolerated so
    # that times written with a space-padded hour (" 9:15:02") also match.
    re_event = re.compile(r'^\s*([\d:]+) \(MLM\) (OUT|IN): "(\w+)" (.+?)\s*$')

    cur_date = None
    # user -> license -> list of checkout timestamps that are still open.
    # A list (used as a stack) keeps concurrent checkouts of the same
    # license by the same user instead of overwriting the earlier one.
    users = {}
    output_list = []

    # The context manager closes the file even if parsing raises.
    with open(filename, 'r') as file:
        for line in file:
            # watch for timestamps and update date
            p = re_timestamp.search(line)
            if p:
                cur_date = p.group(1)

            p = re_event.match(line)
            if not p:
                continue
            time_of_day, action, license_name, user = p.groups()

            if cur_date is None:
                # No TIMESTAMP seen yet, so the event cannot be dated.
                print("WARNING: " + action + " before first TIMESTAMP for user '" + user
                      + "' and license '" + license_name + "', skipping line:\n" + line)
                continue

            stamp = cur_date + " " + time_of_day

            if action == "OUT":
                # check out one license
                users.setdefault(user, {}).setdefault(license_name, []).append(stamp)
                continue

            # check in license, add row to output
            pending = users.get(user, {}).get(license_name)
            if not pending:
                print("WARNING: IN without preceding OUT for user '" + user + "' and license '"
                      + license_name + "' at " + cur_date + " in line:\n" + line)
                print("Users: ", users, "\n")
                continue

            # Pair with the most recent open checkout (last in, first out).
            output_list.append([user, license_name, pending.pop(), stamp])
            if not pending:
                del users[user][license_name]

    output_df = pd.DataFrame(data=output_list, columns=['User','License','Start_time','End_time'])
    return output_df

In [None]:
# Parse the raw license log, then keep a CSV copy of the session table.
log_df = read_log_df(filename="lm_TMW-2018-2021.log")
log_df.to_csv(path_or_buf="lm_TMW-2018-2021.csv")

In [None]:
# Turn the textual start/end stamps into real datetimes, then derive the
# length of every session as End_time - Start_time.
for time_col in ("Start_time", "End_time"):
    log_df[time_col] = pd.to_datetime(log_df[time_col])
log_df["Duration"] = log_df["End_time"] - log_df["Start_time"]
log_df

## Total Usage of All Licenses

In [None]:
# Histogram of checkout start times across all licenses and users.
fig = plt.figure()
ax = plt.axes()
start_times = log_df["Start_time"]
start_times.hist(ax=ax, figsize=(12, 3), bins=50)
ax.set_xlabel("Start date")
ax.set_ylabel("Count")

## Usage of Each License Type

In [None]:
# Number of recorded sessions per license type, most used first.
group_license = log_df.groupby("License")
license_counts = group_license["User"].count()
license_counts.sort_values(ascending=False)

In [None]:
# One start-time histogram per license type; label each plot with its group key.
hists = group_license.hist(column=["Start_time"], figsize=(12,3), bins=50, sharex=True, stacked=True)
for license_name in hists.index:
    # Each entry is a 2-D grid of axes; only one column is plotted, so take [0][0].
    license_ax = hists[license_name][0][0]
    license_ax.set_title(license_name)
    license_ax.set_xlabel("Start time")
    license_ax.set_ylabel("Count")
plt.draw()

## Duration of Use

In [None]:
# Histogram of session durations, expressed in hours.
fig = plt.figure()
ax = plt.axes()
# Convert via total seconds instead of astype('timedelta64[h]'): pandas >= 2.0
# only supports s/ms/us/ns timedelta resolutions and raises on 'h'. This also
# keeps fractional hours rather than truncating to whole hours.
duration_hours = log_df["Duration"].dt.total_seconds() / 3600
duration_hours.hist(ax = ax, figsize=(12,3), bins=50)
plt.xlabel("Duration [hours]")
plt.ylabel("Count")

## By Users

In [None]:
# Number of recorded sessions per user, heaviest users first.
group_user = log_df.groupby("User")
user_counts = group_user["License"].count()
user_counts.sort_values(ascending=False)

In [None]:
# One start-time histogram per user; label each plot with its group key.
hists = group_user.hist(column=["Start_time"], figsize=(12,3), bins=50, sharex=True, stacked=True)
for user_name in hists.index:
    # Each entry is a 2-D grid of axes; only one column is plotted, so take [0][0].
    user_ax = hists[user_name][0][0]
    user_ax.set_title(user_name)
    user_ax.set_xlabel("Start time")
    user_ax.set_ylabel("Count")
plt.draw()