### Notebook for Monitoring and Logging Local System Performance

In [None]:
import psutil
import time
import datetime
import pandas as pd
import numpy as np

In [None]:
# Reference point for the session: current epoch time in seconds, rounded to
# two decimal places. Sample times are later reported relative to this value.
app_start_time = round(time.time(), ndigits=2)

In [None]:
# Bare expression: shows the recorded start time as the cell's output.
app_start_time

In [None]:
# Column labels used to create the (initially empty) master DataFrame.
columns = [
    'name',
    'pid',
    'num_threads',
]
#index = pd.date_range(todays_date-datetime.timedelta(10), periods=10,freq='M')

#### Create the Master Data Frame

In [None]:
# Master frame: starts with no rows and only the labels in `columns`;
# the polling loop further down adds the sampled rows to it.
df = pd.DataFrame(columns = columns)

#### Create the Temporary DataFrame to Append to the Master

In [None]:
#tf = pd.DataFrame(columns = columns)

In [None]:
#tf

In [None]:
# Sample counter: the polling loop increments it once per completed pass.
sample = 0

In [None]:
# Running row label for the master frame: the polling loop increments it once
# for every process it inspects, so each recorded row gets a distinct label.
index = 0

In [None]:
# --- Session settings -------------------------------------------------------
# Only processes whose parent process has this name are recorded.
application = "PBIDesktop.exe"

# Duration of the logging session, in minutes.
length = 22
# Time between two polling passes, in seconds.
interval = 5

# Epoch time at which the session stops.
t_end = int(length) * 60 + time.time()
# Timestamp (rounded to 2 decimals) from which the first interval is measured.
prev_sample_time = round(time.time(), 2)

Loop through the psutil retrieval task.

In [None]:
# Poll the process table once per `interval` seconds until `t_end`, recording
# every process whose parent process is named `application`.
#
# Rows are collected in plain lists and converted to a DataFrame once, after
# the loop: DataFrame.append no longer exists in pandas 2.x, and it copied the
# whole frame for every added row.
records = []         # one dict per matching process per sample
record_index = []    # value of `index` at the moment each record was taken

try:
    while time.time() < t_end:

        elapsed = time.time() - prev_sample_time
        if elapsed <= interval:
            # Next sample is not due yet. Sleep instead of spinning so the
            # monitor does not itself keep a CPU core busy, and never sleep
            # beyond the end of the session.
            pause = min(interval - elapsed, t_end - time.time())
            if pause > 0:
                time.sleep(pause)
            continue

        prev_sample_time = round(time.time(), 2)  # Reset the previous sample time to the current sample time.

        # Process currently loops through all the running processes and checks the parent name before obtaining results.
        # Better to first identify the PBI parent process and instead loop through its children.
        # This will save hundreds of loops through unwanted children.
        for proc in psutil.process_iter():

            try:
                parent = proc.parent()
                # parent() returns None when the process has no parent to report.
                if parent is not None and str(parent.name()) == application:
                    pr = proc.as_dict(attrs=["name", "pid", "num_threads", "memory_info"])
                    pr.update({'sample': sample, 'parent': parent, 'timestamp': str(prev_sample_time)})
                    records.append(pr)
                    record_index.append(index)

            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # A process can exit, or be off limits, between being listed
                # and being inspected; skip it. Catching only these errors
                # keeps Ctrl-C / kernel interrupts and real bugs visible.
                pass

            index += 1

        print('Sample ' + str(sample) + ' constructed at ' + str(round(prev_sample_time - app_start_time, 2)))

        # Advance the sample by 1
        sample += 1

finally:
    # Runs on interruption too, so whatever was sampled so far ends up in `df`.
    if records:
        tf = pd.DataFrame.from_records(records, index=record_index)
        # Skip the still-empty master frame; concatenating it adds nothing.
        df = tf if df.empty else pd.concat([df, tf])

print('While loop finished because condition not met.')

#### The Resultant DataFrame

In [None]:
#df

#### Unnest the parent column into its constituent parts and keep only the name.

In [None]:
# Turn each entry of the 'parent' column into text and split that text on
# commas; every comma-separated piece lands in its own integer-labelled column.
df_parent = (
    df['parent']
    .astype('str')
    .str.split(",", expand=True)
)
#df_parent

In [None]:
#df

In [None]:
# Column 1 presumably holds the " name='...'" piece of the parent text
# (TODO confirm against the installed psutil). Trim the characters of " name="
# from both ends, then trim the surrounding single quotes.
df_parent[1] = (
    df_parent[1]
    .astype('str')
    .str.strip(" name=")
    .str.strip("'")
)

In [None]:
#df_parent

In [None]:
# Attach only the cleaned parent-name column (label 1) and discard the raw
# 'parent' column. Picking the one wanted column, instead of joining them all
# and dropping labels 0 and 2 afterwards, works however many comma-separated
# pieces the parent text was split into: a missing label 2 no longer raises,
# and no extra integer-labelled column is left behind to collide with the
# integer-labelled columns of the memory_info split joined in later.
df = df.join(df_parent[[1]]).drop('parent', axis = 'columns')
#df

In [None]:
# Give the joined column labelled 1 a readable name.
df = df.rename({1: 'Parent'}, axis='columns')

In [None]:
#df

#### Unnest the memory_info column into its constituent parts

In [None]:
# Turn each memory_info entry into text and split it on whitespace, one
# integer-labelled column per whitespace-separated piece.
expand = (
    df['memory_info']
    .astype('str')
    .str.split(expand=True)
)
#expand

In [None]:
# Join the split memory_info pieces, then remove the raw column and every
# piece except those labelled 0, 9 and 11 (the ones renamed further down).
df10 = (
    df.join(expand)
    .drop(columns=['memory_info', 1, 2, 3, 4, 5, 6, 7, 8, 10])
)
#df10

#### Strip out the unused text to leave int values. Convert from bytes to megabytes.

In [None]:
# Column 9: trim the characters of "pagefile=" and the comma from the ends,
# parse what remains as an integer, and divide by one million.
df10[9] = (
    df10[9]
    .str.strip('pagefile=')
    .str.strip(',')
    .astype('int64')
    .div(1_000_000)
)

In [None]:
# Column 11: trim the characters of "private=" and the closing parenthesis
# from the ends, parse what remains as an integer, and divide by one million.
df10[11] = (
    df10[11]
    .str.strip('private=')
    .str.strip(')')
    .astype('int64')
    .div(1_000_000)
)

In [None]:
# Column 0: trim the characters of "pmem(rss=" and the comma from the ends,
# parse what remains as an integer, and divide by one million.
df10[0] = (
    df10[0]
    .str.strip('pmem(rss=')
    .str.strip(',')
    .astype('int64')
    .div(1_000_000)
)

In [None]:
#df10

#### Shape the final dataframe and write out to Excel

In [None]:
# Replace the integer labels of the three memory columns with readable names.
df20 = df10.rename(
    columns={
        0: 'Working Set',
        9: 'Pagefile',
        11: 'Private',
    }
)

In [None]:
# Columns of the final report, in output order.
# NOTE(review): 'timestamp' is recorded during polling but is not listed here,
# so it is absent from the report - confirm that this is intended.
cols = [
    'sample',
    'Parent',
    'pid',
    'name',
    'num_threads',
    'Working Set',
    'Pagefile',
    'Private',
]

In [None]:
# Keep only the report columns, in the order given by `cols`.
df20 = df20.loc[:, cols]

In [None]:
#df20

In [None]:
# Save the report as an Excel workbook named log.xlsx (relative path).
df20.to_excel(excel_writer='log.xlsx')