## Debug Functions
This notebook holds debug library code that's also in `debug.py`.  The code makes it easy to record variable values and states throughout program execution.  Basic usage is to call the `debug.init()` function to initialize a `dfDebug` DataFrame, followed by `debug.loginfo()` calls that record the state of variables and DataFrames.  At the end of program execution, `dfDebug` can be written to a CSV file to provide a filterable spreadsheet for investigating and exploring what the program did.

Available `debug.loginfo` logtypes:

* `colinfo` - Specify Desc, DataFrame name and Column name (Desc2 and Val2 are optional).  Outputs the overall count (size), counts by datatype (string, int, float), and null/not-null counts.
* `indexsize` - Specify Desc and DataFrame name as arguments (Desc2 and Val2 are optional; need dummy string for col if using Desc2 and Val2)
* `time` - all arguments optional except Desc (Desc2 and Val2 are optional; need dummy string for df and col if using Desc2 and Val2)
* `info` - Records only Desc plus the optional Desc2 and Val2; specify dummy arguments for DataFrame name and col if using Desc2 and Val2

In [1]:
import pandas as pd
import numpy as np
import datetime as dt

#Debug library functions
def init():
    """Create the debug log DataFrame, seeded with a single placeholder row.

    The placeholder row (Desc == 'Dummy_val') exists only so that the count
    columns can be cast to integer; loginfo() drops it once a real row has
    been appended.

    Returns:
        DataFrame holding the log columns and one 'Dummy_val' row at index 0.
    """
    count_columns = ['size', 'dtype_string', 'dtype_int', 'dtype_float', 'isnull', 'notnull']
    all_columns = ['Desc', 'colname'] + count_columns + ['Desc2', 'Val2', 'time']

    #Start from an empty frame that already has every log column
    dfDebug = pd.DataFrame(data={name: [] for name in all_columns})

    #Seed a placeholder row so Pandas will dtype the count columns as integer
    dfDebug.loc[0, :] = ['Dummy_val', '', 0, 0, 0, 0, 0, 0, '', '', 0]
    dfDebug[count_columns] = dfDebug[count_columns].astype('int')

    return dfDebug

def CountDTypeString(df, col):
    """Return how many values in column `col` of `df` are Python `str` instances."""
    is_string = df[col].apply(lambda value: isinstance(value, str))
    return int(is_string.sum())
def CountDTypeInt(df, col):
    """Return how many values in column `col` of `df` are Python `int` instances.

    bool is a subclass of int, so bool values pass this check as well.
    """
    is_int = df[col].apply(lambda value: isinstance(value, int))
    return int(is_int.sum())
def CountDTypeFloat(df, col):
    """Return how many values in column `col` of `df` are Python `float` instances.

    NaN is itself a float, so missing values stored as NaN are counted here too.
    """
    is_float = df[col].apply(lambda value: isinstance(value, float))
    return int(is_float.sum())
def CountNull(df, col):
    """Return the number of null (missing) values in column `col` of `df`."""
    null_mask = df[col].isnull()
    return int(null_mask.sum())
def CountNotNull(df, col):
    """Return the number of non-null values in column `col` of `df`."""
    present_mask = df[col].notnull()
    return int(present_mask.sum())

#Add a new row to dfDebug
def loginfo(dfDebug, logtype, desc, df=None, col='', desc2='', val2=''):
    """Append one row describing the current program state to `dfDebug`.

    Args:
        dfDebug: Debug log DataFrame (as created by init()); modified in place.
        logtype: Which kind of row to record:
            'colinfo'   - size, str/int/float counts and null/not-null counts of df[col]
            'indexsize' - df.index.size only
            'time'      - current time formatted as HH:MM:SS.ffffff
            'info'      - only desc, desc2 and val2
        desc: Text stored in the 'Desc' column.
        df: DataFrame to inspect; used by 'colinfo' and 'indexsize'.
        col: Column of `df` to inspect; used by 'colinfo'.
        desc2: Optional label stored in the 'Desc2' column.
        val2: Optional value stored in the 'Val2' column.

    Returns:
        The same `dfDebug` object, with the new row appended.

    Raises:
        ValueError: If `logtype` is not one of the supported values.
    """
    #Reject unknown logtypes before touching the log; otherwise no row gets built
    #and the append below fails with an unrelated UnboundLocalError
    valid_logtypes = ('colinfo', 'indexsize', 'time', 'info')
    if logtype not in valid_logtypes:
        raise ValueError("Unknown logtype %r; expected one of %s"
                         % (logtype, ', '.join(valid_logtypes)))

    #Construct row as a list of values and append row to dfDebug
    if logtype == 'colinfo':
        lst = [desc, col, df[col].size, CountDTypeString(df, col), CountDTypeInt(df, col),
               CountDTypeFloat(df, col), CountNull(df, col), CountNotNull(df, col), desc2, val2, '']
    elif logtype == 'indexsize':
        lst = [desc, '', df.index.size, '', '', '', '', '', desc2, val2, '']
    elif logtype == 'time':
        lst = [desc, '', '', '', '', '', '', '', desc2, val2, dt.datetime.now().strftime('%H:%M:%S.%f')]
    else:
        #'info': only the descriptive fields are filled in
        lst = [desc, '', '', '', '', '', '', '', desc2, val2, '']
    dfDebug.loc[dfDebug.index.size] = lst

    #Drop the placeholder row left by init() now that a real row exists.
    #Done in place so callers that do not reassign the return value still see the update.
    if dfDebug.loc[0, 'Desc'] == 'Dummy_val':
        dfDebug.drop(0, axis=0, inplace=True)
        dfDebug.reset_index(drop=True, inplace=True)

    #Control dtype of count columns for nicer display
    lst_count_cols = ['size', 'dtype_string', 'dtype_int', 'dtype_float', 'isnull', 'notnull']
    dfDebug[lst_count_cols] = dfDebug[lst_count_cols].astype('str')
    return dfDebug

### Example logging

In [2]:
#Sample frame: column A mixes str, int, float and NaN values; column B holds only ints
df = pd.DataFrame(data={'A':['a',1,3,2.0,np.nan], 'B':[10, 20, 30, 40, 50]})
#df.info() prints its report itself and returns None, so print() shows that None before the frame
print(df.info(), '\n\n', df)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   A       4 non-null      object
 1   B       5 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 208.0+ bytes
None 

      A   B
0    a  10
1    1  20
2    3  30
3    2  40
4  NaN  50


In [3]:
#Start a fresh debug log, record the type/null breakdown of column A, then display the log
dfDebug = init()
dfDebug = loginfo(dfDebug, 'colinfo', 'Column A with mixed-type values', df, 'A')
dfDebug

Unnamed: 0,Desc,colname,size,dtype_string,dtype_int,dtype_float,isnull,notnull,Desc2,Val2,time
0,Column A with mixed-type values,A,5,1,2,2,1,4,,,


### Examples of all `loginfo` logtypes

In [5]:
#Start a new log and record one row per logtype
dfDebug = init()
#colinfo: size, per-type counts and null/not-null counts for one column
dfDebug = loginfo(dfDebug, 'colinfo', 'Column A info', df, 'A')
dfDebug = loginfo(dfDebug, 'colinfo', 'Column B info', df, 'B')
#indexsize: only the number of rows in df
dfDebug = loginfo(dfDebug, 'indexsize', 'Just index size', df)
#time: timestamp only; df and col are left at their defaults
dfDebug = loginfo(dfDebug, 'time', 'time at end')
#info: free-form Desc2/Val2 pair; None and '' are placeholders for the unused df and col
dfDebug = loginfo(dfDebug, 'info', 'Customized nfo about df',None,'', 'Sum of column B', df['B'].sum())
dfDebug

Unnamed: 0,Desc,colname,size,dtype_string,dtype_int,dtype_float,isnull,notnull,Desc2,Val2,time
0,Column A info,A,5.0,1.0,2.0,2.0,1.0,4.0,,,
1,Column B info,B,5.0,0.0,5.0,0.0,0.0,5.0,,,
2,Just index size,,5.0,,,,,,,,
3,time at end,,,,,,,,,,12:28:26.867799
4,Customized nfo about df,,,,,,,,Sum of column B,150.0,
