# Example Crash Report Notebook

Release Date: 

Target Engine Version (s): 

Analysis Date: 

Author: 



In [1]:
"""
Add module path to sys path -- necessary if running from jupyter notebook and not an IDE
"""

import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [3]:
"""
General import statements
"""
import crash_analysis
import pandas as pd

"""
Constants
"""
cache_filename = 'betacrashes_1006.csv'
zipfile_location = os.path.join(module_path, 'tmp' + os.sep)
cache_location = os.path.join(module_path, 'src', 'data' + os.sep, cache_filename)
zipfile_location

'C:\\Dev\\CrashAnalysis\\tmp\\'

In [4]:
"""
Download crashes by date range
"""
from crash_analysis.downloader import download_time_range
import datetime

now = datetime.datetime.now()
start = now + datetime.timedelta(days=-1)
end = now


download_time_range(start, end, zipfile_location)

About to download 33 records.


In [5]:
"""
Parse zipfiles into dataframe
"""
from crash_analysis.parser import extract_zipfiles, xmldocs_to_dataframe

extract_zipfiles(zipfile_location)
df = xmldocs_to_dataframe(zipfile_location)

df.drop_duplicates(inplace=True)

BadZipFile: C:\Dev\CrashAnalysis\tmp\37e77ba2-921a-4634-b6c1-46ddebbe2761.zip


In [6]:
"""
Save/load dataframe to/from CSV
"""
from crash_analysis import read_csv

file_exists = os.path.isfile(cache_location)


if file_exists:
    df = read_csv(cache_location)
else:
    df.to_csv(cache_location, encoding='utf-8')

In [8]:
"""
Get metadata about dataframe
"""
col_names = crash_analysis.get_columns(df)
versions = df['AppVersion'].apply(str)
versions = versions.value_counts()
num_crashes = len(df)


print(col_names)
print()
print(versions)
print()
print('Number of crashes: {0}'.format(num_crashes))

['Unnamed: 0', 'ACCDT_Field', 'Active_ClientFileName', 'Active_Field', 'Active_Form', 'Active_FormsetID', 'Active_FormsetVersion', 'AppName', 'AppVersion', 'BasWin15.INI', 'BasWin16.INI', 'BasWin17.INI', 'Batch_ClientFileName', 'CrashGUID', 'CrashRpt', 'Current_Calcsection', 'CustNum', 'CustomProps', 'DataFileCount', 'ExceptionAddress', 'ExceptionCode', 'ExceptionModule', 'ExceptionModuleBase', 'ExceptionModuleVersion', 'ExceptionType', 'FileList', 'FormsPrinter', 'GUIResourceCount', 'GeoLocation', 'ImageName', 'InnerException', 'InstallType', 'InvParamExpression', 'InvParamFile', 'InvParamFunction', 'InvParamLine', 'Last_Calcsection', 'ManagedException', 'ManagedException.txt', 'MemoryUsageKbytes', 'Message', 'OSIs64Bit', 'OpenHandleCount', 'OperatingSystem', 'ProWin15.INI', 'ProWin16.INI', 'ProWin17.INI', 'ProblemDescription', 'Source', 'StackTrace', 'SystemTimeUTC', 'TimeStamp', 'TrustedCustomerException.txt', 'WorkStationName', 'WorkStationType', 'crashdump.dmp', 'crashrpt.xml', 'e

In [10]:
"""
Filter all crashes by app version. 
Output the number of crashes in this version. 
"""

from crash_analysis import filter_dataframe

march_release_df = filter_dataframe(df, AppVersion=2016120014)
len(march_release_df)

52

In [11]:
"""
Printed below will be a breakdown of the user's environment
"""

march_release_df.groupby(['AppName', 'InstallType', 'OperatingSystem'])['CrashGUID'].count()

AppName                         InstallType  OperatingSystem                                       
ProSeries - 2016                Network      Windows 10 Home Build 14393                               1
                                             Windows 7 Professional Build 7601                         6
                                             Windows Server 2012 Essentials Build 9200                 1
                                             Windows Server 2012 R2 Standard Build 9600                1
                                             Windows Small Business Server 2011 Standard Build 7601    2
                                Standalone   Windows 10 Home Build 14393                               2
                                             Windows 10 Home Build 15063                               4
                                             Windows 10 Pro Build 15063                                2
                                             Windows 7 Profe

In [12]:
"""
Printed below will be the dlls or executables that were the sources 
of a crash, and how many caused a crash.
"""
def get_last_in_path(path_string):
    return str(path_string).split('\\')[-1]

march_release_df['ExceptionModule'] = march_release_df['ExceptionModule'].apply(get_last_in_path)
march_release_df['ExceptionModule'].value_counts()

KERNELBASE.dll          18
nan                     16
protax16.exe             6
iebho.dll                3
c4dll.dll                2
fcsmapi.dll              2
CrashRpt1403.dll         2
mfc140.dll               1
user32.dll               1
mso20win32client.dll     1
Name: ExceptionModule, dtype: int64

In [13]:
"""
Get actual addresses of exceptions (subtract base address from exception address)
"""
def add_const_ex_col(df):
    to_base_16 = lambda x: int(x, 16) if not pd.isnull(x) else 0
    ex1 = df['ExceptionAddress'].apply(to_base_16)
    ex0 = df['ExceptionModuleBase'].apply(to_base_16)
    df['const_exception_addr'] = ex1 - ex0
    df['const_exception_addr'] = df['const_exception_addr'].apply(hex)
    
add_const_ex_col(march_release_df)

In [14]:
"""
These are the machine-agnostic exception addresses for all of the crashes, 
along with how many of this type of exception occurred
"""
march_release_df['const_exception_addr'].value_counts()

0xc54f                6
0x0                   6
0xda9f2               5
0xeb832               3
0x38f2                2
0x22bab               2
0x427a3               2
0x2fa0                2
0x6fb189              2
0x6e0077              1
0x6c0069              1
0x790053              1
0x17f2a               1
0x51d0000             1
0x11d4d               1
0xffffffffd2e8013a    1
0x21ccf48             1
0x105ed9              1
0x9fa6                1
0x630000              1
0x25cee7              1
0x1a00845             1
0x240119              1
0x78100e              1
0x22bbb               1
0x77                  1
0x15608               1
0x10c48300            1
0xbdae8               1
0x10f18               1
0x5bd330              1
Name: const_exception_addr, dtype: int64

In [None]:
"""
Here is a breakdown of the exception module, address number, and customer CAN, 
along with the number of crashes that occured for each grouping. 
"""

march_release_df.groupby([ 'ExceptionModuleName','const_exception_addr', 'CustNum'])['CrashGUID'].count()

In [17]:
"""
Display non-empty problem descriptions
"""

customer_desc_df = crash_analysis.get_column(march_release_df, 'ProblemDescription')

full_desc = crash_analysis.remove_empty(customer_desc_df)
print(full_desc)

162                        attempting to open the program
196                                          opening file
463                        creating PDF file for a client
492                                   Opening the program
497                        attempting to open the program
578                  Inputting password to access program
1066    Trying to login to program even after updating...
1206    I am temp locked . Tried to use Pro series to ...
1295                               will not open, crashed
1653                           Deleting file off Homebase
1747                        Trying to login into program!
1951                     Printing out a K1 reconciliation
2153                  CHANGING ADDRESS FOR PAGE 1 OF 1040
2468    Trying to print a client's tax return that was...
2919                                Launching the program
3057                                      trying to print
3227                                    it keeps crashing
Name: ProblemD

In [18]:
"""
Find frequency count (most mentioned root words) from
problem descriptions. 
"""
from crash_analysis.analysis import stem_frequency

stem_frequency(full_desc)


total words: 74
program    	:    7 	 [['program'], ['program'], ['program'], ['program'], ['program'], ['program']]
tri        	:    5 	 [['trying'], ['tried'], ['trying'], ['trying'], ['trying']]
open       	:    5 	 [['open'], ['opening'], ['opening'], ['open'], ['open']]
print      	:    3 	 [['printing'], ['print'], ['print']]
file       	:    3 	 [['file'], ['file'], ['file']]
lock       	:    3 	 [['locked'], ['lock'], ['locked']]
password   	:    2 	 [['password'], ['password']]
page       	:    2 	 [['page'], ['pages']]
attempt    	:    2 	 [['attempting'], ['attempting']]
crash      	:    2 	 [['crashed'], ['crashing']]
client     	:    2 	 [['client'], ['client']]
one        	:    2 	 [['one'], ['one']]
login      	:    2 	 [['login'], ['login']]
tax        	:    2 	 [['tax'], ['tax']]
return     	:    2 	 [['return'], ['return']]
input      	:    1 	 ['inputting']
approxim   	:    1 	 ['approximately']
launch     	:    1 	 ['launching']
1          	:    1 	 ['1']
chang      

([('program', 7),
  ('tri', 5),
  ('open', 5),
  ('print', 3),
  ('file', 3),
  ('lock', 3),
  ('password', 2),
  ('page', 2),
  ('attempt', 2),
  ('crash', 2),
  ('client', 2),
  ('one', 2),
  ('login', 2),
  ('tax', 2),
  ('return', 2),
  ('input', 1),
  ('approxim', 1),
  ('launch', 1),
  ('1', 1),
  ('chang', 1),
  ('k1', 1),
  ('even', 1),
  ('temp', 1),
  ('put', 1),
  ('creat', 1),
  ('60', 1),
  ('pdf', 1),
  ('address', 1),
  ('sinc', 1),
  ('delet', 1),
  ('done', 1),
  ('corerect', 1),
  ('flash', 1),
  ('homebas', 1),
  ('reconcili', 1),
  ('updat', 1),
  ('2017', 1),
  ('appar', 1),
  ('use', 1),
  ('seri', 1),
  ('april', 1),
  ('1040', 1),
  ('access', 1),
  ('keep', 1),
  ('pro', 1)],
 74,
                     words
 attempt        attempting
 open                 open
 program           program
 open              opening
 file                 file
 creat            creating
 pdf                   pdf
 file                 file
 client             client
 open          