# Crash Report Jan 18, 2016


## Step 1: Setup

In [1]:
"""
General import statements
"""
import crash_analysis
import pandas as pd
from matplotlib import pyplot as plt 
%matplotlib inline

In [3]:
"""
Parse zipfiles into dataframe
"""
from crash_analysis.parser import extract_zipfiles, xmldocs_to_dataframe
# NOTE(review): absolute, machine-specific Windows path; the notebook only
# re-runs on a machine that has this directory.
zipfile_location = 'C:\\CrashReports\\'

# Presumably unpacks the crash-report archives found under zipfile_location
# (project helper; confirm against crash_analysis.parser).
extract_zipfiles(zipfile_location)

# Build the frame and drop exact duplicate rows in one expression, instead of
# mutating it afterwards with inplace=True.
df = xmldocs_to_dataframe(zipfile_location).drop_duplicates()

In [4]:
"""
Save/load dataframe to/from CSV
"""
# CSV file used as a cache for the parsed crash reports.
cache_location = 'src/data/2016040014.csv'
# Set to True to write the frame currently held in `df` to the cache;
# leave False to load `df` from the cache instead.
new_data = False

if not new_data:
    # NOTE(review): this rebinds `df`, replacing whatever an earlier cell built.
    df = crash_analysis.read_csv(cache_location)
else:
    df.to_csv(cache_location, encoding='utf-8')

In [5]:
"""
Get metadata about dataframe
"""
# Column names of the frame, as reported by the project helper.
col_names = crash_analysis.get_columns(df)
# Number of records per distinct AppVersion value.
versions = df['AppVersion'].value_counts()
# Length of the frame (presumably one row per crash report -- TODO confirm).
num_crashes = len(df)

# Plain-text dump of the three summaries, in the order computed above.
print(col_names)
print(versions)
print(num_crashes)

['ACCDT_Field', 'Active_ClientFileName', 'Active_Field', 'Active_Form', 'Active_FormsetID', 'Active_FormsetVersion', 'AppName', 'AppVersion', 'BasWin16.INI', 'Batch_ClientFileName', 'CrashGUID', 'CrashRpt', 'Current_Calcsection', 'CustNum', 'CustomProps', 'DataFileCount', 'ExceptionAddress', 'ExceptionCode', 'ExceptionModule', 'ExceptionModuleBase', 'ExceptionModuleVersion', 'ExceptionType', 'FileList', 'FormsPrinter', 'GUIResourceCount', 'GeoLocation', 'ImageName', 'InnerException', 'InstallType', 'Last_Calcsection', 'ManagedException', 'ManagedException.txt', 'MemoryUsageKbytes', 'Message', 'OSIs64Bit', 'OpenHandleCount', 'OperatingSystem', 'ProWin16.INI', 'ProblemDescription', 'Source', 'StackTrace', 'SystemTimeUTC', 'TimeStamp', 'WorkStationName', 'WorkStationType', 'crashdump.dmp', 'crashrpt.xml']
2016040014    5085
Name: AppVersion, dtype: int64
5085


In [6]:
"""
Customer Description Analysis
"""
from crash_analysis.analysis import remove_empty, get_column, stem_frequency
# Pull the free-text ProblemDescription column and pass it through
# remove_empty (presumably dropping blank entries -- confirm in
# crash_analysis.analysis).
customer_desc = remove_empty(get_column(df, 'ProblemDescription'))

# Prints the word total and per-stem frequency table seen in the cell output.
stem_frequency(customer_desc)
# Emit a blank line with a call that behaves the same on Python 2 and 3.
# A bare `print` is a statement only on Python 2; on Python 3 it would
# evaluate to the function object and be echoed as the cell's output.
print('')

total words: 7650
instal     	:  463 	 [['installing'], ['installing'], ['installing'], ['installed'], ['install'], ['installing']]
open       	:  349 	 [['opening'], ['opening'], ['opening'], ['opening'], ['opened'], ['opening']]
2016       	:  318 	 [['2016'], ['2016'], ['2016'], ['2016'], ['2016'], ['2016']]
program    	:  254 	 [['program'], ['program'], ['program'], ['program'], ['program'], ['program']]
client     	:  244 	 [['clients'], ['clients'], ['client'], ['client'], ['client'], ['client']]
updat      	:  230 	 [['updating'], ['updates'], ['updates'], ['updates'], ['updates'], ['updates']]
proseri    	:  217 	 [['proseries'], ['proseries'], ['proseries'], ['proseries'], ['proseries'], ['proseries']]
file       	:  217 	 [['file'], ['files'], ['file'], ['file'], ['file'], ['file']]
tri        	:  191 	 [['trying'], ['trying'], ['trying'], ['trying'], ['trying'], ['trying']]
download   	:  166 	 [['downloading'], ['downloading'], ['downloading'], ['downloading'], ['downloadi

In [34]:
"""
Frequency of error messages and stack traces
"""

from crash_analysis import filter_dataframe


def get_value_from_hist(n, field='Message', frame=None):
    """
    Return the n-th entry of a column's frequency table and its count.

    The table is built with ``value_counts()`` on the requested column, so
    position 0 is the most frequent value.

    Parameters
    ----------
    n : int
        Zero-based position in the frequency table.
    field : str
        Name of the column to tabulate.
    frame : pandas.DataFrame, optional
        Data to tabulate. When omitted, the notebook-level ``df`` is used,
        which is what the function always did before this parameter existed.

    Returns
    -------
    tuple
        ``(value, count)`` for the requested position.

    Raises
    ------
    AssertionError
        If ``n`` is not smaller than the number of distinct values.
    """
    # Only fall back to the notebook global when no frame was supplied, so the
    # function can be used (and tested) without relying on hidden kernel state.
    source = df if frame is None else frame
    field_hist = source[field].value_counts()
    assert n < len(field_hist), (
        "n={0} is out of range: '{1}' has only {2} distinct values".format(
            n, field, len(field_hist)))
    return field_hist.index[n], field_hist.values[n]


def print_parings(n, field1='Message', groupings=['AppName', 'InstallType', 'WorkStationType', 'OperatingSystem']):
    """
    Print a breakdown of the records sharing the n-th most common ``field1`` value.

    The value and its count come from ``get_value_from_hist(n, field1)``; the
    notebook-level ``df`` is then narrowed with
    ``filter_dataframe(df, **{field1: value})`` (presumably keeping the rows
    whose ``field1`` equals that value -- confirm in crash_analysis).

    Parameters
    ----------
    n : int
        Zero-based rank of the ``field1`` value to inspect; it is reported
        as "Query n+1" in the printed header.
    field1 : str
        Column whose n-th most common value selects the records.
    groupings : list or str
        When a list, the selected records are grouped by those columns and
        the number of unique ``CustNum`` values per group is printed.
        Otherwise it is used as a single column key and that column's
        ``value_counts()`` is printed. The default list is shared between
        calls; this function does not modify it.
    """
    top_key, count = get_value_from_hist(n, field1)
    df_filtered = filter_dataframe(df, **{field1: top_key})

    if isinstance(groupings, list):
        field2_hist = df_filtered.groupby(groupings)['CustNum'].nunique()
    else:
        field2_hist = df_filtered[groupings].value_counts()

    # Strip non-ASCII characters, then decode back to text so Python 3 does not
    # render the value as b'...'; on Python 2 the printed text is unchanged.
    printable_key = top_key.encode('ascii', 'ignore').decode('ascii')
    print("Query {0} \t count: {1} \t {2}: {3}".format(n+1, count, field1, printable_key))
    print(field2_hist)
    # Call form works on both Python 2 and 3 and matches the prints above.
    print('\n')

# Disabled alternative: break the five most common messages down by description.
# for rank in range(5): print_parings(rank, groupings='ProblemDescription')
# Rank 1 is the second most common Message (reported as "Query 2").
print_parings(n=1, groupings=['CustNum', 'CrashGUID'])

Query 2 	 count: 138 	 Message: Method not found: 'Int32 System.Runtime.InteropServices.Marshal.SizeOf(!!0)'.
CustNum     CrashGUID                           
0000087725  46c951f7-d192-45be-bffd-930e1cd2e111    1
0000356345  9c457a20-8e20-48e8-a1f3-1c4293f51f54    1
            9eeeb411-3bbf-4c1e-85a1-01c9801b079d    1
            c319626e-34fb-45d7-af81-8209ac1af518    1
0000391052  67418492-77b1-48fb-8358-96bfd9c82d15    1
            b85ef955-ee09-4d37-9382-ea9bb59a0a08    1
            cf1dc68f-cfdd-4afa-b674-152e2f753385    1
0000461049  819fe619-c4b9-4e45-a52b-a043101bb8fc    1
            e3e0c5e6-7cf9-4663-9f13-3ee49b61d56f    1
0000887695  4e6ecba2-709f-4096-8350-7329043e3020    1
            4eab30e3-7bc3-4e9a-a9a4-4567518c3fad    1
            72ccdd8c-4c8c-4139-8c5f-0f55cc73cac7    1
            7e5aa7dd-e4a0-41a0-bcd8-33b3c489070b    1
            8f1a8ce1-6c45-4bca-8fca-b177de5661ae    1
            9dc069c3-c319-42b5-9256-9623c1876945    1
            a3f9bf63-9d32-43e3-

In [79]:
""" Customer Descriptions """

from crash_analysis import remove_empty
# NOTE(review): `one_cust` is not defined in any cell visible here; left disabled.
# print(one_cust.head())
# Raise pandas' column-width display limit to 250 characters.
pd.set_option('display.max_colwidth', 250)
# Adding up the value_counts() tallies gives the number of counted entries left
# after remove_empty (presumably the non-empty descriptions -- TODO confirm).
print(sum(remove_empty(df['ProblemDescription']).value_counts()))

35


In [80]:
"""
Associate by keyterm
"""
from crash_analysis.analysis import associate_by_keyterms

# Prints, per keyterm, the Message values associated with it (see the
# "Message by Keyterm" cell output). The meaning of min_count is defined by
# the project helper -- confirm in crash_analysis.analysis.
associate_by_keyterms(df, 'ProblemDescription', 'Message', min_count=2)
# Emit a blank line with a call that behaves the same on Python 2 and 3.
# A bare `print` is a statement only on Python 2; on Python 3 it would
# evaluate to the function object and be echoed as the cell's output.
print('')

Message by Keyterm
keyterm: program
Method not found: 'Void Intuit.Application.ProSeries.OptionSetupWizardAPI.DataModels.AdditionalInfo..ctor(System.String, System.String, System.String, System.String, System.String, System.String, System.String, System.String, System.String, System.String, System.String, System.String)'.    2
Method not found: 'Int32 System.Runtime.InteropServices.Marshal.SizeOf(!!0)'.                                                                                                                                                                                                                      1
External component has thrown an exception.                                                                                                                                                                                                                                                        1
Name: Message, dtype: int64

keyterm: open
External component has thrown an exception