# Crash Report Jan 18, 2016

Version: 2015050021

In [1]:
"""
General import statements
"""
import crash_analysis
import pandas as pd
from matplotlib import pyplot as plt 
%matplotlib inline

In [2]:
"""
Parse zipfiles into dataframe
"""
from crash_analysis.parser import extract_zipfiles, xmldocs_to_dataframe
# Directory that holds the crash-report zipfiles.
zipfile_location = 'C:\\CrashReports\\'

extract_zipfiles(zipfile_location)

# Build the dataframe and drop duplicate rows in one chained expression,
# so `df` is bound exactly once in this cell.
df = xmldocs_to_dataframe(zipfile_location).drop_duplicates()

In [3]:
"""
Save/load dataframe to/from CSV
"""
from crash_analysis import read_csv

# Toggle: True writes the current `df` to the cache file, False replaces `df`
# with whatever the cache file contains.
new_data = True
cache_location = 'src/data/2015050021_feb7_530pm.csv'

if not new_data:
    df = read_csv(cache_location)
else:
    df.to_csv(cache_location, encoding='utf-8')

In [4]:
"""
Get metadata about dataframe
"""
# Column names, number of rows per application version, and total row count.
col_names = crash_analysis.get_columns(df)
versions = df['AppVersion'].value_counts()
num_crashes = len(df)

# 5350 crashes in the recorded run

# Print the three summaries in the same order as before.
for summary in (col_names, versions, num_crashes):
    print(summary)

['ACCDT_Field', 'Active_ClientFileName', 'Active_Field', 'Active_Form', 'Active_FormsetID', 'Active_FormsetVersion', 'AppName', 'AppVersion', 'BasWin15.INI', 'Batch_ClientFileName', 'CrashGUID', 'CrashRpt', 'Current_Calcsection', 'CustNum', 'CustomProps', 'ExceptionAddress', 'ExceptionCode', 'ExceptionModule', 'ExceptionModuleBase', 'ExceptionModuleVersion', 'ExceptionType', 'FileList', 'GUIResourceCount', 'GeoLocation', 'ImageName', 'InstallType', 'InvParamExpression', 'InvParamFile', 'InvParamFunction', 'InvParamLine', 'Last_Calcsection', 'MemoryUsageKbytes', 'OSIs64Bit', 'OpenHandleCount', 'OperatingSystem', 'ProWin15.INI', 'ProblemDescription', 'SystemTimeUTC', 'WorkStationName', 'WorkStationType', 'crashdump.dmp', 'crashrpt.xml']
2015050021    5350
Name: AppVersion, dtype: int64
5350


In [7]:
from crash_analysis import filter_dataframe
# Rebind `df` to the result of filtering on one AppVersion, then display the
# remaining row count. Because `df` is reassigned from itself, every later
# cell sees only the filtered rows and re-running this cell filters again.
# NOTE(review): the recorded outputs disagree -- the metadata cell lists only
# AppVersion 2015050021 (5350 rows), yet this cell reports 6054 rows for
# 2016070012. Presumably it was executed against a different `df`; re-run the
# notebook from a fresh kernel and confirm the intended version.
df = filter_dataframe(df, AppVersion=2016070012)
len(df)

6054

In [8]:
"""
Customer Description Analysis
"""
from crash_analysis.analysis import remove_empty, get_column, stem_frequency
# Pull the customer-written problem descriptions and pass them through
# remove_empty (presumably dropping blank entries -- confirm in crash_analysis).
customer_desc = remove_empty(get_column(df, 'ProblemDescription'))

stem_frequency(customer_desc)

# Explicit call form: prints the same blank line under Python 2 and remains a
# real print under Python 3, where a bare `print` only references the function.
print('')




In [20]:
"""
Frequency of error messages and stack traces
"""

from crash_analysis import filter_dataframe


def get_value_from_hist(n, field='Message'):
    """
    Look up the n-th entry of a column's value histogram in the global `df`.

    n     -- zero-based position in `df[field].value_counts()` (which pandas
             orders from most to least frequent by default)
    field -- name of the column whose values are counted

    Returns a (value, count) tuple.
    Raises AssertionError when n is not smaller than the number of distinct
    values in the column.
    """
    counts = df[field].value_counts()
    assert n < len(counts)
    value = counts.index[n]
    occurrences = counts.values[n]
    return value, occurrences


def print_parings(n, field1='Message', groupings=None, print_output=True):
    """
    Break down the n-th most frequent value of `field1` by other columns.

    n            -- zero-based rank passed to get_value_from_hist
    field1       -- column whose n-th most frequent value selects the rows
    groupings    -- a list of column names, or a single column name.
                    With a list, the breakdown is the number of distinct
                    'CustNum' values per group. With a single name it is the
                    value_counts() of that column. Defaults to
                    ['AppName', 'InstallType', 'OperatingSystem'].
    print_output -- when true, print a header line, the breakdown, its sum
                    and a blank separator

    Reads the global `df`. Returns None; the result is only printed.
    """
    # None sentinel instead of a mutable list default.
    if groupings is None:
        groupings = ['AppName', 'InstallType', 'OperatingSystem']

    top_key, count = get_value_from_hist(n, field1)
    df_filtered = filter_dataframe(df, **{field1: top_key})

    if isinstance(groupings, list):
        field2_hist = df_filtered.groupby(groupings)['CustNum'].nunique()
    else:
        field2_hist = df_filtered[groupings].value_counts()

    if print_output:
        if hasattr(top_key, 'encode'):
            # Strip non-ASCII characters, then decode again so Python 3 shows
            # text rather than a b'...' literal.
            label = top_key.encode('ascii', 'ignore').decode('ascii')
        else:
            # Non-string keys (e.g. integer versions) have no .encode().
            label = top_key
        print("Query {0} \t count: {1} \t {2}: {3}".format(n + 1, count, field1, label))
        print(field2_hist)
        print(field2_hist.sum())
        # Call form works under both Python 2 and Python 3.
        print('\n')
    
# 
# pd.set_option('display.max_rows', 5000)
# pd.set_option('display.height', 5000)
# [print_parings(n, groupings='ProblemDescription') for n in range(5)]
# print_parings(0, groupings='CustNum')
# [print_parings(n, field1='ExceptionAddress') for n in [0]]
# [print_parings(n, field1='ExceptionAddress', groupings='ProblemDescription') for n in [0]]

# Report up to the ten most frequent exception addresses. The bound keeps the
# loop within the number of distinct addresses, because get_value_from_hist
# asserts that the rank is inside the histogram. A plain loop is used since
# print_parings is called only for its printed output.
top_n = min(10, df['ExceptionAddress'].nunique())
for n in range(top_n):
    print_parings(n, field1='ExceptionAddress')

# 0x100425c3 is the address reported as Query 1 in the recorded output.
file_counts_df = filter_dataframe(df, ExceptionAddress='0x100425c3')
print(len(file_counts_df))

# file_counts_df = filtered_df.apply(lambda x: pd.to_numeric(x, errors='coerce'))
# file_counts_df.hist(bins=50)
# # file_counts_df = pd.to_numeric(file_counts_df, errors='ignore')
# # file_counts_df.mean()
# print(file_counts_df.min(), file_counts_df.max())
# df[df['CustNum'] == '0614456913']

Query 1 	 count: 106 	 ExceptionAddress: 0x100425c3
AppName           InstallType  OperatingSystem                                 
ProSeries - 2015  Network      Windows 10 Home Build 10586                          2
                               Windows 10 Pro Build 10586                          10
                               Windows 7 Enterprise Build 7601                      2
                               Windows 7 Home Premium Build 7601                    9
                               Windows 7 Home Premium Build 7601 Service Pack 1     2
                               Windows 7 Professional Build 7601                   48
                               Windows 7 Professional Build 7601 Service Pack 1     9
                               Windows 7 Ultimate Build 7601                        3
                               Windows 7 Ultimate Build 7601 Service Pack 1         1
                               Windows 8 Pro Build 9200                             1
        

In [79]:
""" Customer Descriptions """

from crash_analysis import remove_empty
# print(one_cust.head())
# Raise the column display width to 250 characters for long descriptions.
pd.set_option('display.max_colwidth', 250)

# Total of the per-description counts, i.e. how many descriptions remain after
# remove_empty (presumably the non-blank ones -- confirm in crash_analysis).
nonempty_descriptions = remove_empty(df['ProblemDescription'])
print(nonempty_descriptions.value_counts().sum())

35


In [80]:
"""
Associate by keyterm
"""
from crash_analysis.analysis import associate_by_keyterms

# Relate key terms from 'ProblemDescription' to the 'Message' column. The
# recorded output lists Message counts under each "keyterm:" heading.
associate_by_keyterms(df, 'ProblemDescription', 'Message', min_count=2)

# Explicit call form: prints the same blank line under Python 2 and remains a
# real print under Python 3, where a bare `print` only references the function.
print('')

Message by Keyterm
keyterm: program
Method not found: 'Void Intuit.Application.ProSeries.OptionSetupWizardAPI.DataModels.AdditionalInfo..ctor(System.String, System.String, System.String, System.String, System.String, System.String, System.String, System.String, System.String, System.String, System.String, System.String)'.    2
Method not found: 'Int32 System.Runtime.InteropServices.Marshal.SizeOf(!!0)'.                                                                                                                                                                                                                      1
External component has thrown an exception.                                                                                                                                                                                                                                                        1
Name: Message, dtype: int64

keyterm: open
External component has thrown an exception