In [None]:
# Create a report on the unique descriptions in a fallout report from GeoComm, filter the fallouts as desired,
# and create CSVs to join to features (or to copies of features) to customize your view in ArcGIS

# These scripts can help with tracking specific fallout types over time, as well as finding the root cause of repeated errors

In [3]:
# REQUIRED
import pandas as pd
import csv
import os


In [4]:
# Location of the fallout report CSV -- replace this with the path to your own file
file_path = r"C:\path\to\your\directory\fileName.csv"

# Read the complete fallout report into a dataframe.
# low_memory=False has pandas parse the file in a single pass instead of in chunks,
# which avoids mixed-type inference within a column.
fallout = pd.read_csv(filepath_or_buffer=file_path, low_memory=False)


In [None]:
# Show just the column labels of the report, so a name can be copied and pasted
# with its exact spelling
column_labels = fallout.columns
print(column_labels)

In [None]:
# Pull out the single column whose values are going to be tallied
desc = fallout.loc[:, 'DESCRIPTION']

# Tally how many rows carry each distinct value of that column
# (value_counts lists the most frequent value first and leaves out missing values)
type_counts = desc.value_counts()

# Display the tally
print(type_counts)


In [None]:
# Filter for a specific piece of text found in the column of interest.
# In this case, I only want entries with a description containing the Unique ID warning.
unique_id_text = "Unique ID value is not unique across all layers"

# regex=False treats the search text literally, so characters such as "." or "(" in a
# pasted description are not interpreted as a pattern.
# na=False counts rows with a missing DESCRIPTION as non-matches; without it the mask
# contains NaN for those rows and pandas raises an error instead of filtering.
rcl_split = fallout[fallout['DESCRIPTION'].str.contains(unique_id_text, regex=False, na=False)]

# Write only the matching entries to a new CSV.
# index=False leaves out the dataframe's row index, so the output has the same columns as the
# original fallout report and only contains entries where the Description contains the text above.
# Point this at a different file than the input report so the original is not overwritten.
rcl_split.to_csv(r"C:\path\to\your\directory\fileName.csv", index=False)

In [None]:
# Filter by one particular description. The DESCRIPTION value must match this text exactly,
# character for character.
desc_str = "This QC reports SSAP features that don't have a corresponding road segment in the road centerlines feature layer."
desc_filter = fallout[fallout['DESCRIPTION'] == desc_str]

# Write the matching entries to a new CSV.
# index=False leaves out the dataframe's row index so the output keeps the same columns as the report.
# Point this at a different file than the input report so the original is not overwritten.
desc_filter.to_csv(r"C:\path\to\your\directory\fileName.csv", index=False)

In [None]:
# Maybe you want multiple description types in your pared-down fallout report.
# Here I set multiple variables to the text copied from descriptions that were coming up a lot
desc1 = "RoadCenterlines line feature's attributes do not match intersecting IncMuni_L polygon - Single intersecting polygon "
desc2 = "RoadCenterlines line feature's attributes do not match intersecting IncMuni_R polygon - Single intersecting polygon"
desc3 = "SiteStructureAddressPoints point feature's attributes do not match intersecting Inc_Muni polygon - Multiple intersecting polygon"
desc4 = "SiteStructureAddressPoints point feature's attributes do not match intersecting Inc_Muni polygon - Single intersecting polygon"
desc5 = "This QC reports SSAP features that don't have a corresponding road segment in the road centerlines feature layer."

# Create a list of the descriptions
desc_arr = [desc1, desc2, desc3, desc4, desc5]

# isin() is an exact match, so a stray leading/trailing space picked up while copying text
# (desc1 above ends with one) would silently drop every row of that type.
# Compare on whitespace-trimmed text on both sides instead.
wanted_descs = [wanted.strip() for wanted in desc_arr]
# astype(str) keeps .str usable even if the column holds missing or non-text values;
# a missing value becomes the text "nan", which matches none of the descriptions.
report_descs = fallout['DESCRIPTION'].astype(str).str.strip()

# Create a filter where the entries have one of the descriptions in the list.
# The rows kept are taken from the untouched report; only the comparison uses trimmed text.
fallout_filter = fallout[report_descs.isin(wanted_descs)]

# Create a new CSV of the filtered results.
# index=False leaves out the dataframe's row index so the output keeps the same columns as the report.
# Point this at a different file than the input report so the original is not overwritten.
fallout_filter.to_csv(r"C:\path\to\your\directory\fileName.csv", index=False)


In [7]:
# Similar to the block above, you can create a list of the top n most common descriptions and filter for those.

# Set a variable for the number of description types desired
n = 5

# Count the rows for each description once. value_counts() lists the most frequent
# description first, so the first n entries are the n most common ones.
tops = fallout['DESCRIPTION'].value_counts().head(n)

# For testing purposes. This outputs the most common descriptions, with the total number of occurrences
print(tops)

# The descriptions themselves are the index labels of the counts
most_freq = tops.index.tolist()

# Using the list of the top n descriptions from above, create the filtered data frame
desc_filter = fallout[fallout['DESCRIPTION'].isin(most_freq)]

# Create a CSV of the filtered data.
# index=False leaves out the dataframe's row index so the output keeps the same columns as the report.
# Point this at a different file than the input report so the original is not overwritten.
desc_filter.to_csv(r"C:\path\to\your\directory\fileName.csv", index=False)

DESCRIPTION
This QC synchronizes ALI data with SSAP data.                                                                        2470
MSAG record falls in a gap within the corresponding road ranges                                                      1325
This QC reports SSAP features that don't have a corresponding road segment in the road centerlines feature layer.     740
MSAG record has no matching RCL street name                                                                           620
No matching full street name found between the ALI record and the road segment                                        474
Name: count, dtype: int64


In [None]:
# The Description field contains higher level information about the fallout type. More specific information is found in
# EXTENDED_INFORMATION, but this field contains info unique to the exact feature point or segment.
# Use partial string matching to filter for all the values of a more specific fallout type, without the filter
# getting bogged down by the specific address point or RCL affected.
#
# NOTE: this cell works on fallout_filter, the dataframe produced by the multiple-description
# cell earlier in the notebook, so that cell has to be run first.

# Set variable for the extended information column
extInfo = fallout_filter['EXTENDED_INFORMATION']

# Create a filtered dataframe of the entries with the matching text.
# regex=False treats the search text literally rather than as a pattern.
# na=False counts rows with no EXTENDED_INFORMATION as non-matches; without it the mask
# contains NaN for those rows and pandas raises an error instead of filtering.
fallout_filter2 = fallout_filter[extInfo.str.contains("same full street name", regex=False, na=False)]

# Export the filtered data to a CSV.
# index=False leaves out the dataframe's row index so the output keeps the same columns as the report.
# Point this at a different file than the input report so the original is not overwritten.
fallout_filter2.to_csv(r"C:\path\to\your\directory\fileName.csv", index=False)