# Cleaning data for bubble chart

In [1]:
import csv

# Convert the raw delimited text export into a CSV file.

# Define the input and output file names
input_file_name = "cleaned_crash_data_zipc.txt"
output_file_name = "cleaned_crash_data_zipc.csv"

# Specify the delimiter used in the text file (e.g., ',' for CSV or '\t' for tab-separated values)
delimiter = ','

# Open both files with newline='' so the csv module controls line endings itself
with open(input_file_name, "r", newline='') as input_file, open(output_file_name, "w", newline='') as output_file:
    # Parse with csv.reader rather than str.split: a quoted field that contains
    # the delimiter must stay a single column instead of being cut in two.
    csv_reader = csv.reader(input_file, delimiter=delimiter)

    # Create a CSV writer
    csv_writer = csv.writer(output_file)

    # Iterate through the parsed rows of the input file
    for fields in csv_reader:
        # csv.reader yields an empty list for a blank line; writing it out would
        # create a row with no usable columns, so skip it.
        if not fields:
            continue

        # Write the fields to the CSV file
        csv_writer.writerow(fields)

In [2]:
# List every distinct contributing factor so inconsistent labels can be spotted.

# Define a set to store unique contributing factors
unique_factors = set()

# Column positions of CONTRIBUTING FACTOR VEHICLE 1 and CONTRIBUTING FACTOR VEHICLE 2
factor_columns = (14, 15)

with open("cleaned_crash_data_zipc.csv", "r", newline='') as file:
    # The file was produced by csv.writer, so read it back with csv.reader:
    # splitting on ',' by hand would ignore any quoting the writer applied.
    reader = csv.reader(file)

    # Skip the header line; the default keeps an empty file from raising StopIteration
    next(reader, None)

    # Iterate through each row in the file
    for fields in reader:
        for column in factor_columns:
            # A row too short to contain this column has no factor to record
            if column >= len(fields):
                continue

            factor = fields[column].strip()

            # Ignore empty cells
            if factor:
                unique_factors.add(factor)

# sorted() accepts any iterable and returns a new sorted list
unique_factors_list = sorted(unique_factors)

# Print the unique contributing factors
for factor in unique_factors_list:
    print(factor)


1
80
Accelerator Defective
Aggressive Driving/Road Rage
Alcohol Involvement
Animals Action
Backing Unsafely
Brakes Defective
Cell Phone (hand-Held)
Cell Phone (hand-held)
Cell Phone (hands-free)
Driver Inattention/Distraction
Driver Inexperience
Driverless/Runaway Vehicle
Drugs (Illegal)
Drugs (illegal)
Eating or Drinking
Failure to Keep Right
Failure to Yield Right-of-Way
Fatigued/Drowsy
Fell Asleep
Following Too Closely
Glare
Headlights Defective
Illnes
Illness
Lane Marking Improper/Inadequate
Listening/Using Headphones
Lost Consciousness
Obstruction/Debris
Other Electronic Device
Other Lighting Defects
Other Vehicular
Outside Car Distraction
Oversized Vehicle
Passenger Distraction
Passing Too Closely
Passing or Lane Usage Improper
Pavement Defective
Pavement Slippery
Pedestrian/Bicyclist/Other Pedestrian Error/Confusion
Physical Disability
Prescription Medication
Reaction to Other Uninvolved Vehicle
Reaction to Uninvolved Vehicle
Shoulders Defective/Improper
Steering Failure
Texting

Changing the following options to null (stored as the string "none") so that those crashes are easy to exclude when creating the bubble chart:
* 1
* 80
* Unspecified

Renaming these options because they duplicate, or are nearly identical to, another option:
* Cell Phone (hand-Held) -> Cell Phone (hand-held)
* Drugs (Illegal) -> Drugs (illegal)


In [5]:
import os

# Normalise the contributing-factor columns of the cleaned CSV in place.

# Define a dictionary to map old values to new values.
# Keys must match the labels exactly as they appear in the data (case-sensitive).
value_mappings = {
    "1": "none",  # Treat "1" as null (stored as the string "none")
    "80": "none",  # Treat "80" as null
    "Unspecified": "none",  # Treat "Unspecified" as null
    # The data spells this label "Cell Phone" with a capital P; a lower-case
    # "Cell phone" key never matches, so the rename would silently not happen.
    "Cell Phone (hand-Held)": "Cell Phone (hand-held)",
    "Drugs (Illegal)": "Drugs (illegal)",  # Merge the two capitalisations
}
# NOTE(review): the printed factor list also shows "Illnes" next to "Illness" —
# consider mapping it as well.

# Column positions of CONTRIBUTING FACTOR VEHICLE 1 and CONTRIBUTING FACTOR VEHICLE 2
factor_columns = (14, 15)

# Open the input file for reading and create a temporary output file for writing
with open("cleaned_crash_data_zipc.csv", "r", newline='') as input_file, open("data_modified.csv", "w", newline='') as output_file:
    # Use the csv module for both directions so quoted fields survive the round trip
    reader = csv.reader(input_file)
    writer = csv.writer(output_file)

    # Copy the header row through unchanged (an empty input file has none)
    header = next(reader, None)
    if header is not None:
        writer.writerow(header)

    # Iterate through the remaining rows in the input file
    for fields in reader:
        for i in factor_columns:
            # Leave rows that are too short for this column untouched
            if i >= len(fields):
                continue

            # Strip before the lookup, matching how the unique factors were
            # listed, so a value with stray whitespace is still recognised.
            factor = fields[i].strip()
            if factor in value_mappings:
                fields[i] = value_mappings[factor]

        # Write the (possibly modified) row to the output file
        writer.writerow(fields)

# Move the finished temporary file over the original in a single step; this
# replaces the copy-back followed by a separate delete and leaves no temp file.
os.replace("data_modified.csv", "cleaned_crash_data_zipc.csv")