In [1]:
import pandas as pd
from pathlib import Path
import re

In [2]:
# Cap how many rows / columns pandas renders when a frame is displayed
pd.set_option("display.max_rows", 50)
pd.set_option("display.max_columns", 50)

In [3]:
# Path of the raw traffic-violations CSV; the commented-out line is an alternative location.
#big_path = "../../../Traffic_Violations.csv"
big_path = "Traffic_Violations.csv"
# low_memory=False makes pandas process the file in one pass instead of in chunks
# (see the read_csv documentation for the effect on dtype inference).
big_df = pd.read_csv(big_path, low_memory=False)

In [4]:
# Preview the first five raw rows
big_df.head()

Unnamed: 0,SeqID,Date Of Stop,Time Of Stop,Agency,SubAgency,Description,Location,Latitude,Longitude,Accident,Belts,Personal Injury,Property Damage,Fatal,Commercial License,HAZMAT,Commercial Vehicle,Alcohol,Work Zone,Search Conducted,Search Disposition,Search Outcome,Search Reason,Search Reason For Stop,Search Type,Search Arrest Reason,State,VehicleType,Year,Make,Model,Color,Violation Type,Charge,Article,Contributed To Accident,Race,Gender,Driver City,Driver State,DL State,Arrest Type,Geolocation
0,52282e8c-f2e1-4bb5-8509-2d5e4f8da8ca,05/01/2023,23:11:00,MCP,"3rd District, Silver Spring",OPERATING UNREGISTERED MOTOR VEHICLE ON HIGHWAY,BRIGGS CHANEY RD @ COLUMIBA PIKE,0.0,0.0,No,No,No,No,No,No,No,No,No,No,No,,Citation,,17-107(a1),,,MD,02 - Automobile,2007.0,CHEV,CRUZ,BLACK,Citation,13-401(b1),Transportation Article,False,WHITE,M,GAITHERSBURG,MD,MD,A - Marked Patrol,"(0.0, 0.0)"
1,971ef50f-f138-419f-89e5-5d2cc5d7b75a,04/30/2023,23:55:00,MCP,"4th District, Wheaton",DRIVING VEHICLE ON HIGHWAY WITH SUSPENDED REGI...,KEMP MILL ROAD @ ALPERT LANE,39.053038,-77.024637,No,No,No,No,No,No,No,No,No,No,No,,Citation,,55*,,,MD,02 - Automobile,2013.0,HYUNDAI,SONATA,RED,Citation,13-401(h),Transportation Article,False,WHITE,F,SILVER SPRING,MD,MD,A - Marked Patrol,"(39.0530383333333, -77.0246366666667)"
2,1b3ad74c-9e3b-47af-acba-cc3fcb656e6f,04/30/2023,04:24:00,MCP,"2nd District, Bethesda",NEGLIGENT DRIVING VEHICLE IN CARELESS AND IMPR...,JONES MILL ROAD/ LE VELLE DRIVE,38.99148,-77.097113,No,No,No,No,No,No,No,No,No,No,No,,Citation,,21-304(c),,,MD,02 - Automobile,2019.0,NISSAN,ALTIMA,GRAY,Citation,21-901.1(b),Transportation Article,False,BLACK,F,SILVER SPRING,MD,MD,A - Marked Patrol,"(38.99148, -77.0971133333333)"
3,1b3ad74c-9e3b-47af-acba-cc3fcb656e6f,04/30/2023,04:24:00,MCP,"2nd District, Bethesda",RECKLESS DRIVING VEHICLE IN WANTON AND WILLFUL...,JONES MILL ROAD/ LE VELLE DRIVE,38.99148,-77.097113,No,No,No,No,No,No,No,No,No,No,No,,Citation,,21-304(c),,,MD,02 - Automobile,2019.0,NISSAN,ALTIMA,GRAY,Citation,21-901.1(a),Transportation Article,False,BLACK,F,SILVER SPRING,MD,MD,A - Marked Patrol,"(38.99148, -77.0971133333333)"
4,1b3ad74c-9e3b-47af-acba-cc3fcb656e6f,04/30/2023,04:24:00,MCP,"2nd District, Bethesda",FAILURE TO DRIVE VEHICLE ON RIGHT HALF OF ROAD...,JONES MILL ROAD/ LE VELLE DRIVE,38.99148,-77.097113,No,No,No,No,No,No,No,No,No,No,No,,Citation,,21-304(c),,,MD,02 - Automobile,2019.0,NISSAN,ALTIMA,GRAY,Citation,21-301(a),Transportation Article,False,BLACK,F,SILVER SPRING,MD,MD,A - Marked Patrol,"(38.99148, -77.0971133333333)"


In [5]:
# List every column available in the raw data
big_df.columns

Index(['SeqID', 'Date Of Stop', 'Time Of Stop', 'Agency', 'SubAgency',
       'Description', 'Location', 'Latitude', 'Longitude', 'Accident', 'Belts',
       'Personal Injury', 'Property Damage', 'Fatal', 'Commercial License',
       'HAZMAT', 'Commercial Vehicle', 'Alcohol', 'Work Zone',
       'Search Conducted', 'Search Disposition', 'Search Outcome',
       'Search Reason', 'Search Reason For Stop', 'Search Type',
       'Search Arrest Reason', 'State', 'VehicleType', 'Year', 'Make', 'Model',
       'Color', 'Violation Type', 'Charge', 'Article',
       'Contributed To Accident', 'Race', 'Gender', 'Driver City',
       'Driver State', 'DL State', 'Arrest Type', 'Geolocation'],
      dtype='object')

In [6]:
# Columns we have deemed unhelpful; everything else is carried into big_df_smaller
columns_to_drop = [
    "Agency", "Geolocation", "VehicleType", "HAZMAT",
    "Commercial Vehicle", "Commercial License", "Article", "Charge",
    "SeqID", "SubAgency", "Model", "Contributed To Accident", "DL State",
    "Driver City", "Arrest Type", "Search Reason For Stop",
]
big_df_smaller = big_df.drop(columns=columns_to_drop)

In [7]:
# Drop rows that do not result in a citation or warning:
# 'ESERO' and 'SERO' violation types are filtered out in a single pass.
is_excluded_type = big_df_smaller['Violation Type'].isin(['ESERO', 'SERO'])
big_df_smaller = big_df_smaller.loc[~is_excluded_type]

In [8]:
# --- Clean the vehicle "Year" column ---
# Rows without a year are discarded first so the integer cast below cannot fail on NaN.
big_df_smaller = big_df_smaller.dropna(subset=["Year"])
# Year is parsed as float (e.g. 2007.0 in the preview); store it as an integer.
big_df_smaller = big_df_smaller.assign(Year=big_df_smaller["Year"].astype(int))
# Every distinct year present in the data
years = big_df_smaller["Year"].unique()
# Hand-picked garbage values that cannot be real model years
years_to_remove = [0, 6338, 1005, 1196, 2912, 1009, 2088, 1007, 2102, 2109, 2105, 2997]
# Whatever is left after removing the garbage values is considered valid
good_years = [yr for yr in years if yr not in years_to_remove]
# Retain only the rows whose year is in the valid list
has_good_year = big_df_smaller["Year"].isin(good_years)
big_df_smaller = big_df_smaller[has_good_year]
big_df_smaller.shape

(1790375, 27)

In [9]:
# Keep only rows with a usable position: a 0 in either coordinate is treated as "no location"
# (the first preview row shows 0.0 / 0.0 — presumably the source's marker for unknown).
has_latitude = big_df_smaller['Latitude'].ne(0)
has_longitude = big_df_smaller['Longitude'].ne(0)
big_df_smaller = big_df_smaller[has_latitude & has_longitude]


In [10]:
# Columns that remain after dropping the unhelpful ones
big_df_smaller.columns

Index(['Date Of Stop', 'Time Of Stop', 'Description', 'Location', 'Latitude',
       'Longitude', 'Accident', 'Belts', 'Personal Injury', 'Property Damage',
       'Fatal', 'Alcohol', 'Work Zone', 'Search Conducted',
       'Search Disposition', 'Search Outcome', 'Search Reason', 'Search Type',
       'Search Arrest Reason', 'State', 'Year', 'Make', 'Color',
       'Violation Type', 'Race', 'Gender', 'Driver State'],
      dtype='object')

In [11]:
# Dtypes and non-null counts per column after the row filters applied so far
big_df_smaller.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1665844 entries, 1 to 1878004
Data columns (total 27 columns):
 #   Column                Non-Null Count    Dtype  
---  ------                --------------    -----  
 0   Date Of Stop          1665844 non-null  object 
 1   Time Of Stop          1665844 non-null  object 
 2   Description           1665836 non-null  object 
 3   Location              1665841 non-null  object 
 4   Latitude              1665844 non-null  float64
 5   Longitude             1665844 non-null  float64
 6   Accident              1665844 non-null  object 
 7   Belts                 1665844 non-null  object 
 8   Personal Injury       1665844 non-null  object 
 9   Property Damage       1665844 non-null  object 
 10  Fatal                 1665844 non-null  object 
 11  Alcohol               1665844 non-null  object 
 12  Work Zone             1665844 non-null  object 
 13  Search Conducted      1023793 non-null  object 
 14  Search Disposition    71352 non-nu

In [12]:
# null_count = big_df_smaller['Accident'].isnull().sum()
# null_count

In [13]:
# if big_df_smaller.isnull().values.any():
#     print("There are null values in the DataFrame.")
# else:
#     print("There are no null values in the DataFrame.")

In [14]:
# big_df_smaller[['Fatal', 'Alcohol', 'Work Zone', 'Search Conducted',
#        'Search Disposition', 'Search Outcome', 'Search Reason']].isnull()

In [15]:
# # Check the number of null values in each column
# null_count_per_column = big_df_smaller.isnull().sum()

# # Display the result
# print("Number of null values in each column:")
# print(null_count_per_column)


In [16]:

# big_df_smaller["Search Reason"].value_counts(dropna = False)


In [17]:
# Parse the stop date and derive calendar features from it.
# The file stores dates as MM/DD/YYYY (e.g. 05/01/2023 in the preview). Passing the format
# explicitly avoids per-row format inference on ~1.6M strings and rules out any
# day/month ambiguity; a row in a different format will raise instead of being guessed.
big_df_smaller['Date Of Stop'] = pd.to_datetime(big_df_smaller['Date Of Stop'], format='%m/%d/%Y')
big_df_smaller['Year of Stop'] = big_df_smaller['Date Of Stop'].dt.year
big_df_smaller['Month of Stop'] = big_df_smaller['Date Of Stop'].dt.month
# NOTE(review): this snapshot is taken BEFORE the Description / Make clean-up further down,
# so random_forest_df keeps the raw values of those columns — confirm that is intended.
random_forest_df = big_df_smaller.drop(['Date Of Stop'], axis = 1)

In [18]:
# Rows without a Description cannot be categorised (and would break the regex passes), so remove them
has_description = big_df_smaller["Description"].notna()
big_df_smaller = big_df_smaller[has_description]

In [19]:
# Relabel any description mentioning EXCEEDING ... SPEED LIMIT (case-insensitive) as "Speeding"
pattern = re.compile(r'.*EXCEEDING.*SPEED LIMIT.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Speeding', text)
)

In [20]:
# Progress check: number of distinct Description values still remaining
big_df_smaller["Description"].nunique()

14529

In [21]:
# Relabel any description mentioning FAILURE ... YIELD (case-insensitive) as "Failure to Yield"
pattern = re.compile(r'.*FAILURE.*YIELD.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Failure to Yield', text)
)

In [22]:
# Relabel any description mentioning FAIL ... YIELD as "Failure to Yield".
# FAIL also matches FAILED / FAILURE, so this is the broadest of the yield patterns.
pattern = re.compile(r'.*FAIL.*YIELD.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Failure to Yield', text)
)

In [23]:
# Relabel any description mentioning LEARNER ... PERMIT (case-insensitive) as "Learners Permit"
pattern = re.compile(r'.*LEARNER.*PERMIT.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Learners Permit', text)
)

In [24]:
# Progress check: distinct Description values after the yield / learner-permit passes
big_df_smaller["Description"].nunique()

14241

In [25]:
# Relabel every description containing SPEED anywhere (case-insensitive) as "Speeding"
pattern = re.compile(r'.*SPEED.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Speeding', text)
)

In [26]:
# Relabel every description containing SPEEDING as "Speeding".
# Anything containing SPEEDING also contains SPEED, so after the broader SPEED pass
# this is expected to change nothing; kept for explicitness.
pattern = re.compile(r'.*SPEEDING.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Speeding', text)
)

In [27]:
# Progress check: distinct Description values after the speeding passes
big_df_smaller["Description"].nunique()

11705

In [28]:
# Relabel every description containing LAMP (case-insensitive) as "Improper Equipment"
pattern = re.compile(r'.*LAMP.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Improper Equipment', text)
)

In [29]:
# Relabel every description containing HEADLIGHT (case-insensitive) as "Improper Equipment"
pattern = re.compile(r'.*HEADLIGHT.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Improper Equipment', text)
)

In [30]:
# Relabel descriptions mentioning REQUIRED ... MINIMUM ... EQUIPMENT as "Improper Equipment"
pattern = re.compile(r'.*REQUIRED.*MINIMUM.*EQUIPMENT.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Improper Equipment', text)
)

In [31]:
# Relabel every description containing INOPERATIVE (case-insensitive) as "Improper Equipment"
pattern = re.compile(r'.*INOPERATIVE.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Improper Equipment', text)
)

In [32]:
# Progress check: distinct Description values after the equipment passes
big_df_smaller["Description"].nunique()

9413

In [33]:
# Relabel every description containing PARK (case-insensitive) as "Non-Moving Violation"
pattern = re.compile(r'.*PARK.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Non-Moving Violation', text)
)

In [34]:
# Relabel descriptions mentioning ABANDONING ... VEH (case-insensitive) as "Non-Moving Violation"
pattern = re.compile(r'.*ABANDONING.*VEH.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Non-Moving Violation', text)
)

In [35]:
# Progress check: distinct Description values after the non-moving-violation passes
big_df_smaller["Description"].nunique()

9151

In [36]:
# Relabel descriptions mentioning INFLUENCE ... ALCOHOL (case-insensitive) as "DUI"
pattern = re.compile(r'.*INFLUENCE.*ALCOHOL.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('DUI', text)
)

In [37]:
# Relabel descriptions mentioning IMPAIRED ... ALCOHOL (case-insensitive) as "DUI"
pattern = re.compile(r'.*IMPAIRED.*ALCOHOL.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('DUI', text)
)

In [38]:
# Relabel descriptions mentioning FAILED ... STOP ... SIGN (case-insensitive) as "Improper Stop"
pattern = re.compile(r'.*FAILED.*STOP.*SIGN.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Improper Stop', text)
)

In [39]:
# Relabel descriptions mentioning FAILURE ... STOP ... SIGN (case-insensitive) as "Improper Stop"
pattern = re.compile(r'.*FAILURE.*STOP.*SIGN.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Improper Stop', text)
)

In [40]:
# Relabel descriptions mentioning FAIL ... STOP ... SIGN as "Improper Stop".
# FAIL matches FAILED / FAILURE and SIGN matches SIGNAL, so this is the broadest stop-sign pattern.
pattern = re.compile(r'.*FAIL.*STOP.*SIGN.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Improper Stop', text)
)

In [41]:
# Relabel descriptions mentioning FAILURE ... STOP ... SIGNAL as "Improper Stop".
# SIGNAL contains SIGN, so rows matching this were already caught by the
# FAIL ... STOP ... SIGN pass; expected to change nothing, kept for explicitness.
pattern = re.compile(r'.*FAILURE.*STOP.*SIGNAL.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Improper Stop', text)
)

In [42]:
# Relabel descriptions mentioning FAILED ... STOP ... SIGNAL as "Improper Stop".
# SIGNAL contains SIGN, so rows matching this were already caught by the
# FAIL ... STOP ... SIGN pass; expected to change nothing, kept for explicitness.
pattern = re.compile(r'.*FAILED.*STOP.*SIGNAL.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Improper Stop', text)
)

In [43]:
# Relabel descriptions mentioning FAIL ... STOP ... SIGNAL as "Improper Stop".
# SIGNAL contains SIGN, so rows matching this were already caught by the
# FAIL ... STOP ... SIGN pass; expected to change nothing, kept for explicitness.
pattern = re.compile(r'.*FAIL.*STOP.*SIGNAL.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Improper Stop', text)
)

In [44]:
# Progress check: distinct Description values after the DUI and improper-stop passes
big_df_smaller["Description"].nunique()

8767

In [45]:
# Relabel descriptions mentioning ELECTRONIC ... MSG (case-insensitive) as "Texting".
# NOTE(review): the later case-insensitive '.*TEXT.*' pass matches the label "Texting" itself
# and turns it into "Using Phone", so this category does not survive to the end — confirm intent.
pattern = re.compile(r'.*ELECTRONIC.*MSG.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Texting', text)
)

In [46]:
# Progress check: distinct Description values after the electronic-message pass
big_df_smaller["Description"].nunique()

8464

### Function to replace the complete column value if a substring matches

In [47]:
# Function to replace a complete value if a substring matches
def data_cleanup(df, column_name, substring, replacement, regex=True):
    """Overwrite whole cells of ``column_name`` whose text contains ``substring``.

    Matching is case-insensitive. ``df`` is modified in place and also returned,
    so both ``data_cleanup(df, ...)`` and ``df = data_cleanup(df, ...)`` work.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to update.
    column_name : str
        Name of the string column to search and overwrite.
    substring : str
        Text to look for. With the default ``regex=True`` it is interpreted as a
        regular expression (so characters such as ``(``, ``.`` or ``*`` are special);
        pass ``regex=False`` to match it literally.
    replacement : str
        Value written into every matching cell.
    regex : bool, default True
        Forwarded to ``Series.str.contains``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    # na=False: missing values count as "no match". Without it the mask contains NaN
    # and the .loc assignment below raises on any column that has missing entries.
    mask = df[column_name].str.contains(substring, case=False, regex=regex, na=False)
    df.loc[mask, column_name] = replacement
    return df

In [48]:
# Replace any Description containing REGISTRATION with 'Registration Violation'
column_to_modify, substring_to_match, replacement_value = (
    'Description', 'REGISTRATION', 'Registration Violation'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [49]:
# Replace any Description containing UNREGISTERED with 'Registration Violation'
column_to_modify, substring_to_match, replacement_value = (
    'Description', 'UNREGISTERED', 'Registration Violation'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [50]:
# Progress check: distinct Description values after the registration passes
big_df_smaller["Description"].nunique()

8012

In [51]:
# Replace any Description containing SEATBELT with 'SeatBelt - Not restrained'
column_to_modify, substring_to_match, replacement_value = (
    'Description', 'SEATBELT', 'SeatBelt - Not restrained'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [52]:
# Replace any Description containing SEAT BELT (two words) with 'SeatBelt - Not restrained'
column_to_modify, substring_to_match, replacement_value = (
    'Description', 'SEAT BELT', 'SeatBelt - Not restrained'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [53]:
# Progress check: distinct Description values after the seat-belt passes
big_df_smaller["Description"].nunique()

7823

In [54]:
# Replace any Description containing LICENSE with 'License Violation'
column_to_modify, substring_to_match, replacement_value = (
    'Description', 'LICENSE', 'License Violation'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [55]:
# Progress check: distinct Description values after the license pass
big_df_smaller["Description"].nunique()

7392

In [56]:
# Replace any Description containing SUSPENDED with 'Suspended License or Registration'.
# NOTE(review): the REGISTRATION and LICENSE passes run earlier, so descriptions such as
# "... SUSPENDED REGISTRATION" are already relabelled and only leftovers can match here.
# The new label also contains "License"/"Registration", so re-running those earlier
# (case-insensitive) cells afterwards would relabel these rows again — confirm the intended order.
column_to_modify, substring_to_match, replacement_value = (
    'Description', 'SUSPENDED', 'Suspended License or Registration'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [57]:
# Replace any Description containing LANE with 'Lane Violation'
column_to_modify, substring_to_match, replacement_value = (
    'Description', 'LANE', 'Lane Violation'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [58]:
# Progress check: distinct Description values after the suspended / lane passes
big_df_smaller["Description"].nunique()

6961

In [59]:
# Replace any Description containing UNINSURED with 'Uninsured Vehicle'
column_to_modify, substring_to_match, replacement_value = (
    'Description', 'UNINSURED', 'Uninsured Vehicle'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [60]:
# Progress check: distinct Description values after the uninsured pass
big_df_smaller["Description"].nunique()

6852

In [61]:
# Replace any Description containing FAILURE TO STOP with 'Failure to stop at different circumstances'
# (stop-sign / signal cases were already relabelled 'Improper Stop' by the regex passes)
column_to_modify, substring_to_match, replacement_value = (
    'Description', 'FAILURE TO STOP', 'Failure to stop at different circumstances'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [62]:
# Progress check: distinct Description values after the failure-to-stop pass
big_df_smaller["Description"].nunique()

6692

In [63]:
# Relabel every description containing PHONE (case-insensitive) as "Using Phone"
pattern = re.compile(r'.*PHONE.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Using Phone', text)
)

In [64]:
# Relabel every description containing TEXTMSG (case-insensitive) as "Using Phone"
pattern = re.compile(r'.*TEXTMSG.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Using Phone', text)
)

In [65]:
# Relabel every description containing TEXT (case-insensitive) as "Using Phone".
# NOTE(review): because matching ignores case, this also matches the "Texting" label
# assigned by the earlier ELECTRONIC ... MSG pass and folds it into "Using Phone".
pattern = re.compile(r'.*TEXT.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Using Phone', text)
)

In [66]:
# Progress check: distinct Description values after the phone / text passes
big_df_smaller["Description"].nunique()

6432

In [67]:
# Relabel every description containing TINT (case-insensitive) as "Obstructed View"
pattern = re.compile(r'.*TINT.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Obstructed View', text)
)

In [68]:
# Relabel every description containing OBSTRUCTED (case-insensitive) as "Obstructed View"
pattern = re.compile(r'.*OBSTRUCTED.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Obstructed View', text)
)

In [69]:
# Relabel every description containing RECKLESS (case-insensitive) as "Reckless Driving"
pattern = re.compile(r'.*RECKLESS.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Reckless Driving', text)
)

In [70]:
# Relabel every description containing ACCIDENT (case-insensitive) as "Involved in Accident"
pattern = re.compile(r'.*ACCIDENT.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Involved in Accident', text)
)

In [71]:
# Relabel descriptions mentioning UNATTENDED ... DAMAGED (case-insensitive) as "Involved in Accident"
pattern = re.compile(r'.*UNATTENDED.*DAMAGED.*', flags=re.IGNORECASE)

big_df_smaller['Description'] = big_df_smaller['Description'].map(
    lambda text: pattern.sub('Involved in Accident', text)
)

In [72]:
# Progress check: distinct Description values after the view / reckless / accident passes
big_df_smaller["Description"].nunique()

5777

In [73]:
# Replace any Description containing PRIVATE PROPERTY with 'Trespassing'
column_to_modify, substring_to_match, replacement_value = (
    'Description', 'PRIVATE PROPERTY', 'Trespassing'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [74]:
# Progress check: distinct Description values after the trespassing pass
big_df_smaller["Description"].nunique()

5726

In [75]:
# Replace any Description containing ' TURN' with 'Unsafe Turns'.
# The leading blank is deliberate: it keeps words such as 'RETURN' from matching.
column_to_modify, substring_to_match, replacement_value = (
    'Description', ' TURN', 'Unsafe Turns'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [76]:
# Replace any Description containing '-TURN' (e.g. hyphenated forms) with 'Unsafe Turns'
column_to_modify, substring_to_match, replacement_value = (
    'Description', '-TURN', 'Unsafe Turns'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [77]:
# Merge the 'DRIVING OFF ROADWAY WHILE PASSING VEHICLE' and
# 'DRIVING MOTOR VEHICLE OFF ROADWAY WHILE PASSING VEHICLE' variants:
# any Description containing OFF ROADWAY gets the single shorter wording.
column_to_modify, substring_to_match, replacement_value = (
    'Description', 'OFF ROADWAY', 'DRIVING OFF ROADWAY WHILE PASSING VEHICLE'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [78]:
# Replace any Description containing CROSSWALK with 'Defy Crosswalk rules'
column_to_modify, substring_to_match, replacement_value = (
    'Description', 'CROSSWALK', 'Defy Crosswalk rules'
)
big_df_smaller = data_cleanup(
    df=big_df_smaller,
    column_name=column_to_modify,
    substring=substring_to_match,
    replacement=replacement_value,
)

In [79]:
# Progress check: distinct Description values after the turn / roadway / crosswalk passes
big_df_smaller["Description"].nunique()

5586

In [80]:
# How many rows carry each remaining Description label (most frequent first)
big_df_smaller["Description"].value_counts()

Speeding                                                                        301842
Registration Violation                                                          268130
License Violation                                                               184669
DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC CONTROL DEVICE INSTRUCTIONS      148229
Improper Stop                                                                   113981
                                                                                 ...  
UNLAWFUL USE OF HISTORIC PASSENGER VEH.,OR GENERAL DAILY TRANSPORTATION              1
OPERATING MOTOR VEHICLE WITH FLAT TIRE, AXLE 2 RIGHT SIDE OUTER TIRE                 1
DRIVING TO DRIVE) VEH. WHILE SO FAR IMPAIRED BY DRUGS CANNOT DRIVE SAFELY            1
FAIL APPLY W/IN 48 HOURS FOR REPLACEMENT OF AFFIX-ED CURRENT REG.PLATE,TABS          1
FAILURE OF MV OPER. DRIVING ON HWY TO  CARRY  EVID. OF REQ'D SECURITY FOR MV         1
Name: Description, Length: 5586, dtype: int

In [81]:
# Driver's home state as recorded on each stop
driver_states_info = big_df_smaller['Driver State']

# New column 'Driver State Category': 'in state' when the driver's state is MD,
# 'out of state' for everything else (including missing values)
big_df_smaller['Driver State Category'] = driver_states_info.eq('MD').map(
    {True: 'in state', False: 'out of state'}
)

In [82]:
# State recorded in the 'State' column (used here as the license-plate state)
plate_states_info = big_df_smaller['State']

# New column 'License Plate State Category': 'in state' when the plate state is MD,
# 'out of state' for everything else (including missing values)
big_df_smaller['License Plate State Category'] = plate_states_info.eq('MD').map(
    {True: 'in state', False: 'out of state'}
)

In [83]:
# Expand the most common abbreviated vehicle makes to their full names.
# The previous form, big_df_smaller['Make'].replace(..., inplace=True), mutates an
# intermediate Series: pandas 2.x warns about it and under Copy-on-Write it no longer
# updates the DataFrame at all. Build one mapping and assign the result back instead.
# Only whole-cell exact matches are replaced.
common_make_fixes = {
    'TOYT': 'TOYOTA',
    'HOND': 'HONDA',
    'CHEV': 'CHEVROLET',
    'NISS': 'NISSAN',
    'MERZ': 'MERCEDES',
    'HYUN': 'HYUNDAI',
    'VOLK': 'VOLKSWAGON',
    'ACUR': 'ACURA',
    'DODG': 'DODGE',
    'CHEVY': 'CHEVROLET',
    'CHRY': 'CHRYSLER',
    'MITS': 'MITSUBISHI',
}
big_df_smaller['Make'] = big_df_smaller['Make'].replace(common_make_fixes)

In [84]:
# Normalise the long tail of abbreviated / misspelled vehicle makes.
#
# Changes from the previous 156 separate big_df_smaller['Make'].replace(..., inplace=True) calls:
#   * The result is assigned back to the column. In-place replace on a selected column
#     warns in pandas 2.x and is a silent no-op under Copy-on-Write.
#   * 'ROV' was mapped to 'LEXUS' (copy-paste slip); it now joins the other Land Rover variants.
#   * 'MNNI' used to become 'MINI' and then 'MINI-COOPER' via a later call; it is mapped
#     straight to 'MINI-COOPER' so the end result is unchanged.
# Only whole-cell exact matches are replaced.
#
# NOTE(review): 'VOLKSWAGON' is kept as the canonical spelling because other cells may rely
# on it, but the manufacturer spells it 'VOLKSWAGEN' — check for a split category.
# NOTE(review): 'MERC'/'MER' -> MERCEDES and 'KW' -> KAWASAKI are preserved as-is; these codes
# may instead denote Mercury and Kenworth — verify against the source's make codes.
make_variants = {
    'TOYOTA': ['TOYO', 'TOY', 'TOYOT', 'TOYTA', 'TOYTOA', 'TOTY', 'TOYOYA', 'TOTOTA',
               'TOYTOTA', 'TOYOA', 'TYOTA', 'TOYOYTA'],
    'SUBARU': ['SUBA', 'SUB', 'SUBURU', 'SUBU', 'SUBURA'],
    'MAZDA': ['MAZD', 'MAZ', 'MAZADA', 'MADZA'],
    'CADILLAC': ['CADI', 'CADILAC', 'CAD'],
    'VOLKSWAGON': ['VW', 'VOLKS', 'VOLKWAGON', 'VOLKS WAGON', 'VOLKSWAGGON', 'VOLKWAGEN',
                   'VOLSWAGON', 'VOLTSWAGON'],
    'INFINITI': ['INFI', 'INIFINITY', 'INFIN', 'INF', 'INFINTI', 'INFINIT', 'INFINI',
                 'INFINITE', 'INIFINITI'],
    'MERCEDES': ['MERC', 'MERCEDEZ', 'MERCEDES BENZ', 'MERCEDES-BENZ', 'MERZ BENZ', 'BENZ',
                 'MERCEDEZ BENZ', 'MER', 'MB', 'MERZEDES', 'MERC BENZ', 'MERCADES'],
    'LEXUS': ['LEXS', 'LEXU', 'LEX'],
    'VOLVO': ['VOLV'],
    'PONTIAC': ['PONT'],
    'LINCOLN': ['LINC'],
    'BUICK': ['BUIC'],
    'OLDSMOBILE': ['OLDS'],
    'NISSAN': ['NISSIAN', 'NISSA', 'NISAN'],
    'LAND-ROVER': ['LNDR', 'LAND ROVER', 'LANDROVER', 'LAND', 'ROV'],
    'PORSCHE': ['PORSHE', 'PORCHE'],
    'SATURN': ['SATU', 'STRN', 'SATR'],
    'HYUNDAI': ['HYUND', 'HYUNDIA', 'HYUNDI', 'HYUNDA', 'HYUNDAY', 'HUNDAI', 'HYNDAI',
                'HYUDAI', 'HYANDAI', 'HUYN', 'HUYNDAI', 'HYU', 'HYND', 'HYN'],
    'RANGE-ROVER': ['RANGE ROVER', 'RANG', 'RANGE'],
    'CHEVROLET': ['CHEVORLET', 'CHEVEROLET', 'CHEVE', 'CHEVEY', 'CHEVERLOT', 'CEHVY'],
    'INTERNATIONAL': ['INTL', 'INTE'],
    'PETERBILT': ['PTRB', 'PETE', 'PETERBUILT'],
    'ISUZU': ['ISUZ', 'ISU', 'IZUZU'],
    'CHRYSLER': ['CHRYS', 'CHRYSTLER', 'CRYSLER', 'CHYRSLER', 'CHYSLER', 'CRYS',
                 'CHRYLSER', 'CRYSTLER'],
    'JAGUAR': ['JAGU', 'JAG'],
    'SCION': ['SCIO'],
    'SUZUKI': ['SUZU', 'SUZI', 'SUZ'],
    'FREIGHTLINER': ['FRHT', 'FREI', 'FREIGHT'],
    'PLYMOUTH': ['PLYM'],
    'MITSUBISHI': ['MITZ', 'MITSU', 'MIT', 'MITSIBISHI', 'MISTUBISHI', 'MITSUBUSHI'],
    'TESLA': ['TESL', 'TSMR'],
    'MINI-COOPER': ['MNNI', 'MINI COOPER', 'MINI'],
    'YAMAHA': ['YAMA'],
    'HUMMER': ['HUMM'],
    'KAWASAKI': ['KAWK', 'KW'],
    'TAOTAO': ['TAO TAO'],
    'SAAB': ['SAA'],
    'HARLEY-DAVIDSON': ['HARLEY DAVIDSON', 'HARLEY', 'HD'],
    'HONDA': ['HINDA', 'HON', 'HONA', 'HNDA', 'HODA'],
    'MASERATI': ['MASE'],
    'KENWORTH': ['KENW'],
    'BMW': ['BWM'],
    'ACURA': ['ACCURA', 'ACRUA'],
    'FORD': ['FOR'],
    'SMART': ['SMRT'],
    'NEW-FLYER': ['NEW FLYER'],
}
# Flatten to {variant: canonical} so the whole column is normalised in one call.
make_fixes = {
    variant: canonical
    for canonical, variants in make_variants.items()
    for variant in variants
}
big_df_smaller['Make'] = big_df_smaller['Make'].replace(make_fixes)
big_df_smaller['Make'].replace('INT', 'INTERNATIONAL', inplace=True)
big_df_smaller['Make'].replace('DUCA', 'DUCATI', inplace=True)
big_df_smaller['Make'].replace('SAT', 'SATURN', inplace=True)
big_df_smaller['Make'].replace('CADDILAC', 'CADILLAC', inplace=True)
big_df_smaller['Make'].replace('MITTS', 'MITSUBISHI', inplace=True)
big_df_smaller['Make'].replace('COOPER', 'MINI-COOPER', inplace=True)
big_df_smaller['Make'].replace('ALFA ROMEO', 'ALFA-ROMEO', inplace=True)
big_df_smaller['Make'].replace('FOED', 'FORD', inplace=True)
big_df_smaller['Make'].replace('STER', 'STERLING', inplace=True)
big_df_smaller['Make'].replace('THOM', 'THOMAS', inplace=True)
big_df_smaller['Make'].replace('MAZA', 'MAZDA', inplace=True)
big_df_smaller['Make'].replace('ALFA', 'ALFA-ROMEO', inplace=True)
big_df_smaller['Make'].replace('MECURY', 'MERCURY', inplace=True)
big_df_smaller['Make'].replace('INIFINITI', 'INFINITI', inplace=True)
big_df_smaller['Make'].replace('LEXIS', 'LEXUS', inplace=True)
big_df_smaller['Make'].replace('HODNA', 'HONDA', inplace=True)
big_df_smaller['Make'].replace('SUZIKI', 'SUZUKI', inplace=True)
big_df_smaller['Make'].replace('DOGE', 'DODGE', inplace=True)
big_df_smaller['Make'].replace('MITSUBSHI', 'MITSUBISHI', inplace=True)
big_df_smaller['Make'].replace('HIUNDAI', 'HYUNDAI', inplace=True)
big_df_smaller['Make'].replace('HYUNAI', 'HYUNDAI', inplace=True)
big_df_smaller['Make'].replace('DIDGE', 'DODGE', inplace=True)
big_df_smaller['Make'].replace('HYUANDI', 'HYUNDAI', inplace=True)
big_df_smaller['Make'].replace('UNK', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('HOMDA', 'HONDA', inplace=True)
big_df_smaller['Make'].replace('HIONDA', 'HONDA', inplace=True)
big_df_smaller['Make'].replace('CHEVROLETE', 'CHEVROLET', inplace=True)
big_df_smaller['Make'].replace('NIS', 'NISSAN', inplace=True)
big_df_smaller['Make'].replace('CADILLIAC', 'CADILLAC', inplace=True)
big_df_smaller['Make'].replace('VOLKSW', 'VOLKSWAGON', inplace=True)
big_df_smaller['Make'].replace('SION', 'SCION', inplace=True)
big_df_smaller['Make'].replace('VOLSWAGEN', 'VOLKSWAGON', inplace=True)
big_df_smaller['Make'].replace('VOLKSWAGAN', 'VOLKSWAGON', inplace=True)
big_df_smaller['Make'].replace('LINCON', 'LINCOLN', inplace=True)
big_df_smaller['Make'].replace('TOOTA', 'TOYOTA', inplace=True)
big_df_smaller['Make'].replace('CHEROLET', 'CHEVROLET', inplace=True)
big_df_smaller['Make'].replace('MINNI', 'MINI-COOPER', inplace=True)
big_df_smaller['Make'].replace('WHITE', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('INFINITY', 'INFINITI', inplace=True)
big_df_smaller['Make'].replace('CHRISLER', 'CHRYSLER', inplace=True)
big_df_smaller['Make'].replace('TOTOYA', 'TOYOTA', inplace=True)
big_df_smaller['Make'].replace('HUNDAY', 'HYUNDAI', inplace=True)
big_df_smaller['Make'].replace('CHYR', 'CHRYSLER', inplace=True)
big_df_smaller['Make'].replace('FIRD', 'FORD', inplace=True)
big_df_smaller['Make'].replace('SUSUKI', 'SUZUKI', inplace=True)
big_df_smaller['Make'].replace('MITZUBISHI', 'MITZUBISHI', inplace=True)
big_df_smaller['Make'].replace('HYUANDAI', 'HYUNDAI', inplace=True)
big_df_smaller['Make'].replace('MIFU', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('None', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('CRY', 'CHRYSLER', inplace=True)
big_df_smaller['Make'].replace('HARL', 'HARLEY-DAVIDSON', inplace=True)
big_df_smaller['Make'].replace('WEST', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('TOYOTA SCION', 'SCION', inplace=True)
big_df_smaller['Make'].replace('BUIK', 'BUICK', inplace=True)
big_df_smaller['Make'].replace('SUBAR', 'SUBARU', inplace=True)
big_df_smaller['Make'].replace('VOLV0', 'VOLVO', inplace=True)
big_df_smaller['Make'].replace('PREM', 'PREMIER', inplace=True)
big_df_smaller['Make'].replace('NSSAN', 'NISSAN', inplace=True)
big_df_smaller['Make'].replace('HYUNDAU', 'HYUNDAI', inplace=True)
big_df_smaller['Make'].replace('TOYORA', 'TOYOTA', inplace=True)
big_df_smaller['Make'].replace('NISSAM', 'NISSAN', inplace=True)
big_df_smaller['Make'].replace('MITUBISHI', 'MITSUBISHI', inplace=True)
big_df_smaller['Make'].replace('MITSHUBISHI', 'MITSUBISHI', inplace=True)
big_df_smaller['Make'].replace('BENT', 'BENTLEY', inplace=True)
big_df_smaller['Make'].replace('MADZ', 'MAZDA', inplace=True)
big_df_smaller['Make'].replace('CHVY', 'CHEVROLET', inplace=True)
big_df_smaller['Make'].replace('LEXSUS', 'LEXUS', inplace=True)
big_df_smaller['Make'].replace('INTER', 'INTERNATIONAL', inplace=True)
big_df_smaller['Make'].replace('SSR', 'CHEVROLET', inplace=True)
big_df_smaller['Make'].replace('VIP', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('MRAEZ', 'MASERATI', inplace=True)
big_df_smaller['Make'].replace('MISSAN', 'NISSAN', inplace=True)
big_df_smaller['Make'].replace('HONAD', 'HONDA', inplace=True)
big_df_smaller['Make'].replace('WORK', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('CHECY', 'CHEVROLET', inplace=True)
big_df_smaller['Make'].replace('CHEVROLEY', 'CHEVROLET', inplace=True)
big_df_smaller['Make'].replace('CHRSYLER', 'CHRYSLER', inplace=True)
big_df_smaller['Make'].replace('ORD', 'FORD', inplace=True)
big_df_smaller['Make'].replace('MITSUBISH', 'MITSUBISHI', inplace=True)
big_df_smaller['Make'].replace('VOLS', 'VOLKSWAGON', inplace=True)
big_df_smaller['Make'].replace('MERCZ', 'MERCEDES', inplace=True)
big_df_smaller['Make'].replace('MITIS', 'MITSUBISHI', inplace=True)
big_df_smaller['Make'].replace('CAMRY', 'TOYOTA', inplace=True)
big_df_smaller['Make'].replace('CHRISLER', 'CHRYSLER', inplace=True)
big_df_smaller['Make'].replace('NONE', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('INFINITY', 'INFINITI', inplace=True)
big_df_smaller['Make'].replace('MITZUBISHI', 'MITSUBISHI', inplace=True)
big_df_smaller['Make'].replace('MREZ', 'MERCEDES', inplace=True)
big_df_smaller['Make'].replace('TOYATA', 'TOYOTA', inplace=True)
big_df_smaller['Make'].replace('SABB', 'SAAB', inplace=True)
big_df_smaller['Make'].replace('CHEVRLET', 'CHEVROLET', inplace=True)
big_df_smaller['Make'].replace('PORSH', 'PORSCHE', inplace=True)
big_df_smaller['Make'].replace('FRT', 'FREIGHTLINER', inplace=True)
big_df_smaller['Make'].replace('SCOOTER', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('BMX', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('MIST', 'MITSUBISHI', inplace=True)
big_df_smaller['Make'].replace('KAWA', 'KAWASAKI', inplace=True)
big_df_smaller['Make'].replace('ALTIMA', 'NISSAN', inplace=True)
big_df_smaller['Make'].replace('ISZU', 'ISUZU', inplace=True)
big_df_smaller['Make'].replace('FOD', 'FORD', inplace=True)
big_df_smaller['Make'].replace('MERCEDS', 'MERCEDES', inplace=True)
big_df_smaller['Make'].replace('ONDA', 'HONDA', inplace=True)
big_df_smaller['Make'].replace('BENTLY', 'BENTLEY', inplace=True)
big_df_smaller['Make'].replace('ACCORD', 'HONDA', inplace=True)
big_df_smaller['Make'].replace('VK', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('KAW', 'KAWASAKI', inplace=True)
big_df_smaller['Make'].replace('MD', 'MERCEDES', inplace=True)
big_df_smaller['Make'].replace('SUNNY', 'NISSAN', inplace=True)
big_df_smaller['Make'].replace('HOMD', 'HONDA', inplace=True)
big_df_smaller['Make'].replace('COROLLA', 'TOYOTA', inplace=True)
big_df_smaller['Make'].replace('MECEDES', 'MERCEDES', inplace=True)
big_df_smaller['Make'].replace('NISSSAN', 'NISSAN', inplace=True)
big_df_smaller['Make'].replace('TPYOTA', 'TOYOTA', inplace=True)
big_df_smaller['Make'].replace('SUBR', 'SUBARU', inplace=True)
big_df_smaller['Make'].replace('XX', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('FERR', 'FERRARI', inplace=True)
big_df_smaller['Make'].replace('WV', 'VOLKSWAGON', inplace=True)
big_df_smaller['Make'].replace('VOKSWAGON', 'VOLKSWAGON', inplace=True)
big_df_smaller['Make'].replace('LEXUX', 'LEXUS', inplace=True)
big_df_smaller['Make'].replace('WOLKSWAGEN', 'VOLKSWAGON', inplace=True)
big_df_smaller['Make'].replace('KAWASKI', 'KAWASAKI', inplace=True)
big_df_smaller['Make'].replace('TOYPTA', 'TOYOTA', inplace=True)
big_df_smaller['Make'].replace('NISSON', 'NISSAN', inplace=True)
big_df_smaller['Make'].replace('FORK', 'FORD', inplace=True)
big_df_smaller['Make'].replace('CADALIC', 'CADILLAC', inplace=True)
big_df_smaller['Make'].replace('VOLKSWA', 'VOLKSWAGON', inplace=True)
big_df_smaller['Make'].replace('MITTSUBISHI', 'MITSUBISHI', inplace=True)
big_df_smaller['Make'].replace('HUYUNDAI', 'HYUNDAI', inplace=True)
big_df_smaller['Make'].replace('RANGEROVER', 'RANGE-ROVER', inplace=True)
big_df_smaller['Make'].replace('IZU', 'ISUZU', inplace=True)
big_df_smaller['Make'].replace('HOME', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('KENILWORTH', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('DODGE RAM', 'DODGE', inplace=True)
big_df_smaller['Make'].replace('VOVLO', 'VOLVO', inplace=True)
big_df_smaller['Make'].replace('LUXUS', 'LEXUS', inplace=True)
big_df_smaller['Make'].replace('WESTERN STAR', 'WESTERN-STAR', inplace=True)
big_df_smaller['Make'].replace('MBENZ', 'MERCEDES', inplace=True)

In [85]:
# Further exact-match corrections for the 'Make' column (whole-cell matches
# only; no substring/regex matching).
#
# The corrections are collected in one mapping and applied with a single
# `replace` whose result is assigned back to the column. The previous form,
# `big_df_smaller['Make'].replace(old, new, inplace=True)`, is a chained
# in-place call on a selected column: pandas 2.2 warns about it and under
# Copy-on-Write it no longer updates `big_df_smaller`.
#
# The mapping yields the same final values as the original line-by-line
# sequence. Repeated keys ('SUBARY', 'QUALITY') were dropped, and entries
# whose original target was a typo that a later line re-corrected are written
# with their final value:
#   'SMAR'        -> 'MART'         -> 'SMART'
#   'VOLKESWAGON' -> 'VOLKWAGON'    -> 'VOLKSWAGON'
#   'SUBARA'      -> 'SUBARY'       -> 'SUBARU'
#   'WSTR'        -> 'WESTERN STAR' -> 'WESTERN-STAR'
#
# NOTE(review): 'VOLKSWAGON' is the spelling this notebook uses as the
# canonical value; it is kept because other cells presumably rely on it.
make_fixes_cell_85 = {
    'LEXAS': 'LEXUS',
    'NISSN': 'NISSAN',
    'STLG': 'STERLING',
    'MADA': 'MAZDA',
    'MAD': 'MAZDA',
    'VOLKWAG': 'VOLKSWAGON',
    'TELSA': 'TESLA',
    'PORSCH': 'PORSCHE',
    'PLYMOTH': 'PLYMOUTH',
    'LANR': 'LAND-ROVER',
    'TAO': 'TAOTAO',
    'CHEVYROLET': 'CHEVROLET',
    'DOGDE': 'DODGE',
    'VOL': 'VOLVO',
    'TAIWAN GOLDEN B': 'TAIWAN GOLDEN BEE',
    'MITSUB': 'MITSUBISHI',
    'MERCE': 'MERCEDES',
    'BNW': 'BMW',
    'LICOLN': 'LINCOLN',
    'MERCED': 'MERCEDES',
    'CHRYLER': 'CHRYSLER',
    'LINCOLIN': 'LINCOLN',
    'MERECEDES': 'MERCEDES',
    'TOYTO': 'TOYOTA',
    'CHVROLET': 'CHEVROLET',
    'EPO': 'UNKNOWN',
    'CHEVR': 'CHEVROLET',
    'TOT': 'TOYOTA',
    'BWI': 'UNKNOWN',
    'MISS': 'MITSUBISHI',
    'TOTYOTA': 'TOYOTA',
    'NISAAN': 'NISSAN',
    'CIVIC': 'HONDA',
    'TEST': 'UNKNOWN',
    'CHEVOLET': 'CHEVROLET',
    'MITISUBISHI': 'MITSUBISHI',
    'TPYPTA': 'TOYOTA',
    'OLDMOBILE': 'OLDSMOBILE',
    'CHY': 'CHEVROLET',
    'YOYOTA': 'TOYOTA',
    'CHEY': 'CHEVROLET',
    'SMAR': 'SMART',  # original target 'MART' was re-corrected to 'SMART'
    'HONDS': 'HONDA',
    'HYUNADAI': 'HYUNDAI',
    'GENS': 'GENESIS',
    'ASTON MARTIN': 'ASTON-MARTIN',
    'CADILLA': 'CADILLAC',
    'HUND': 'HYUNDAI',
    'FTL': 'UNKNOWN',
    'SUBUARU': 'SUBARU',
    'MITSUBIHI': 'MITSUBISHI',
    'CEHV': 'CHEVROLET',
    'RAV4': 'TOYOTA',
    'BUCK': 'BUICK',
    'PONIAC': 'PONTIAC',
    'MART': 'SMART',
    'MERCEDEES': 'MERCEDES',
    'FROD': 'FORD',
    'VOLKESWAGON': 'VOLKSWAGON',  # original target 'VOLKWAGON' was a typo
    'SURE': 'UNKNOWN',
    'JEP': 'JEEP',
    'MERCEDEZ-BENZ': 'MERCEDES',
    'HUDS': 'HUDSON',
    'TOYOTOA': 'TOYOTA',
    'NIISSAN': 'NISSAN',
    'VOLKWAGON': 'VOLKSWAGON',
    'CEHVROLET': 'CHEVROLET',
    'HONDA`': 'HONDA',
    'TOMAS': 'THOMAS',
    'MERDZ': 'MERCEDES',
    'CADDILLAC': 'CADILLAC',
    'VOLKSWAG': 'VOLKSWAGON',
    'CHEVROET': 'CHEVROLET',
    'HAUL': 'UNKNOWN',
    'CHR': 'CHRYSLER',
    'MERCEDE': 'MERCEDES',
    'ASTO': 'ASTON-MARTIN',
    'RANGE ROV': 'RANGE-ROVER',
    'JETTA': 'VOLKSWAGON',
    'LODA': 'UNKNOWN',
    'TAGA': 'UNKNOWN',
    'FRIEGHTLINER': 'FREIGHTLINER',
    'KAWI': 'KAWASAKI',
    'TOYOTS': 'TOYOTA',
    'SPRINTER': 'MERCEDES',
    'PORS': 'PORSCHE',
    'HIUNDAY': 'HYUNDAI',
    'STAR': 'WESTERN-STAR',
    'RANG ROVER': 'RANGE-ROVER',
    'CADDI': 'CADILLAC',
    'FRD': 'FORD',
    'STRG': 'HONDA',  # NOTE(review): not an obvious Honda spelling; confirm
    'GORD': 'FORD',
    'VOKS': 'VOLKSWAGON',
    'HUNDI': 'HYUNDAI',
    'BUEL': 'BUELL',
    'TAOT': 'TAOTAO',
    'HOBDA': 'HONDA',
    'ATV': 'UNKNOWN',
    # NOTE(review): maps to 'TOYOTA' here; whether the combined value should be
    # 'SCION' instead cannot be decided from this cell. Kept as in the original.
    'TOYOTA/SCION': 'TOYOTA',
    'PONTAIC': 'PONTIAC',
    'UTIL': 'UTILITY',
    'DATS': 'DATSUN',
    'CEV': 'UNKNOWN',
    'TOYOTSA': 'TOYOTA',
    'DAEW': 'DAEWOO',
    'GILG': 'GILGEE',
    'MOPED': 'UNKNOWN',
    'MBW': 'BMW',
    'HIND': 'HYUNDAI',
    'JEEF': 'JEEP',
    'SUBURAU': 'SUBARU',
    'LEUS': 'LEXUS',
    'CRAN': 'UNKNOWN',
    'MASERATTI': 'MASERATI',
    'MERCDES': 'MERCEDES',
    'INIFITI': 'INFINITI',
    'GNC': 'GM',
    'CADALLAC': 'CADILLAC',
    'TOYOTO': 'TOYOTA',
    'MITSBISHI': 'MITSUBISHI',
    'HUN': 'HYUNDAI',
    'TIYITA': 'TOYOTA',
    'HOINDA': 'HONDA',
    'HYUD': 'HYUNDAI',
    'SUBARY': 'SUBARU',
    'MITSUBISSHI': 'MITSUBISHI',
    'ELANTRA': 'HYUNDAI',
    'BLUE': 'UNKNOWN',
    'OLDS MOBILE': 'OLDSMOBILE',
    'JAQUAR': 'JAGUAR',
    'CEVROLET': 'CHEVROLET',
    'MISTSUBISHI': 'MITSUBISHI',
    'YAHMAHA': 'YAMAHA',
    'TOUOTA': 'TOYOTA',
    'FRTL': 'FREIGHTLINER',
    'DOD': 'UNKNOWN',
    'JONDA': 'HONDA',
    'UHAUL': 'UNKNOWN',
    'VOLKE': 'VOLKSWAGON',
    'RIDE ON': 'UNKNOWN',
    'CHEVT': 'CHEVROLET',
    'MZDA': 'MAZDA',
    'TIYT': 'TOYOTA',
    'TRUCK': 'UNKNOWN',
    'CHRYSL': 'CHRYSLER',
    'SATUR': 'SATURN',
    'CHEVU': 'CHEVROLET',
    'PORC': 'PORSCHE',
    '2016': 'UNKNOWN',
    'TOOYOTA': 'TOYOTA',
    'VOLKW': 'VOLKSWAGON',
    'IHC': 'UNKNOWN',
    'MRZ': 'UNKNOWN',
    'MTIS': 'MITSUBISHI',
    'HYNDIA': 'HYUNDAI',
    'FRGHT': 'FREIGHTLINER',
    'INFNITY': 'INFINITI',
    'POINTIAC': 'PONTIAC',
    'AURA': 'SATURN',
    'CHEVROLT': 'CHEVROLET',
    'BIGT': 'UNKNOWN',
    'HYUNDUI': 'HYUNDAI',
    'AUCRA': 'ACURA',
    'INFNITI': 'INFINITI',
    'TTOYOTA': 'TOYOTA',
    'VULC': 'UNKNOWN',
    'CHE': 'CHEVROLET',
    'HOONDA': 'HONDA',
    'FPRD': 'FORD',
    'SUBARA': 'SUBARU',  # original target 'SUBARY' was re-corrected to 'SUBARU'
    'BISSAN': 'NISSAN',
    'PORCH': 'PORSCHE',
    'FORDQ': 'FORD',
    'MITSIBUSHI': 'MITSUBISHI',
    'YAM': 'YAMAHA',
    'TOYA': 'TOYOTA',
    'TOY0TA': 'TOYOTA',
    'MITZU': 'MITSUBISHI',
    'MIITS': 'MITSUBISHI',
    'ROUTER': 'UNKNOWN',
    'MINICOOPER': 'MINI-COOPER',
    'JEEO': 'JEEP',
    'MEBE': 'MERCEDES',
    'LANDR': 'LAND-ROVER',
    'ACRA': 'ACURA',
    'HONDAY': 'HONDA',
    'MISUBISHI': 'MITSUBISHI',
    'BRIM': 'UNKNOWN',
    'GENISIS': 'GENESIS',
    'DDODGE': 'DODGE',
    'PASSAT': 'VOLKSWAGON',
    'NISSAB': 'NISSAN',
    'CHRYSER': 'CHRYSLER',
    'LAMBO': 'LAMBORGHINI',
    'YUKON': 'GM',
    # NOTE(review): 'MERCERY' and 'MURCURY' map to 'FORD', while 'MERCRUY' and
    # 'MERCU' in this same mapping go to 'MERCURY'. Kept as in the original;
    # confirm which is intended.
    'MERCERY': 'FORD',
    'MAXDA': 'MAZDA',
    'VWOLKS': 'VOLKSWAGON',
    'DORD': 'FORD',
    'HUMAN': 'UNKNOWN',
    'OYOTA': 'TOYOTA',
    'CHEVROLE': 'CHEVROLET',
    'Z71': 'CHEVROLET',
    'TOYOAT': 'TOYOTA',
    'OLSMOBILE': 'OLDSMOBILE',
    'TRAILER': 'UNKNOWN',
    'CHEVOROLET': 'CHEVROLET',
    'NISSANA': 'NISSAN',
    'MURCURY': 'FORD',
    'MNI': 'MINI-COOPER',
    'POR': 'PORSCHE',
    'KIS': 'KIA',
    'LANDOVER': 'LAND-ROVER',
    'THOMAS BUILT': 'THOMAS',
    'MAZFA': 'MAZDA',
    'ACUA': 'ACURA',
    'SILVER': 'UNKNOWN',
    'VOKSWAGEN': 'VOLKSWAGON',
    'GOLD': 'UNKNOWN',
    'MEZ': 'MERCEDES',
    'TOYITA': 'TOYOTA',
    'AVURA': 'ACURA',
    'LOAD': 'UNKNOWN',
    'FHRT': 'FREIGHTLINER',
    'HYUMDAI': 'HYUNDAI',
    'DODE': 'DODGE',
    'HUYANDI': 'HYUNDAI',
    'SABARU': 'SUBARU',
    'SPNR': 'UNKNOWN',
    'VOLO': 'VOLVO',
    'RANGE RVR': 'RANGE-ROVER',
    'VLKSWAGON': 'VOLKSWAGON',
    'QUALITY': 'UNKNOWN',
    'CRCA': 'UNKNOWN',
    'THMS': 'THOMAS',
    'KYMC': 'KYMCO',
    'BLACK': 'UNKNOWN',
    'MERCRUY': 'MERCURY',
    'CHEVTOLET': 'CHEVROLET',
    'HYUNI': 'HYUNDAI',
    'CHYS': 'CHRYSLER',
    'INFT': 'INFINITI',
    'NAZDA': 'MAZDA',
    'HUY': 'HYUNDAI',
    'CADIL': 'CADILLAC',
    'MITUSBISHI': 'MITSUBISHI',
    'MERCADEZ': 'MERCEDES',
    'VOLK W': 'VOLKSWAGON',
    'LINCOL': 'LINCOLN',
    'PRIUS': 'TOYOTA',
    'CHEV.': 'CHEVROLET',
    'SMART CAR': 'SMART',
    'HONDDA': 'HONDA',
    'IZUSU': 'ISUZU',
    'TOMO': 'HONDA',  # NOTE(review): not an obvious Honda spelling; confirm
    'LEXUZ': 'LEXUS',
    'STARCRAFT': 'UNKNOWN',
    'NISSA N': 'NISSAN',
    'LAND ROV': 'LAND-ROVER',
    'LARO': 'LAND-ROVER',
    'KAUF': 'KAUFMAN',
    'KIDA': 'KIA',
    'BRMR': 'UNKNOWN',
    'ROLLS ROYCE': 'ROLLS-ROYCE',
    'VOLKSWAGIN': 'VOLKSWAGON',
    'BMW`': 'BMW',
    'MITI': 'MITSUBISHI',
    'ACUTA': 'ACURA',
    'HHONDA': 'HONDA',
    '4DR': 'UNKNOWN',
    'SATRN': 'SATURN',
    'ALFA ROMERO': 'ALFA-ROMEO',
    'OTHR': 'UNKNOWN',
    'PETER': 'PETERBILT',
    'EGIL': 'UNKNOWN',
    'INIF': 'INFINITI',
    'ACYRA': 'ACURA',
    'SUZIUKI': 'SUZUKI',
    'VOLVA': 'VOLVO',
    'TOYOVAL2005': 'TOYOTA',
    'SCI': 'SCION',
    'VOVL': 'VOLVO',
    '2S': 'VOLVO',  # NOTE(review): not an obvious Volvo spelling; confirm
    'CHRYSLTER': 'CHRYSLER',
    'MADZDA': 'MAZDA',
    'MITSUBISHUI': 'MITSUBISHI',
    'IFINITI': 'INFINITI',
    'SUBAU': 'SUBARU',
    'ISUSU': 'ISUZU',
    'MITSB': 'MITSUBISHI',
    'MITISBISHI': 'MITSUBISHI',
    'MTS': 'MITSUBISHI',
    'PETER BUILT': 'PETERBILT',
    'GENU': 'UNKNOWN',
    'D0DGE': 'DODGE',
    'YAHAMA': 'YAMAHA',
    'MERCU': 'MERCURY',
    'HYINDAI': 'HYUNDAI',
    'HYUNADI': 'HYUNDAI',
    'NIZZAN': 'NISSAN',
    'ORIO': 'SAAB',
    'INTERN': 'INTERNATIONAL',
    'CHRVY': 'CHEVROLET',
    'CEVY': 'CHEVROLET',
    'CHEVROLETTE': 'CHEVROLET',
    'INTR': 'INTERNATIONAL',
    'MITSI': 'MITSUBISHI',
    'EAGL': 'EAGLE',
    'CHRUSLER': 'CHRYSLER',
    'VOVO': 'VOLVO',
    'INTERNATION': 'INTERNATIONAL',
    'TRANSIT': 'UNKNOWN',
    'TOYOVAL2011': 'TOYOTA',
    'HAWK': 'STUDEBAKER',
    'WSTR': 'WESTERN-STAR',  # original target 'WESTERN STAR' was re-hyphenated
    'BAJA': 'SUBARU',
    'TAIZHOU CITY': 'GEELY',
    'HANDA': 'HONDA',
    'VONROAD': 'UNKNOWN',
    'HUYANDAI': 'HYUNDAI',
    'MITISHIBI': 'MITSUBISHI',
    'FRIGHTLINER': 'FREIGHTLINER',
    'MW': 'UNKNOWN',
    'POTIAC': 'PONTIAC',
    '9EC5338': 'UNKNOWN',
    'WESTERN STAR': 'WESTERN-STAR',
    'MALIBU': 'CHEVROLET',
    'GLAV': 'GLAVAL',
    'VOK': 'VOLKSWAGON',
    'LOTU': 'LOTUS',
    'PNT': 'UNKNOWN',
    'SUV': 'UNKNOWN',
    'MURRAY': 'GORDON-MURRAY',
    'BMI': 'BMW',
    'AUD': 'AUDI',
    'PORSCE': 'PORSCHE',
    'MOBED': 'UNKNOWN',
    'VOLSKWAGEN': 'VOLKSWAGON',
    'CHEVY GEO': 'CHEVROLET',
    'LEXUES': 'LEXUS',
    'ISSAN': 'NISSAN',
    'MITSIBISH': 'MITSUBISHI',
    'MINI COOP': 'MINI-COOPER',
    'TOPB': 'UNKNOWN',
    'TOYOVA': 'TOYOTA',
}

# Assign the result back instead of mutating a column selection in place.
big_df_smaller['Make'] = big_df_smaller['Make'].replace(make_fixes_cell_85)
big_df_smaller['Make'].replace('BRMA', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('4D', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('DDOGE', 'DODGE', inplace=True)
big_df_smaller['Make'].replace('SONATA', 'HYUNDAI', inplace=True)
big_df_smaller['Make'].replace('MAZDZ', 'MAZDA', inplace=True)
big_df_smaller['Make'].replace('CADILIAC', 'CADILLAC', inplace=True)
big_df_smaller['Make'].replace('SUBRARU', 'SUBARU', inplace=True)
big_df_smaller['Make'].replace('MECEDEZ', 'MERCEDES', inplace=True)
big_df_smaller['Make'].replace('CARRY ON', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('NISSNA', 'NISSAN', inplace=True)
big_df_smaller['Make'].replace('HONNDA', 'HONDA', inplace=True)
big_df_smaller['Make'].replace('HOME MADE', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('MIS', 'MITSUBISHI', inplace=True)
big_df_smaller['Make'].replace('2010', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('CHEBROLET', 'CHEVROLET', inplace=True)
big_df_smaller['Make'].replace('APRI', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('ROVER', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('WRKH', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('M BENZ', 'MERCEDES', inplace=True)
big_df_smaller['Make'].replace('HONGA', 'HONDA', inplace=True)
big_df_smaller['Make'].replace('SUBSRU', 'SUBARU', inplace=True)
big_df_smaller['Make'].replace('CADDY', 'CADILLAC', inplace=True)
big_df_smaller['Make'].replace('CEHVORLET', 'CHEVROLET', inplace=True)
big_df_smaller['Make'].replace('CHRYSLEY', 'CHRYSLER', inplace=True)
big_df_smaller['Make'].replace('SENTRA', 'NISSAN', inplace=True)
big_df_smaller['Make'].replace('HONDVAL2008', 'HONDA', inplace=True)
big_df_smaller['Make'].replace('JEEK', 'VOLKSWAGON', inplace=True)
big_df_smaller['Make'].replace('NC', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('NINGBO', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('GMX', 'UNKNOWN', inplace=True)
big_df_smaller['Make'].replace('ACU', 'ACURA', inplace=True)
big_df_smaller['Make'].replace('LAMO', 'LAMBORGINI', inplace=True)
big_df_smaller['Make'].replace('SUZIK', 'SUZUKI', inplace=True)
big_df_smaller['Make'].replace('WOLKSWAGON', 'VOLKSWAGON', inplace=True)
big_df_smaller['Make'].replace('CADIALLAC', 'CADILLAC', inplace=True)
big_df_smaller['Make'].replace('NNISSAN', 'NISSAN', inplace=True)
big_df_smaller['Make'].replace('BICYCLE', 'UNKNOWN', inplace=True)

In [86]:
# Parse the stop time (no explicit format is passed, so pandas infers it),
# store the parsed values back on the frame and derive the hour of the stop.
stop_times = pd.to_datetime(big_df_smaller['Time Of Stop'])
big_df_smaller['Time Of Stop'] = stop_times
big_df_smaller['Hour of Stop'] = stop_times.dt.hour

# Build the modelling frame without the date, state and raw time columns.
random_forest_df = big_df_smaller.drop(
    columns=['Date Of Stop', 'Driver State', 'State', 'Time Of Stop']
)

# Report whether any cell anywhere in the modelling frame is null.
has_nulls = random_forest_df.isnull().values.any()
print(
    "There are null values in the DataFrame."
    if has_nulls
    else "There are no null values in the DataFrame."
)

There are null values in the DataFrame.


In [87]:
# Bin rare vehicle makes: every make that appears in fewer than `cutoff_value`
# rows is collapsed into a single "Other" category.
makes_count = random_forest_df["Make"].value_counts()
cutoff_value = 10000

# Makes whose row count falls below the cutoff.
makes_to_replace = makes_count[makes_count < cutoff_value].index.tolist()

# Relabel all rare makes with one membership mask instead of calling
# Series.replace once per rare make (each call rescans the whole column).
# Missing makes are not counted by value_counts(), so they are left untouched.
rare_make_mask = random_forest_df["Make"].isin(makes_to_replace)
random_forest_df.loc[rare_make_mask, "Make"] = "Other"

# Check to make sure binning was successful
random_forest_df["Make"].value_counts()

TOYOTA        295871
HONDA         248798
FORD          154407
NISSAN        128687
CHEVROLET     123774
Other         106084
HYUNDAI        61410
DODGE          55040
ACURA          51495
MERCEDES       51488
BMW            47824
LEXUS          39623
JEEP           38825
VOLKSWAGON     34754
MAZDA          33112
SUBARU         26173
CHRYSLER       25411
KIA            24920
GMC            21682
INFINITI       20010
MITSUBISHI     18893
AUDI           17564
CADILLAC       14951
VOLVO          13572
BUICK          11424
Name: Make, dtype: int64

In [88]:
# Derive a two-level 'Color Type' feature from the vehicle 'Color' column.
car_colors = random_forest_df['Color']

# Colors labelled 'Colorful'; every other value is labelled 'Neutral'.
colorful_colors = [
    'RED', 'ORANGE', 'YELLOW', 'GREEN', 'BLUE', 'BLUE, DARK',
    'MAROON', 'BLUE, LIGHT', 'GREEN, DK', 'GREEN, LGT', 'PURPLE',
    'MULTICOLOR', 'PINK', 'CAMOUFLAGE',
]

# isin() yields False for anything outside the list (missing colors included),
# so those rows end up as 'Neutral'.
is_colorful = car_colors.isin(colorful_colors)
random_forest_df['Color Type'] = is_colorful.map({True: 'Colorful', False: 'Neutral'})

In [89]:
# Bin rare violation descriptions: keep descriptions that occur in strictly
# more than 10000 rows and relabel everything else as 'Other'.
description_info = random_forest_df['Description'].value_counts()

selected_descriptions = description_info.loc[description_info.gt(10000)].index.tolist()

# Rows whose description is not in the kept list (missing values included)
# receive the 'Other' label.
is_common_description = random_forest_df['Description'].isin(selected_descriptions)
random_forest_df['Description'] = random_forest_df['Description'].where(
    is_common_description, 'Other'
)

In [92]:
# Write the processed data to a CSV.
# The path is built with pathlib rather than the literal "data\Traffic_...":
# "\T" is an invalid escape sequence (a SyntaxWarning on recent Python
# versions) and a backslash separator only works on Windows.
processed_file_path = Path("data") / "Traffic_Violations_Processed.csv"

# Create the output directory if needed so the write does not fail on a
# fresh checkout.
processed_file_path.parent.mkdir(parents=True, exist_ok=True)

# NOTE(review): this writes big_df_smaller, so the Make/Description binning and
# the 'Color Type' column added to random_forest_df are not in the file -
# confirm that this is the intended frame.
big_df_smaller.to_csv(processed_file_path, index=False)