In [1]:
import pandas as pd
import re
import os

In [2]:
# This function creates the path to the main Excel file of this project
# Uses os library methods to ensure the file path works in both local and GitHub CI/CD environments
def create_file_path(file_relative_path):
    # Store the xlsx file dir into a var
    dir = '../data'
    
    # Iterate under dir data to have all files
    for filename in os.listdir(dir):
        # Condition to confirm which file is == year_make_model_df.xlsx
        if filename.endswith(file_relative_path):
            # Join the dir + filename to create the path to year_make_model_df.xlsx file  
            xlsx_file_fullpath = os.path.join(dir, filename)
            # Print the result
            print(xlsx_file_fullpath)
    return xlsx_file_fullpath

In [3]:
# Read excel 
df = pd.read_excel(create_file_path('year_make_model_df.xlsx'))

../data\year_make_model_df.xlsx


In [4]:
# Converting all values in column Model to string, as some of models are numbers, for instance BMWs 318, 328, 525
df = df.astype({'Model':'string'})

In [5]:
# Confirm if the Dtype from the column Model changed to string.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3463 entries, 0 to 3462
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Year            3463 non-null   int64 
 1   Make            3463 non-null   object
 2   Model           3463 non-null   string
 3   Security        3156 non-null   object
 4   ParameterReset  938 non-null    object
dtypes: int64(1), object(3), string(1)
memory usage: 135.4+ KB


In [7]:
# Function to create list with unique items from given column
def data_from_column(df, column):
    # List to append the items
    data_column_list =[]

    # Iterate over given column
    for data in df[column]:
        # Append the items from the given column
        data_column_list.append(data)

    # Return a set with unique items from given column
    return set(data_column_list)

In [8]:
# Filter only Ford models and create a df.
df_ford = df[df['Make'] == 'Ford']

In [9]:
# Create a set with unique values from column Model
print(data_from_column(df_ford, "Model"))

{'Explorer (4dr)', 'Windstar', 'FUSION', 'F-150 Heritage', 'ESCAPE (Push to Start)', 'Expedition', 'Ranger (2.3L, 3.0L, & 4.0L)', 'Freestyle', 'FUSION (Hybrid)', 'FIESTA', 'Five Hundred', 'Explorer Sport Trac', 'GT', 'Flex', 'C-MAX (Hybrid)', 'TRANSIT CONNECT', 'Escape', 'Crown Victoria', 'C-MAX (Hybrid) (Push to Start)', 'FLEX (Push to Start)', 'Taurus X', 'F-150 Harley-Davidson', 'F-150', 'Explorer Sport (2dr)', 'Excursion', 'FOCUS EV', 'Escape (Hybrid)', 'Edge', 'Taurus', 'Freestar', 'Fusion (Hybrid)', 'F-SERIES Super Duty (F-350/450/650/750)', 'Focus', 'Taurus (Duratec & SHO only)', 'TRANSIT', 'FUSION EV', 'Fusion', 'Contour (V6-only)', 'Edge(Push to Start)', 'Ranger (3.0L & 4.0L only)', 'Mustang', 'Thunderbird', 'F-250 (under 8500# GVW)', 'E-Series', 'F-Series > 8500 (Stripped Chassis/Cab Chassis)'}


F-SERIES Super Duty (F-350/450/650/750):
These models are currently grouped in the same row, but they should be separated into different rows because each model has distinct configurations, even though they share similar security system

In [10]:
# Check mark with True the rows with the model F-SERIES Super Duty (F-350/450/650/750)
df_ford['F-Series SD'] = df_ford['Model'].str.contains(r'F-350/450/650/750', regex=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ford['F-Series SD'] = df_ford['Model'].str.contains(r'F-350/450/650/750', regex=True)


In [11]:
# Store in a df only the models with the string F-SERIES Super Duty (F-350/450/650/750)
df_ford_fseries_sd = df_ford[df_ford['F-Series SD'] == True]

In [12]:
# Explode in separate lines F-Series from 350 - 750
df_ford_model_exploded = df_ford_fseries_sd.assign(Model=df_ford_fseries_sd["Model"].str.split('/')).explode("Model")

In [13]:
# List to append the models
fseries_list = []

# Loop to iterate under column model
for model in df_ford_model_exploded['Model']:
    # Condition to update specific models
    if model in ['450', '650', '750)']:
        fseries_updated = 'F-' + model + ' Super Duty'
        fseries_list.append(fseries_updated)
    else:
        fseries_list.append(model)

In [14]:
# Update the df after updating f-series
df_ford_model_exploded['Model'] = fseries_list

In [15]:
# Set with unique models after update
print(data_from_column(df_ford_model_exploded, 'Model'))

{'F-450 Super Duty', 'F-650 Super Duty', 'F-SERIES Super Duty (F-350', 'F-750) Super Duty'}


### Changes needed:
- F-SERIES Super Duty (F-350 -> F-350 Super Duty
- F-750) Super Duty -> F-750 Super Duty
- Drop the indexes containing the old model: 2805, 3037, 3270

In [16]:
# Update the models based on the above changes
df_ford_model_exploded['Model'] = df_ford_model_exploded['Model'].replace({
    'F-SERIES Super Duty (F-350': 'F-350 Super Duty',
    'F-750) Super Duty': 'F-750 Super Duty'},
    regex=False)

In [17]:
# Drop column F-Series SD
df_ford_dropped_fseries_column = df_ford_model_exploded.drop(columns=['F-Series SD'])

In [18]:
# Concatenate with the main df
df_ford_concatenate_after_models_exploded = pd.concat([df_ford, df_ford_dropped_fseries_column])

In [19]:
# Reset index
df_ford_concatenate_reset_index = df_ford_concatenate_after_models_exploded.reset_index(drop=True)

In [20]:
# Check with rows still has the model F-SERIES Super Duty (F-350/450/650/750)
fseries_indexes = df_ford_concatenate_reset_index['Model'].str.contains(r'F-350/450/650/750', regex=True)

In [21]:
# Confirm the indexes
fseries_indexes[fseries_indexes == True]

212    True
231    True
251    True
Name: Model, dtype: bool

In [22]:
# Drop indexes were exploded
df_ford_drop_fseries_indexes = df_ford_concatenate_reset_index.drop(index=[212, 231, 251])

In [23]:
# Print the models to confirm what else should be chaged
print(data_from_column(df_ford_drop_fseries_indexes, "Model"))

{'Explorer (4dr)', 'Windstar', 'FUSION', 'F-150 Heritage', 'F-650 Super Duty', 'ESCAPE (Push to Start)', 'Expedition', 'Ranger (2.3L, 3.0L, & 4.0L)', 'Freestyle', 'FUSION (Hybrid)', 'FIESTA', 'Five Hundred', 'Explorer Sport Trac', 'GT', 'Flex', 'C-MAX (Hybrid)', 'TRANSIT CONNECT', 'Escape', 'Crown Victoria', 'C-MAX (Hybrid) (Push to Start)', 'FLEX (Push to Start)', 'Taurus X', 'F-150 Harley-Davidson', 'F-150', 'Explorer Sport (2dr)', 'Excursion', 'FOCUS EV', 'Escape (Hybrid)', 'Edge', 'F-350 Super Duty', 'Taurus', 'Freestar', 'Fusion (Hybrid)', 'Focus', 'Taurus (Duratec & SHO only)', 'TRANSIT', 'FUSION EV', 'F-750 Super Duty', 'Fusion', 'Contour (V6-only)', 'Edge(Push to Start)', 'Ranger (3.0L & 4.0L only)', 'Mustang', 'Thunderbird', 'F-250 (under 8500# GVW)', 'F-450 Super Duty', 'E-Series', 'F-Series > 8500 (Stripped Chassis/Cab Chassis)'}


### No changes needed in the rest of the models

In [24]:
# Print the items in the security column to check for changes needed
data_from_column(df_ford_drop_fseries_indexes, 'Security')

{'Built February 1st or earlier: PATS Type A (Stand Alone PATS Module)\nBuilt February 2nd or later: PATS Type E (Powertrain Control Module)',
 'Built July 23rd 2000 or Earlier: PATS Type B (Stand Alone PATS Module)\nBuilt July 24th 2000 or Later: PATS Type E (Powertrain Control Module)',
 'Fleet Vehicles: PATS Type G (Instrument Cluster)\nNon-Fleet Vehicles: PATS Type C (Instrument Cluster)',
 'Keyed Ignition: PATS Type A (Instrument Cluster)\nPush To Start: PATS Type C (Remote Function Actuator / Keyless Vehicle Module)',
 'Keyed Ignition: PATS Type C (Instrument Cluster)\nPush To Start: PATS Type C (Remote Function Actuator / Keyless Vehicle Module)',
 'PATS Type A (Instrument Cluster)',
 'PATS Type A (Stand Alone PATS Module)',
 'PATS Type B (Body Control Module)',
 'PATS Type B (Stand Alone PATS Module)',
 'PATS Type C (Instrument Cluster)',
 'PATS Type C (Powertrain Control Module)',
 'PATS Type C (Remote Function Actuator / Keyless Vehicle Module)',
 'PATS Type E (Powertrain Con

### Changes needed:
- There are 2 different models under 2 lines, they will be separated and the demiliter will be \n:
    * Built February 1st or earlier: PATS Type A (Stand Alone PATS Module)\nBuilt February 2nd or later: PATS Type E (Powertrain Control Module),
    * Built July 23rd 2000 or Earlier: PATS Type B (Stand Alone PATS Module)\nBuilt July 24th 2000 or Later: PATS Type E (Powertrain Control Module),
    * Fleet Vehicles: PATS Type G (Instrument Cluster)\nNon-Fleet Vehicles: PATS Type C (Instrument Cluster),
    * Keyed Ignition: PATS Type A (Instrument Cluster)\nPush To Start: PATS Type C (Remote Function Actuator / Keyless Vehicle Module),
    * Keyed Ignition: PATS Type C (Instrument Cluster)\nPush To Start: PATS Type C (Remote Function Actuator / Keyless Vehicle Module),
- The strings before (:) will moved from column security to column model as they are related to the models.
- This is the result on how it should be after updatings:  
Year: 2000  
Make: Ford  
Model: F-XX (Built February 1st or earlier)   
Security: PATS Type G (Instrument Cluster)

In [25]:
# Fuction to create df
def create_df(df, column, string):
    df_new = df[df[column] == string]
    return df_new

In [26]:
# Function to explode in different lines based on a given pattern
def explode_lines(df, column, pattern):
    # Make a copy of the original df
    df_exploded = df.copy()
    # Split the string in 2 based on the delimiter given and 
    # The result was given inside a list 
    df_exploded[column] = df_exploded[column].str.split(pattern)
    # Explode in 2 lines based on the list items quantity 
    # ['item1', 'item2'], exploded in 2 lines
    df_exploded = df_exploded.explode(column)
    # Return the df exploded
    return df_exploded

In [27]:
# Function to move the strings before char (:) from column security to column model
def move_string_to_column_model(df):
    # Function to explode in separate lines strings from security column
    df_exploded = explode_lines(df, 'Security', '\n')
    # Reset the index
    df_resetted_index = df_exploded.reset_index(drop=True)
    # Split the string in the security column using the (:) as the delimeter
    df_resetted_index['Security'] = df_resetted_index['Security'].str.split(":")

    # Loop to iterate under the df indexes
    for idx in df_resetted_index.index:
        # Method to move the manufactured date to the model column
        df_resetted_index['Model'][idx] = f'{df_resetted_index['Model'][idx]} ({df_resetted_index['Security'][idx][0]})'
        # Remove manufactured date from security column
        df_resetted_index['Security'][idx].pop(0)

    # Extracts the string from list under security column  
    df_security_list = df_resetted_index['Security'].str.join(' ')

    # Update the df
    df_resetted_index['Security'] = df_security_list

    # Return a df with the strings before (:) moved to column model
    return df_resetted_index

In [28]:
# Call the function to create a df with the string containing the strings "Built February..."
df_ford_contour = create_df(df_ford_drop_fseries_indexes, 'Security', 'Built February 1st or earlier: PATS Type A (Stand Alone PATS Module)\nBuilt February 2nd or later: PATS Type E (Powertrain Control Module)')

In [29]:
# Call the function to move the strings from column security to column model from MODEL: Contour
df_ford_contour_moved_strings = move_string_to_column_model(df_ford_contour)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_resetted_index['Model'][idx] = f'{df_resetted_index['Model'][idx]} ({df_resetted_index['Security'][idx][0]})'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gui

In [30]:
# Print the df after moving the strings
df_ford_contour_moved_strings

Unnamed: 0,Year,Make,Model,Security,ParameterReset,F-Series SD
0,1998,Ford,Contour (V6-only) (Built February 1st or earlier),PATS Type A (Stand Alone PATS Module),Parameter Reset Not Required,False
1,1998,Ford,Contour (V6-only) (Built February 2nd or later),PATS Type E (Powertrain Control Module),Parameter Reset Not Required,False


In [31]:
# Call the function to create the df with given string from column security
df_ford_explorer = create_df(df_ford_drop_fseries_indexes, "Security", 'Built July 23rd 2000 or Earlier: PATS Type B (Stand Alone PATS Module)\nBuilt July 24th 2000 or Later: PATS Type E (Powertrain Control Module)')

In [32]:
# Create a list with indexes
ford_explorer_indexes_list = list(df_ford_explorer.index)

### Append list function reason:
The models that were modified as mentioned above will be removed from the main DataFrame. For this, it is necessary to save the indexes of the rows containing the models prior to the updates. A function will be created to append these indexes to a unique list.

In [33]:
# Function to append/update the indexes to an existent list 
def append_indexes_to_list(df, existent_ford_model_indexes_list):
    # Create a new list with indexes from the new df
    ford_model_indexes_list = list(df.index)
    # Loop to iterate over the existent list
    for idx in existent_ford_model_indexes_list:
        # Append the indexes to the new list
        ford_model_indexes_list.append(idx)
    # Return a list wit the items appended to the new list
    return ford_model_indexes_list

In [34]:
# Call the function to append the items from the EXISTENT list to an NEW list
indexes_list_update_one = append_indexes_to_list(df_ford_contour, ford_explorer_indexes_list)

In [36]:
# Call the function to move the strings from column security to column model from MODEL: Explorer
df_ford_explorer_moved_strings = move_string_to_column_model(df_ford_explorer)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_resetted_index['Model'][idx] = f'{df_resetted_index['Model'][idx]} ({df_resetted_index['Security'][idx][0]})'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gui

In [37]:
# Print the df after moving the strings
df_ford_explorer_moved_strings

Unnamed: 0,Year,Make,Model,Security,ParameterReset,F-Series SD
0,2001,Ford,Explorer Sport (2dr) (Built July 23rd 2000 or ...,PATS Type B (Stand Alone PATS Module),Parameter Reset Required,False
1,2001,Ford,Explorer Sport (2dr) (Built July 24th 2000 or ...,PATS Type E (Powertrain Control Module),Parameter Reset Required,False
2,2001,Ford,Explorer Sport Trac (Built July 23rd 2000 or E...,PATS Type B (Stand Alone PATS Module),Parameter Reset Required,False
3,2001,Ford,Explorer Sport Trac (Built July 24th 2000 or L...,PATS Type E (Powertrain Control Module),Parameter Reset Required,False


In [38]:
# Call the function to create the df with given string from column security
df_ford_eseries = create_df(df_ford_drop_fseries_indexes, "Security", "Fleet Vehicles: PATS Type G (Instrument Cluster)\nNon-Fleet Vehicles: PATS Type C (Instrument Cluster)")

In [39]:
# Call the function to append the items from the EXISTENT list to an NEW list
indexes_list_update_two = append_indexes_to_list(df_ford_eseries, indexes_list_update_one)

In [40]:
# Call the function to move the strings from column security to column model from MODEL: E-Series
df_ford_eseries_moved_strings = move_string_to_column_model(df_ford_eseries)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_resetted_index['Model'][idx] = f'{df_resetted_index['Model'][idx]} ({df_resetted_index['Security'][idx][0]})'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gui

In [41]:
# Print the df after moving the strings
df_ford_eseries_moved_strings

Unnamed: 0,Year,Make,Model,Security,ParameterReset,F-Series SD
0,2008,Ford,E-Series (Fleet Vehicles),PATS Type G (Instrument Cluster),Parameter Reset Required,False
1,2008,Ford,E-Series (Non-Fleet Vehicles),PATS Type C (Instrument Cluster),Parameter Reset Required,False
2,2009,Ford,E-Series (Fleet Vehicles),PATS Type G (Instrument Cluster),Parameter Reset Required,False
3,2009,Ford,E-Series (Non-Fleet Vehicles),PATS Type C (Instrument Cluster),Parameter Reset Required,False
4,2010,Ford,E-Series (Fleet Vehicles),PATS Type G (Instrument Cluster),Parameter Reset Required,False
5,2010,Ford,E-Series (Non-Fleet Vehicles),PATS Type C (Instrument Cluster),Parameter Reset Required,False


In [42]:
# Call the function to create the df with given string from column security
df_ford_fiesta = create_df(df_ford_drop_fseries_indexes, 'Security', 'Keyed Ignition: PATS Type A (Instrument Cluster)\nPush To Start: PATS Type C (Remote Function Actuator / Keyless Vehicle Module)')

In [43]:
# Call the function to append the items from the EXISTENT list to an NEW list
indexes_list_update_three = append_indexes_to_list(df_ford_fiesta, indexes_list_update_two)

In [44]:
# Call the function to move the strings from column security to column model from MODEL: Fiesta
df_ford_fiesta_moved_strings = move_string_to_column_model(df_ford_fiesta)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_resetted_index['Model'][idx] = f'{df_resetted_index['Model'][idx]} ({df_resetted_index['Security'][idx][0]})'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gui

In [45]:
# Print the df after moving the strings
df_ford_fiesta_moved_strings

Unnamed: 0,Year,Make,Model,Security,ParameterReset,F-Series SD
0,2011,Ford,FIESTA (Keyed Ignition),PATS Type A (Instrument Cluster),Parameter Reset Required,False
1,2011,Ford,FIESTA (Push To Start),PATS Type C (Remote Function Actuator / Keyle...,Parameter Reset Required,False
2,2012,Ford,FIESTA (Keyed Ignition),PATS Type A (Instrument Cluster),Parameter Reset Required,False
3,2012,Ford,FIESTA (Push To Start),PATS Type C (Remote Function Actuator / Keyle...,Parameter Reset Required,False
4,2013,Ford,FIESTA (Keyed Ignition),PATS Type A (Instrument Cluster),Parameter Reset Required,False
5,2013,Ford,FIESTA (Push To Start),PATS Type C (Remote Function Actuator / Keyle...,Parameter Reset Required,False


In [46]:
# Call the function to create the df with given string from column security
df_ford_keyed_ignition = create_df(df_ford_drop_fseries_indexes, 'Security', 'Keyed Ignition: PATS Type C (Instrument Cluster)\nPush To Start: PATS Type C (Remote Function Actuator / Keyless Vehicle Module)')

In [47]:
# Call the function to append the items from the EXISTENT list to an NEW list
indexes_list_update_four = append_indexes_to_list(df_ford_keyed_ignition, indexes_list_update_three)

In [48]:
# Call the function to move the strings from column security to column model from MODEL: Taurus
df_ford_keyed_ignition_moved_string = move_string_to_column_model(df_ford_keyed_ignition)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_resetted_index['Model'][idx] = f'{df_resetted_index['Model'][idx]} ({df_resetted_index['Security'][idx][0]})'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gui

In [49]:
# Print the df after moving the strings
df_ford_keyed_ignition_moved_string

Unnamed: 0,Year,Make,Model,Security,ParameterReset,F-Series SD
0,2010,Ford,Taurus (Keyed Ignition),PATS Type C (Instrument Cluster),Parameter Reset Required,False
1,2010,Ford,Taurus (Push To Start),PATS Type C (Remote Function Actuator / Keyle...,Parameter Reset Required,False
2,2011,Ford,Taurus (Keyed Ignition),PATS Type C (Instrument Cluster),Parameter Reset Required,False
3,2011,Ford,Taurus (Push To Start),PATS Type C (Remote Function Actuator / Keyle...,Parameter Reset Required,False
4,2012,Ford,Focus (Keyed Ignition),PATS Type C (Instrument Cluster),Parameter Reset Required,False
5,2012,Ford,Focus (Push To Start),PATS Type C (Remote Function Actuator / Keyle...,Parameter Reset Required,False
6,2012,Ford,Taurus (Keyed Ignition),PATS Type C (Instrument Cluster),Parameter Reset Required,False
7,2012,Ford,Taurus (Push To Start),PATS Type C (Remote Function Actuator / Keyle...,Parameter Reset Required,False
8,2013,Ford,Focus (Keyed Ignition),PATS Type C (Instrument Cluster),Parameter Reset Required,False
9,2013,Ford,Focus (Push To Start),PATS Type C (Remote Function Actuator / Keyle...,Parameter Reset Required,False


In [63]:
# Concatenate all dfs to the main df
df_ford_models_str_moved_to_model = pd.concat([df_ford_drop_fseries_indexes, df_ford_fiesta_moved_strings, df_ford_contour_moved_strings, 
                                               df_ford_eseries_moved_strings, df_ford_explorer_moved_strings, df_ford_keyed_ignition_moved_string])

In [64]:
# Drop the indexes with the old models and drop column with bool values (F-Series SD)
df_ford_removed_old_models = df_ford_models_str_moved_to_model.drop(index=indexes_list_update_four, columns=['F-Series SD'])

In [65]:
# Reset index
df_ford_models_updated_reset_index = df_ford_removed_old_models.reset_index(drop=True)

In [66]:
# Call the funtion to confirm all models are correct
print(data_from_column(df_ford_models_updated_reset_index, 'Model'))

{'Explorer (4dr)', 'E-Series (Non-Fleet Vehicles)', 'Windstar', 'FUSION', 'F-150 Heritage', 'F-650 Super Duty', 'Contour (V6-only) (Built February 2nd or later)', 'ESCAPE (Push to Start)', 'Expedition', 'Focus (Push To Start)', 'Contour (V6-only) (Built February 1st or earlier)', 'Explorer Sport (2dr) (Built July 23rd 2000 or Earlier)', 'Ranger (2.3L, 3.0L, & 4.0L)', 'Freestyle', 'FUSION (Hybrid)', 'FIESTA (Keyed Ignition)', 'Focus (Keyed Ignition)', 'Five Hundred', 'Explorer Sport Trac', 'E-Series (Fleet Vehicles)', 'Explorer Sport Trac (Built July 23rd 2000 or Earlier)', 'GT', 'Flex', 'C-MAX (Hybrid)', 'TRANSIT CONNECT', 'Escape', 'Crown Victoria', 'C-MAX (Hybrid) (Push to Start)', 'FLEX (Push to Start)', 'Taurus X', 'F-150 Harley-Davidson', 'F-150', 'Explorer Sport (2dr)', 'Excursion', 'FOCUS EV', 'Escape (Hybrid)', 'Edge', 'Taurus (Push To Start)', 'F-350 Super Duty', 'Taurus', 'Freestar', 'Fusion (Hybrid)', 'Focus', 'Taurus (Duratec & SHO only)', 'TRANSIT', 'FUSION EV', 'Explorer 

In [67]:
# Call the funtion to confirm all models are correct
data_from_column(df_ford_models_updated_reset_index, 'Security')

{' PATS Type A (Instrument Cluster)',
 ' PATS Type A (Stand Alone PATS Module)',
 ' PATS Type B (Stand Alone PATS Module)',
 ' PATS Type C (Instrument Cluster)',
 ' PATS Type C (Remote Function Actuator / Keyless Vehicle Module)',
 ' PATS Type E (Powertrain Control Module)',
 ' PATS Type G (Instrument Cluster)',
 'PATS Type A (Instrument Cluster)',
 'PATS Type A (Stand Alone PATS Module)',
 'PATS Type B (Body Control Module)',
 'PATS Type B (Stand Alone PATS Module)',
 'PATS Type C (Instrument Cluster)',
 'PATS Type C (Powertrain Control Module)',
 'PATS Type C (Remote Function Actuator / Keyless Vehicle Module)',
 'PATS Type E (Powertrain Control Module)',
 'PATS Type F (Powertrain Control Module)',
 'PATS Type G (Instrument Cluster)',
 'PATS Type NA (No PATS)'}

### PATS Type and Anti-Theft Module Location separation:
PATS (Passive Anti-Theft System) Type and Anti-Theft Module Location are currently in the same column. However, the PATS Type defines which key learning procedure should be performed. To make it easier to indicate the correct procedure, it is necessary to separate this information. See below how it is currently and how it should look after the update.

- Before changes:  
Year: 2000  
Make: Ford  
Model: F-XX (Built February 1st or earlier)   
Security: PATS Type G (Instrument Cluster)  

- After changes:  
Year: 2000  
Make: Ford  
Model: F-XX  
PATS Type: PATS Type G  
Anti-Theft Module Location: Instrument Cluster

In [None]:
# This function will separate the columns as described above
def split_pats_type_from_module_location(df):

    # Split the security string considering the '(' as the delimiter
    df['Security'] = df['Security'].str.split('(')

    # Loop to iterate under the df indexes
    for idx in df.index:
        # Method to add a new column to the df, pulling the PATS Type from security column 
        df.at[idx, 'PATS Type'] = f'{df['Security'][idx][0]}'
        # Remove the PATS Type from the security column
        df['Security'][idx].pop(0)

    # Extracts the string from list under security column  
    df_anti_theft_module_location_list = df['Security'].str.join(' ')

    # Update the df
    df['Security'] = df_anti_theft_module_location_list

    # Remove char ')' from column security
    df['Security'] = df['Security'].str.replace(')', '', regex=False)

    # Rename column security to 'Anti-Theft Module Location'
    df = df.rename(columns={'Security': 'Anti-Theft Module Location'})

    # Reorder columns 
    df_updated_reordered_columns = df[['Year', 'Make', 'Model', 'Anti-Theft Module Location', 'PATS Type', 'ParameterReset']]

    # Return the df updated: columns for pats type and anti-theft module location separated
    return df_updated_reordered_columns

In [69]:
# Call the function to separate columns pats type, security, rename columns security and reorder the columns
df_ford_models_updated_reordering_columns = split_pats_type_from_module_location(df_ford_models_updated_reset_index)

### Check if the new column is correct has the correct string

In [70]:
data_from_column(df_ford_models_updated_reordering_columns, 'Anti-Theft Module Location')

{'Body Control Module',
 'Instrument Cluster',
 'No PATS',
 'Powertrain Control Module',
 'Remote Function Actuator / Keyless Vehicle Module',
 'Stand Alone PATS Module'}

In [71]:
data_from_column(df_ford_models_updated_reordering_columns, 'PATS Type')

{' PATS Type A ',
 ' PATS Type B ',
 ' PATS Type C ',
 ' PATS Type E ',
 ' PATS Type G ',
 'PATS Type A ',
 'PATS Type B ',
 'PATS Type C ',
 'PATS Type E ',
 'PATS Type F ',
 'PATS Type G ',
 'PATS Type NA '}

In [72]:
# Check if column parameter reset needs to be change
data_from_column(df_ford_models_updated_reordering_columns, 'ParameterReset')

{'Parameter Reset Not Required', 'Parameter Reset Required'}

### No changes needed in the ParameterReset column

In [None]:
# Export the to .csv file
# df_ford_models_updated_reordering_columns.to_csv('C:\\Language_Projects\\Language_Projects\\Python\\Flagship_1\\vehicle_security_system_data_cleaning\\data\\df_ford.csv', index=False)