In [1]:
import pandas as pd
import os

In [2]:
# This function creates the path to the main Excel file of this project
# Uses os library methods to ensure the file path works in both local and GitHub CI/CD environments
def create_file_path(file_relative_path):
    # Store the xlsx file dir into a var
    dir = '../data'
    
    # Iterate under dir data to have all files
    for filename in os.listdir(dir):
        # Condition to confirm which file is == year_make_model_df.xlsx
        if filename.endswith(file_relative_path):
            # Join the dir + filename to create the path to year_make_model_df.xlsx file  
            xlsx_file_fullpath = os.path.join(dir, filename)
            # Print the result
            print(xlsx_file_fullpath)
    return xlsx_file_fullpath

In [3]:
# Read excel 
df = pd.read_excel(create_file_path('year_make_model_df.xlsx'))

../data\year_make_model_df.xlsx


In [4]:
# Fuction to create df
def create_df(df, make):
    df_new = df[df['Make'] == make]
    return df_new

In [5]:
# Function to create list with unique items from given column
def data_from_column(df, column):
    # List to append the items
    data_column_list =[]

    # Iterate over given column
    for data in df[column]:
        # Append the items from the given column
        data_column_list.append(data)

    # Return a set with unique items from given column
    return set(data_column_list)

In [6]:
# Create a df to Mazda make
df_mazda = create_df(df, 'Mazda')

In [7]:
# Check the string in the model column to confirm if changes are needed
data_from_column(df_mazda, 'Model')

{626,
 'B-Series (2.3L, 3.0L, & 4.0L)',
 'B-Series (3.0L & 4.0L only)',
 'CX-3',
 'CX-30',
 'CX-5',
 'CX-7',
 'CX-9',
 'MPV',
 'MX-5 (Miata)',
 'Mazda2',
 'Mazda3',
 'Mazda5',
 'Mazda6',
 'Millenia',
 'Protege',
 'RX-8',
 'Tribute'}

In [7]:
# Check the string in the model column to confirm if changes are needed
data_from_column(df_mazda, 'Security')

{'Advanced Keyless Entry w/ Keyless Start): M-D\nKeyless Entry w/ Keyed Ignition: M-C',
 'Keyed Ignition: M-B\nAdvanced Keyless: M-D',
 'Keyed Ignition: M-C\nAdvanced Keyless: M-D',
 'Keyed Ignition: M-C\nAdvanced Keyless: M-E',
 'Keyed Ignition: M-C\nAdvanced Keyless: M-F',
 'Keyed Ignition: M-C\nAdvanced Keyless: MD',
 'M-A',
 'M-B',
 'M-C',
 'M-G',
 'M-H',
 'No Security',
 'PATS Type B (Stand Alone PATS Module)',
 'PATS Type C (Powertrain Control Module)',
 'PATS Type E (Powertrain Control Module)',
 'PATS Type G (Instrument Cluster)',
 'PATS Type NA (No PATS)'}

In [8]:
# First change should be add '-' betwen MD mazda key instructions code,
# change from ...\nAdvanced Keyless: MD to \nAdvanced Keyless: M-D
df_mazda['Security'] = df_mazda['Security'].str.replace('Keyed Ignition: M-C\nAdvanced Keyless: MD', 'Keyed Ignition: M-C\nAdvanced Keyless: M-D', regex=False) 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mazda['Security'] = df_mazda['Security'].str.replace('Keyed Ignition: M-C\nAdvanced Keyless: MD', 'Keyed Ignition: M-C\nAdvanced Keyless: M-D', regex=False)


In [9]:
# Print the string under security column AGAIN to confirm the changes
# Check the string in the model column to confirm if changes are needed
data_from_column(df_mazda, 'Security')

{'Advanced Keyless Entry w/ Keyless Start): M-D\nKeyless Entry w/ Keyed Ignition: M-C',
 'Keyed Ignition: M-B\nAdvanced Keyless: M-D',
 'Keyed Ignition: M-C\nAdvanced Keyless: M-D',
 'Keyed Ignition: M-C\nAdvanced Keyless: M-E',
 'Keyed Ignition: M-C\nAdvanced Keyless: M-F',
 'M-A',
 'M-B',
 'M-C',
 'M-G',
 'M-H',
 'No Security',
 'PATS Type B (Stand Alone PATS Module)',
 'PATS Type C (Powertrain Control Module)',
 'PATS Type E (Powertrain Control Module)',
 'PATS Type G (Instrument Cluster)',
 'PATS Type NA (No PATS)'}

### Changes needed:
Although Mazda models are manufactured by Ford and the key learning procedure is similar—with Parameter Reset required for most models—the key instructions are designated by two letters, always starting from M-A to H. Currently, the DataFrame contains some strings mixed with this code, as well as two different key instruction codes in the same row. These should be relocated to the 'Model' column.

- The '\n' character will be used as the delimiter to split the row into two lines.
- The ':' character will be used as the delimiter to move the string before it to the 'Model' column.

Current format:
Year: 2000  
Make: Mazda  
Model: M1  
Security: Advanced Keyless Entry w/ Keyless Start): M-D\nKeyless Entry w/ Keyed Ignition: M-C

Result:
Year: 2000  
Make: Mazda  
Model: M1 Advanced Keyless Entry w/ Keyless Start  
Mazda Key Instructions: M-A  
ParameterReset: Parameter Reset Required

In [10]:
# Fuction to create df
def create_df(df, column, string):
    df_new = df[df[column] == string]
    return df_new

In [11]:
# Function to explode in different lines based on a given pattern
def explode_lines(df, column, pattern):
    # Make a copy of the original df
    df_exploded = df.copy()
    # Split the string in 2 based on the delimiter given and 
    # The result was given inside a list 
    df_exploded[column] = df_exploded[column].str.split(pattern)
    # Explode in 2 lines based on the list items quantity 
    # ['item1', 'item2'], exploded in 2 lines
    df_exploded = df_exploded.explode(column)
    # Return the df exploded
    return df_exploded

In [12]:
# Function to move the strings before char (:) from column security to column model
def move_string_to_column_model(df):
    # Function to explode in separate lines strings from security column
    df_exploded = explode_lines(df, 'Security', '\n')
    # Reset the index
    df_resetted_index = df_exploded.reset_index(drop=True)
    # Split the string in the security column using the (:) as the delimeter
    df_resetted_index['Security'] = df_resetted_index['Security'].str.split(":")

    # Loop to iterate under the df indexes
    for idx in df_resetted_index.index:
        # Method to move the manufactured date to the model column
        df_resetted_index['Model'][idx] = f'{df_resetted_index['Model'][idx]} ({df_resetted_index['Security'][idx][0]})'
        # Remove manufactured date from security column
        df_resetted_index['Security'][idx].pop(0)

    # Extracts the string from list under security column  
    df_security_list = df_resetted_index['Security'].str.join(' ')

    # Update the df
    df_resetted_index['Security'] = df_security_list

    # Return a df with the strings before (:) moved to column model
    return df_resetted_index

### Append list function reason:
The models that were modified as mentioned above will be removed from the main DataFrame. For this, it is necessary to save the indexes of the rows containing the models prior to the updates. A function will be created to append these indexes to a unique list.

In [33]:
# Function to append/update the indexes to an existent list 
def append_indexes_to_list(df, existent_mazda_model_indexes_list):
    # Create a new list with indexes from the new df
    mazda_model_indexes_list = list(df.index)
    # Loop to iterate over the existent list
    for idx in existent_mazda_model_indexes_list:
        # Append the indexes to the new list
        mazda_model_indexes_list.append(idx)
    # Return a list wit the items appended to the new list
    return mazda_model_indexes_list

### Advanced Keyless Entry w/ Keyless Start): M-D\nKeyless Entry w/ Keyed Ignition: M-C

In [14]:
# Call the function to create the df with given string from column security
df_mazda_keyless_start_ignition = create_df(df_mazda, 'Security', 'Advanced Keyless Entry w/ Keyless Start): M-D\nKeyless Entry w/ Keyed Ignition: M-C')

In [15]:
# Create a list with indexes
mazda_keyless_start_ignition_indexes_list = list(df_mazda_keyless_start_ignition.index)

In [16]:
# Call the function to move the strings from column security to column model from MODEL: MX-5 (Miata)
df_mazda_keyless_start_ignition = move_string_to_column_model(df_mazda_keyless_start_ignition)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_resetted_index['Model'][idx] = f'{df_resetted_index['Model'][idx]} ({df_resetted_index['Security'][idx][0]})'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gui

In [17]:
# Print the head to confirm the changes
df_mazda_keyless_start_ignition.head()

Unnamed: 0,Year,Make,Model,Security,ParameterReset
0,2006,Mazda,MX-5 (Miata) (Advanced Keyless Entry w/ Keyles...,M-D,Parameter Reset Required
1,2006,Mazda,MX-5 (Miata) (Keyless Entry w/ Keyed Ignition),M-C,Parameter Reset Required
2,2007,Mazda,MX-5 (Miata) (Advanced Keyless Entry w/ Keyles...,M-D,Parameter Reset Required
3,2007,Mazda,MX-5 (Miata) (Keyless Entry w/ Keyed Ignition),M-C,Parameter Reset Required
4,2008,Mazda,MX-5 (Miata) (Advanced Keyless Entry w/ Keyles...,M-D,Parameter Reset Required


### 'Keyed Ignition: M-B\nAdvanced Keyless: M-D'

In [35]:
# Call the function to create the df with given string from column security
df_mazda_mb_md_key_instructions = create_df(df_mazda, 'Security', 'Keyed Ignition: M-B\nAdvanced Keyless: M-D')

In [36]:
# Call the function to append the items from the EXISTENT list to an NEW list
indexes_list_update_one = append_indexes_to_list(df_mazda_mb_md_key_instructions, mazda_keyless_start_ignition_indexes_list)

In [37]:
# Call the function to move the strings from column security to column model from MODEL: Mazda6
df_mazda6_moved_strings = move_string_to_column_model(df_mazda_mb_md_key_instructions)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_resetted_index['Model'][idx] = f'{df_resetted_index['Model'][idx]} ({df_resetted_index['Security'][idx][0]})'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gui

In [38]:
# Print the head to confirm the changes
df_mazda6_moved_strings.head()

Unnamed: 0,Year,Make,Model,Security,ParameterReset
0,2006,Mazda,Mazda6 (Keyed Ignition),M-B,Parameter Reset Not Required
1,2006,Mazda,Mazda6 (Advanced Keyless),M-D,Parameter Reset Not Required
2,2007,Mazda,Mazda6 (Keyed Ignition),M-B,Parameter Reset Not Required
3,2007,Mazda,Mazda6 (Advanced Keyless),M-D,Parameter Reset Not Required
4,2008,Mazda,Mazda6 (Keyed Ignition),M-B,Parameter Reset Not Required


### Keyed Ignition: M-C\nAdvanced Keyless: M-D

In [27]:
# Call the function to create the df with given string from column security
df_mazda_mc_md_key_instructions = create_df(df_mazda, 'Security', 'Keyed Ignition: M-C\nAdvanced Keyless: M-D')

In [40]:
# Call the function to append the items from the EXISTENT list to an NEW list
indexes_list_update_two = append_indexes_to_list(df_mazda_mc_md_key_instructions, indexes_list_update_one)

In [None]:
# Call the function to move the strings from column security to column model from MODEL
df_mazda_mc_md_moved_strings = move_string_to_column_model(df_mazda_mc_md_key_instructions)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_resetted_index['Model'][idx] = f'{df_resetted_index['Model'][idx]} ({df_resetted_index['Security'][idx][0]})'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gui

In [42]:
# Print the df after moving the strings
df_mazda_mc_md_moved_strings.head()

Unnamed: 0,Year,Make,Model,Security,ParameterReset
0,2006,Mazda,RX-8 (Keyed Ignition),M-C,Parameter Reset Required
1,2006,Mazda,RX-8 (Advanced Keyless),M-D,Parameter Reset Required
2,2007,Mazda,CX-7 (Keyed Ignition),M-C,Parameter Reset Required
3,2007,Mazda,CX-7 (Advanced Keyless),M-D,Parameter Reset Required
4,2007,Mazda,CX-9 (Keyed Ignition),M-C,Parameter Reset Required


### Keyed Ignition: M-C\nAdvanced Keyless: M-E

In [43]:
# Call the function to create the df with given string from column security
df_mazda_mc_me_key_instructions = create_df(df_mazda, 'Security', 'Keyed Ignition: M-C\nAdvanced Keyless: M-E')

In [44]:
# Call the function to append the items from the EXISTENT list to an NEW list
indexes_list_update_three = append_indexes_to_list(df_mazda_mc_me_key_instructions, indexes_list_update_two)

In [45]:
# Call the function to move the strings from column security to column model from MODEL
df_mazda_mc_me_moved_strings = move_string_to_column_model(df_mazda_mc_me_key_instructions)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_resetted_index['Model'][idx] = f'{df_resetted_index['Model'][idx]} ({df_resetted_index['Security'][idx][0]})'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gui

In [46]:
# Print the head to confirm the changes
df_mazda_mc_me_moved_strings.head()

Unnamed: 0,Year,Make,Model,Security,ParameterReset
0,2009,Mazda,CX-7 (Keyed Ignition),M-C,Parameter Reset Required
1,2009,Mazda,CX-7 (Advanced Keyless),M-E,Parameter Reset Required
2,2010,Mazda,CX-7 (Keyed Ignition),M-C,Parameter Reset Required
3,2010,Mazda,CX-7 (Advanced Keyless),M-E,Parameter Reset Required
4,2011,Mazda,CX-7 (Keyed Ignition),M-C,Parameter Reset Required


### Keyed Ignition: M-C\nAdvanced Keyless: M-F

In [47]:
# Call the function to create the df with given string from column security
df_mazda_mc_mf_key_instructions = create_df(df_mazda, 'Security', 'Keyed Ignition: M-C\nAdvanced Keyless: M-F')

In [48]:
# Call the function to append the items from the EXISTENT list to an NEW list
indexes_list_update_four = append_indexes_to_list(df_mazda_mc_mf_key_instructions, indexes_list_update_three)

In [49]:
# Call the function to move the strings from column security to column model from MODEL
df_mazda_mc_mf_moved_strings = move_string_to_column_model(df_mazda_mc_mf_key_instructions)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_resetted_index['Model'][idx] = f'{df_resetted_index['Model'][idx]} ({df_resetted_index['Security'][idx][0]})'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gui

In [50]:
# Concatenate all dfs to the main df
df_mazda_str_moved_model = pd.concat([df_mazda, df_mazda_keyless_start_ignition, df_mazda6_moved_strings, df_mazda_mc_md_moved_strings, df_mazda_mc_me_moved_strings, df_mazda_mc_mf_moved_strings])

In [52]:
# Drop the indexes with the old models
df_mazda_removed_old_models = df_mazda_str_moved_model.drop(index=indexes_list_update_four)

In [53]:
# Reset index
df_mazda_reset_index = df_mazda_removed_old_models.reset_index(drop=True)

## Check if column Model and Security are properly updated

In [None]:
# Model column
print(data_from_column(df_mazda_reset_index, 'Model'))

{'CX-9 (Advanced Keyless)', 'CX-9 (Keyed Ignition)', 'RX-8 (Advanced Keyless)', 'Mazda3 (Keyed Ignition)', 'CX-3', 'CX-5', 'Protege', 'B-Series (3.0L & 4.0L only)', 'Mazda3', 'MX-5 (Miata)', 'Mazda3 (Advanced Keyless)', 'MX-5 (Miata) (Keyless Entry w/ Keyed Ignition)', 'Mazda5', 'Mazda2 (Keyed Ignition)', 'Millenia', 'Mazda6', 'CX-7 (Keyed Ignition)', 'Mazda2 (Advanced Keyless)', 'CX-9', 'RX-8', 'CX-7 (Advanced Keyless)', 'CX-30', 'RX-8 (Keyed Ignition)', 'MX-5 (Miata) (Advanced Keyless Entry w/ Keyless Start))', 'Mazda6 (Advanced Keyless)', 'Tribute', 'Mazda2', 'MPV', 'B-Series (2.3L, 3.0L, & 4.0L)', 'Mazda6 (Keyed Ignition)', 626}


In [57]:
# Security column
data_from_column(df_mazda_reset_index, 'Security')

{' M-B',
 ' M-C',
 ' M-D',
 ' M-E',
 ' M-F',
 'M-A',
 'M-B',
 'M-C',
 'M-G',
 'M-H',
 'No Security',
 'PATS Type B (Stand Alone PATS Module)',
 'PATS Type C (Powertrain Control Module)',
 'PATS Type E (Powertrain Control Module)',
 'PATS Type G (Instrument Cluster)',
 'PATS Type NA (No PATS)'}

In [None]:
# There are some key instruction codes that have whitespaces at their beginning.
# The whitespaces will be removed
# r'\1' - reference the first captured group (M-[A-Z])
df_mazda_reset_index['Security'] = df_mazda_reset_index['Security'].str.replace(r'\s(M-[A-Z])', r'\1', regex=True)

In [None]:
# Call the function to create a set with all strings under column Security to confirm the changes
data_from_column(df_mazda_reset_index, 'Security')

{'M-A',
 'M-B',
 'M-C',
 'M-D',
 'M-E',
 'M-F',
 'M-G',
 'M-H',
 'No Security',
 'PATS Type B (Stand Alone PATS Module)',
 'PATS Type C (Powertrain Control Module)',
 'PATS Type E (Powertrain Control Module)',
 'PATS Type G (Instrument Cluster)',
 'PATS Type NA (No PATS)'}

In [61]:
# Call the function to check the strings from ParameterReset colum
data_from_column(df_mazda_reset_index, 'ParameterReset')

{'Parameter Reset Not Required', 'Parameter Reset Required'}

In [None]:
# Export the to .csv file
# df_mazda_reset_index.to_csv('C:\\Language_Projects\\Language_Projects\\Python\\Flagship_1\\vehicle_security_system_data_cleaning\\data\\df_mazda.csv', index=False)