In [2]:
import pandas as pd
import os

In [1]:
# This function creates the path to the main Excel file of this project
# Uses os library methods to ensure the file path works in both local and GitHub CI/CD environments
def create_file_path(file_relative_path):
    # Store the xlsx file dir into a var
    dir = '../data'
    
    # Iterate under dir data to have all files
    for filename in os.listdir(dir):
        # Condition to confirm which file is == year_make_model_df.xlsx
        if filename.endswith(file_relative_path):
            # Join the dir + filename to create the path to year_make_model_df.xlsx file  
            xlsx_file_fullpath = os.path.join(dir, filename)
            # Print the result
            print(xlsx_file_fullpath)
    return xlsx_file_fullpath

In [3]:
# Read excel 
df = pd.read_excel(create_file_path('year_make_model_df.xlsx'))

../data\year_make_model_df.xlsx


In [4]:
# Create df
df_nissan = df[df["Make"] == "Nissan"]

In [5]:
# Check how the df is
df_nissan.info()

<class 'pandas.core.frame.DataFrame'>
Index: 184 entries, 272 to 3372
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Year            184 non-null    int64 
 1   Make            184 non-null    object
 2   Model           184 non-null    object
 3   Security        154 non-null    object
 4   ParameterReset  0 non-null      object
dtypes: int64(1), object(4)
memory usage: 8.6+ KB


In [5]:
# Function to create list with unique items from given column
def data_from_column(df, column):
    # List to append the items
    data_column_list =[]

    # Iterate over given column
    for data in df[column]:
        # Append the items from the given column
        data_column_list.append(data)

    # Return a set with unique items from given column
    return set(data_column_list)

In [6]:
# Call the function to create a set with unique items from column model
data_from_column(df_nissan, "Model")

{'350Z',
 '370Z',
 'Altima',
 'Armada',
 'Cube',
 'Frontier',
 'GT-R',
 'Juke',
 'LEAF (EV)',
 'Maxima',
 'Murano',
 'Pathfinder',
 'Quest',
 'Rogue',
 'Sentra',
 'Titan',
 'Versa',
 'Xterra'}

No changes are needed in the Model names.

In [7]:
# Call the function to create a set with unique items from column Security
data_from_column(df_nissan, "Security")

{'KING CAB:\nStd\nSV, SV14 &\nPro-4X, S Model =Not Available\nCREW CAB:\nStd SV, Pro-4X\n& SL, S Model=Not Available',
 'KING CAB: All trim levels – optional only \nCREW CAB: All trim levels Std"',
 'KING CAB: N/A on XE & SE, Std on NISMO & LE\nCREW CAB: Std on SE, LE & NISMO',
 'KING CAB: Optional LE & Pro-4X Trim Levels\nCREW CAB: Std',
 'KING CAB: Optional LE & Pro-4X Trim Levels \nCREW CAB: Std',
 'KING CAB: Optional SV & Pro-4X Trim Levels\nCREW CAB: SV Pro-4X & SL Std',
 'KING CAB: Optional SV, SV14 & Pro- 4X\nCREW CAB: SV, Pro-4X & SL Std"',
 'KING CAB: Std on SE & LE, N/A on XE\nCREW CAB: Std on SE & LE, Opt on XE',
 'KING CAB: Std on SE, Pro- 4x & LE, N/A on XE\nCREW CAB: Std on SE Pro- 4x\n& LE',
 'KING CAB: Std on SE, Pro-4x & LE, N/A on XE\nCREW CAB: Std on SE Pro-4x\n& LE, Opt on XE',
 'KING CAB: Std on SE, Pro-4x & LE, N/A on XE\nCREW CAB: Std on SE Pro-4x & LE, Opt on XE',
 'KING CAB: Std on SV, Pro-4x\nCREW CAB: Std',
 'KING CAB: Std on SV, Pro-4x\nCREW CAB: Std on SV P

### Changes needed:
- Remove char \n
- Fill null rows

In [8]:
# Remove charactere '\n'
df_nissan['Security'] = df_nissan['Security'].str.replace('\n', ' ', regex=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nissan['Security'] = df_nissan['Security'].str.replace('\n', ' ', regex=True)


In [9]:
# Fill out null values
df_nissan['Security'].fillna('Information not avaiable', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_nissan['Security'].fillna('Information not avaiable', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nissan['Security'].fillna('Information not avaiable', inplace=True)


In [10]:
# Drop column 'ParameterReset'
df_nissan_dropped_column = df_nissan.drop(columns=['ParameterReset'])

In [11]:
# Reset index
df_nissan_updated = df_nissan_dropped_column.reset_index(drop=True)

In [None]:
# Export the to .csv file
# df_nissan_updated.to_csv('C:\\Language_Projects\\Language_Projects\\Python\\Flagship_1\\vehicle_security_system_data_cleaning\\data\\df_nissan.csv', index=False)