In [65]:
import pandas as pd

In [66]:
# Read the cleaned used-car listings (path is relative to the working directory)
file_path = 'cleaned_usedCars.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

In [67]:
# Columns carried over from the cleaned data into each car's item profile.
# The order here is the column order of the resulting frame.
selected_columns = [
    'Company',
    'Model',
    'Variant',
    'FuelType',
    'TransmissionType',
    'ManufactureDate',
    'Kilometer',
    'Owner',
    'Price',
    'Colour',
]

In [68]:
# Work on an independent copy so later edits never touch the loaded frame `df`.
item_profiles = df.loc[:, selected_columns].copy()

In [69]:
# Normalize categorical text columns.
# Whitespace is stripped BEFORE re-casing: str.capitalize() only upper-cases the
# very first character, so a value with leading whitespace (e.g. " red") would
# otherwise come out as "red" instead of "Red". For title()/upper() the order
# makes no difference; it is kept uniform for readability.
item_profiles['Company'] = item_profiles['Company'].str.strip().str.title()
item_profiles['Model'] = item_profiles['Model'].str.strip().str.upper()
item_profiles['Variant'] = item_profiles['Variant'].str.strip().str.upper()
item_profiles['Colour'] = item_profiles['Colour'].str.strip().str.capitalize()

In [70]:
# Convert categorical values to numerical encoding


def _encode_labels(values, mapping, unknown=0):
    """Map text labels to integer codes, ignoring case and surrounding whitespace.

    Parameters
    ----------
    values : pandas.Series
        Raw labels to encode.
    mapping : dict
        Label -> integer code. Keys are matched case-insensitively.
    unknown : int, default 0
        Code assigned to missing values and to labels absent from `mapping`.

    Returns
    -------
    pandas.Series
        Integer codes, index-aligned with `values`.
    """
    lookup = {str(label).strip().lower(): code for label, code in mapping.items()}
    # astype(str) turns NaN into the text 'nan', which is not a lookup key and
    # therefore falls through to `unknown` like any other unrecognised label.
    normalized = values.astype(str).str.strip().str.lower()
    return normalized.map(lookup).fillna(unknown).astype(int)


# FuelType: 1-Petrol, 2-Diesel, 3-CNG, 0-Unknown
# An exact-match .map() silently sends labels such as 'petrol' or ' Diesel ' to 0,
# so labels are normalized before the lookup.
# NOTE(review): the saved output of this notebook shows FuelType == 0 for every
# displayed row, which suggests the raw labels did not match these keys exactly —
# confirm against the values actually present in the cleaned CSV.
fuel_mapping = {'Petrol': 1, 'Diesel': 2, 'CNG': 3}
item_profiles['FuelType'] = _encode_labels(item_profiles['FuelType'], fuel_mapping)

# TransmissionType: 0-Manual, 1-Automatic, 0 for unknown
# NOTE(review): unknown labels share code 0 with Manual, so the two cannot be
# told apart after encoding. Kept as-is to preserve the existing code values.
transmission_mapping = {'Manual': 0, 'Automatic': 1}
item_profiles['TransmissionType'] = _encode_labels(
    item_profiles['TransmissionType'], transmission_mapping
)

# Colour: integer codes assigned in order of first occurrence.
# pd.factorize does this in one vectorized call and gives missing colours the
# explicit sentinel -1 instead of depending on how NaN behaves as a dict key.
color_codes, color_labels = pd.factorize(item_profiles['Colour'])
color_mapping = {label: code for code, label in enumerate(color_labels)}
color_counter = len(color_mapping)  # number of distinct (non-missing) colours
color_codes = color_codes.tolist()

item_profiles['Colour'] = color_codes

In [71]:
# Build a readable identifier per row: "<Company> <Model> <Variant>".
# Rows sharing the same company, model and variant receive the same Car_ID.
id_parts = [item_profiles[column] for column in ('Company', 'Model', 'Variant')]
car_id = id_parts[0]
for part in id_parts[1:]:
    car_id = car_id + " " + part
item_profiles['Car_ID'] = car_id

In [72]:
# Save the item profile.
# The path is relative to the working directory — the same convention used to
# read 'cleaned_usedCars.csv' — so the notebook runs on any machine instead of
# failing when the original author's absolute Desktop path does not exist.
output_path = "item_profiles.csv"
item_profiles.to_csv(output_path, index=False)

print("Item profiles successfully created and saved as 'item_profiles.csv'.")
print(item_profiles.head())

Item profiles successfully created and saved as 'item_profiles.csv'.
         Company               Model           Variant  FuelType  \
0  Maruti Suzuki  CELERIO(2017-2019)     1.0 ZXI AMT O         0   
1  Maruti Suzuki                ALTO               LXI         0   
2        Hyundai           GRAND I10    1.2 KAPPA ASTA         0   
3           Tata               NEXON           XT PLUS         0   
4           Ford                FIGO  EXI DURATORQ 1.4         0   

   TransmissionType ManufactureDate  Kilometer  Owner   Price  Colour  \
0                 0      02-01-2018      33197      1  575000       0   
1                 0      03-01-2021      10322      1  435000       1   
2                 0      03-01-2015      37889      1  470000       2   
3                 0      08-01-2020      13106      1  990000       3   
4                 0      11-01-2010     104614      2  270000       0   

                                           Car_ID  
0  Maruti Suzuki CELERIO(2017-2