In [1]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor

from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score, KFold
import gradio as gr

2024-05-10 18:16:17.531034: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
df = pd.read_csv('used_cars_UK.csv')

def remove_outliers(df, col_name):
    z_scores = np.abs((df[col_name] - df[col_name].mean()) / df[col_name].std())
    filtered_df = df[z_scores < 3]
    return filtered_df
print(df.shape)
df=remove_outliers(df, 'Price')
df=df.drop("Service history",axis=1)
df.drop(df.columns[0], axis=1, inplace=True)
df['Previous Owners'].fillna(df['Previous Owners'].value_counts().idxmax(), inplace=True)
df.dropna( inplace=True)
print(df.shape)
df.head()


(3685, 14)
(3547, 12)


Unnamed: 0,title,Price,Mileage(miles),Registration_Year,Previous Owners,Fuel type,Body type,Engine,Gearbox,Doors,Seats,Emission Class
0,SKODA Fabia,6900,70189,2016,3.0,Diesel,Hatchback,1.4L,Manual,5.0,5.0,Euro 6
1,Vauxhall Corsa,1495,88585,2008,4.0,Petrol,Hatchback,1.2L,Manual,3.0,5.0,Euro 4
2,Hyundai i30,949,137000,2011,2.0,Petrol,Hatchback,1.4L,Manual,5.0,5.0,Euro 5
3,MINI Hatch,2395,96731,2010,5.0,Petrol,Hatchback,1.4L,Manual,3.0,4.0,Euro 4
4,Vauxhall Corsa,1000,85000,2013,2.0,Diesel,Hatchback,1.3L,Manual,5.0,5.0,Euro 5


In [3]:
unique_values = {}
non_numeric_columns = df.columns
for column in non_numeric_columns:
    unique_values[column] = df[column].unique().tolist()
print(non_numeric_columns)

Index(['title', 'Price', 'Mileage(miles)', 'Registration_Year',
       'Previous Owners', 'Fuel type', 'Body type', 'Engine', 'Gearbox',
       'Doors', 'Seats', 'Emission Class'],
      dtype='object')


In [4]:
from sklearn.preprocessing import OneHotEncoder

import pandas as pd

# Assuming 'df' is your DataFrame

# Define columns to be one-hot encoded
columns_to_encode = ['title', 'Fuel type', 'Body type', 'Gearbox']

# Instantiate OneHotEncoder
encoder = OneHotEncoder(sparse_output=False)

# Fit and transform the selected columns
encoded_data = encoder.fit_transform(df[columns_to_encode])

# Get feature names for the encoded columns
encoded_column_names = encoder.get_feature_names_out(input_features=columns_to_encode)

# Create DataFrame from encoded data (converting sparse matrix to dense array)
encoded_df = pd.DataFrame(encoded_data, columns=encoded_column_names, index=df.index)

# Drop the original columns from the DataFrame
df = df.drop(columns_to_encode, axis=1)

# Concatenate the original DataFrame with the encoded DataFrame
df = pd.concat([df, encoded_df], axis=1)

registration_year_min = df["Registration_Year"].min()

# Process other transformations
df['Emission Class'] = df['Emission Class'].str[5:].astype(int)
df['Engine'] = df['Engine'].str[:3].astype(float)
df["Registration_Year"] = df["Registration_Year"] - registration_year_min

df.head()

Unnamed: 0,Price,Mileage(miles),Registration_Year,Previous Owners,Engine,Doors,Seats,Emission Class,title_Abarth 500,title_Abarth 595,...,Body type_Convertible,Body type_Coupe,Body type_Estate,Body type_Hatchback,Body type_MPV,Body type_Pickup,Body type_SUV,Body type_Saloon,Gearbox_Automatic,Gearbox_Manual
0,6900,70189,23,3.0,1.4,5.0,5.0,6,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1495,88585,15,4.0,1.2,3.0,5.0,4,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
2,949,137000,18,2.0,1.4,5.0,5.0,5,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2395,96731,17,5.0,1.4,3.0,4.0,4,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4,1000,85000,20,2.0,1.3,5.0,5.0,5,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [5]:
y = df["Price"].values.reshape(df.shape[0],1)
x = df.drop("Price",axis=1)

x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

x = x_scaler.fit_transform(x)
y =y_scaler.fit_transform(y)


x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state=42)


In [6]:
x_train

array([[0.07354659, 0.46666667, 0.875     , ..., 1.        , 1.        ,
        0.        ],
       [0.07708952, 0.5       , 0.5       , ..., 0.        , 0.        ,
        1.        ],
       [0.08737869, 0.43333333, 0.125     , ..., 0.        , 0.        ,
        1.        ],
       ...,
       [0.08630762, 0.6       , 0.25      , ..., 0.        , 0.        ,
        1.        ],
       [0.09008115, 0.63333333, 0.5       , ..., 0.        , 0.        ,
        1.        ],
       [0.03602562, 0.9       , 0.125     , ..., 0.        , 0.        ,
        1.        ]])

In [7]:
x_train = np.asarray(x_train).astype(np.float32)
y_train = np.asarray(y_train).astype(np.float32)
x_test = np.asarray(x_test).astype(np.float32)
y_test = np.asarray(y_test).astype(np.float32)
model = keras.Sequential([
    keras.layers.Dense(100, activation='relu', input_shape=(x_train.shape[1],)),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(1)
])

x_train.shape

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
model.fit(x_train, y_train, epochs=100, batch_size=32, verbose=1)

# Evaluate the model
loss, mae = model.evaluate(x_test, y_test, verbose=0)
print(f'Test Mean Absolute Error: {mae}')
print(f'loss: {loss}')
# Make predictions
predictions = model.predict(x_test)

r2 = r2_score(y_test, predictions)
print("R2 Score:", r2)

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-05-10 18:16:20.986385: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:282] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0405 - mae: 0.1546  
Epoch 2/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0087 - mae: 0.0695
Epoch 3/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 989us/step - loss: 0.0074 - mae: 0.0634
Epoch 4/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 921us/step - loss: 0.0047 - mae: 0.0504
Epoch 5/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 856us/step - loss: 0.0037 - mae: 0.0433
Epoch 6/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 876us/step - loss: 0.0031 - mae: 0.0397
Epoch 7/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 912us/step - loss: 0.0031 - mae: 0.0394
Epoch 8/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 885us/step - loss: 0.0029 - mae: 0.0381
Epoch 9/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 958us/step

[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 935us/step - loss: 9.1674e-04 - mae: 0.0211
Epoch 71/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 825us/step - loss: 9.8750e-04 - mae: 0.0224
Epoch 72/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 853us/step - loss: 0.0010 - mae: 0.0220  
Epoch 73/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 818us/step - loss: 8.7506e-04 - mae: 0.0204
Epoch 74/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 797us/step - loss: 9.7078e-04 - mae: 0.0218
Epoch 75/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 840us/step - loss: 0.0012 - mae: 0.0250  
Epoch 76/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 787us/step - loss: 9.7396e-04 - mae: 0.0211
Epoch 77/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 836us/step - loss: 9.0727e-04 - mae: 0.0206
Epoch 78/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━

In [8]:
predictions1 = model.predict(x_train)
r2 = r2_score(y_train, predictions1)
print("R2 Score:", r2)

[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 660us/step
R2 Score: 0.9827976342896612


In [15]:
# data = pd.DataFrame({
#     'title': ['SKODA Fabia'],
#     'Mileage': [70189],
#     'Registration_Year': [2016],
#     'Previous Owners': [3.0],
#     'Fuel type': ['Diesel'],
#     'Body type': ['Hatchback'],
#     'Engine': [1.4],
#     'Gearbox': ['Manual'],
#     'Doors': [5.0],
#     'Seats': [5.0],
#     'Emission Class': [6.0]
# })
# encoded_data_new = encoder.transform(data[columns_to_encode])

# encoded_column_names_new = encoder.get_feature_names_out(input_features=columns_to_encode)

# encoded_df_new = pd.DataFrame(encoded_data_new, columns=encoded_column_names_new, index=data.index)

# data = data.drop(columns_to_encode, axis=1)

# data = pd.concat([data, encoded_df_new], axis=1)

# data['Emission Class'] = data['Emission Class'].astype(int)
# data['Engine'] = data['Engine'].astype(float)
# data["Registration_Year"] = data["Registration_Year"] - registration_year_min

# scaled = x_scaler.transform(data.values)

# print(scaled)

# prediction = model.predict(scaled)
# print(y_scaler.inverse_transform(prediction)[0][0])


def predict_price(title, mileage, registration_year, previous_owners, fuel_type, body_type, engine, gearbox, doors, seats, emission_class):
    data = pd.DataFrame({
        'title': [title],
        'Mileage': [int(mileage)],
        'Registration_Year': [int(registration_year)],
        'Previous Owners': [float(previous_owners)],
        'Fuel type': [fuel_type],
        'Body type': [body_type],
        'Engine': [engine],
        'Gearbox': [gearbox],
        'Doors': [float(doors)],
        'Seats': [float(seats)],
        'Emission Class': [emission_class]
    })
    
    # One-hot encode categorical features
    encoded_data = encoder.transform(data[columns_to_encode])
    encoded_column_names_new = encoder.get_feature_names_out(input_features=columns_to_encode)
    encoded_df = pd.DataFrame(encoded_data, columns=encoded_column_names_new, index=data.index)
    data = data.drop(columns_to_encode, axis=1)
    data = pd.concat([data, encoded_df], axis=1)
    
    # Perform scaling
    data['Emission Class'] = data['Emission Class'].str[5:].astype(int)
    data['Engine'] = data['Engine'].str[:3].astype(float)
    data["Registration_Year"] = data["Registration_Year"] - registration_year_min
    scaled_data = x_scaler.transform(data.values)
    
    # Make prediction
    prediction = model.predict(scaled_data)
    
    return y_scaler.inverse_transform(prediction)[0][0]


In [16]:
inputs = [
    gr.Dropdown(label="title", choices=unique_values["title"]),
    gr.Dropdown(label="Mileage", choices=unique_values["Mileage(miles)"]),
    gr.Dropdown(label="Registration_Year", choices=unique_values["Registration_Year"]),
    gr.Dropdown(label="Previous Owners", choices=unique_values["Previous Owners"]),
    gr.Dropdown(label="Fuel type", choices=unique_values["Fuel type"]),
    gr.Dropdown(label="Body type", choices=unique_values["Body type"]),
    gr.Dropdown(label="Engine", choices=unique_values["Engine"]),
    gr.Dropdown(label="Gearbox", choices=unique_values["Gearbox"]),
    gr.Dropdown(label="Doors", choices=unique_values["Doors"]),
    gr.Dropdown(label="Seats", choices=unique_values["Seats"]),
    gr.Dropdown(label="Emission Class", choices=unique_values["Emission Class"])
]

In [17]:
iface = gr.Interface(
    fn=predict_price, 
    inputs=inputs,
    outputs="text",
    title="Car Price Predictor",
    description="Enter the features of the car to get the predicted price."
)

iface.launch()

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




IMPORTANT: You are using gradio version 3.50.0, however version 4.29.0 is available, please upgrade.
--------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


