In [1]:
import pandas as pd

In [2]:
# Load the car listings from the CSV sitting next to the notebook
# (note the space in the file name) and preview the first rows.
data = pd.read_csv(filepath_or_buffer="cars data.csv")
data.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,vitz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,changan alsvin,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [3]:
# Remove the 'Owner' column by assignment rather than in place.
# errors='ignore' keeps this cell idempotent: re-running it once the column
# is already gone is a no-op instead of raising KeyError.
data = data.drop(columns='Owner', errors='ignore')
data.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission
0,vitz,2014,3.35,5.59,27000,Petrol,Dealer,Manual
1,changan alsvin,2013,4.75,9.54,43000,Diesel,Dealer,Manual
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual


In [4]:
from sklearn.utils import resample

In [5]:
def augment_data(df, factor=10, random_state=None):
    """Bootstrap-resample ``df`` into a frame ``factor`` times as long.

    Rows are drawn uniformly at random *with replacement*, so the result
    contains only exact copies of existing rows; no new values are created.

    NOTE(review): if the augmented frame is ever used for modelling, split
    train/test *before* augmenting, otherwise duplicated rows end up on both
    sides of the split.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to resample.
    factor : int, default 10
        Size multiplier; the result has ``len(df) * factor`` rows.
    random_state : int, numpy random generator or None, default None
        Seed forwarded to ``DataFrame.sample``. ``None`` keeps the previous
        unseeded behaviour; pass an int to make the output reproducible.

    Returns
    -------
    pandas.DataFrame
        The resampled rows, keeping their original index labels and columns.

    Raises
    ------
    ValueError
        If ``factor`` is smaller than 1.
    """
    if factor < 1:
        raise ValueError(f"factor must be a positive integer, got {factor!r}")

    return df.sample(n=len(df) * factor, replace=True, random_state=random_state)

# Build the bootstrapped frame, then write it to a new CSV file
# without the row index.
augmented_data = augment_data(df=data)
augmented_data.to_csv(path_or_buf='augmented_dataset1.csv', index=False)

In [6]:
# Predictor columns, in the order they are handed to the classifier.
features = [
    'Year',
    'Selling_Price',
    'Present_Price',
    'Kms_Driven',
    'Fuel_Type',
    'Seller_Type',
    'Transmission',
]
# Label column the classifier is trained to predict.
target = 'Car_Name'

In [7]:
# X is an independent copy of the predictor columns, so encoding it later
# does not write back into `data`; y is the label column taken from `data`.
X = data.loc[:, features].copy()
y = data.loc[:, target]

In [8]:
# Handle null values, if any: column mean for the two price columns and the
# most frequent value (mode) for the three categorical columns.
fill_values = {
    'Selling_Price': data['Selling_Price'].mean(),
    'Present_Price': data['Present_Price'].mean(),
    'Fuel_Type': data['Fuel_Type'].mode()[0],
    'Seller_Type': data['Seller_Type'].mode()[0],
    'Transmission': data['Transmission'].mode()[0],
}

# Assign the result instead of calling `data[col].fillna(..., inplace=True)`:
# that chained in-place form works on an intermediate Series and is not
# guaranteed to update `data` (pandas warns about it, and it is a no-op
# under Copy-on-Write).
data = data.fillna(fill_values)

# X was copied out of `data` before this cell ran, so apply the same
# imputation to it; otherwise the model inputs would keep their NaNs.
X = X.fillna(fill_values)


In [9]:
# Per-column count of remaining missing values (expected to be all zeros).
print(data.isna().sum())

Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
dtype: int64


In [10]:
from sklearn.preprocessing import LabelEncoder

In [11]:
# Single shared encoder instance. Every fit_transform call refits it, so it
# only remembers the classes of the column it was most recently fitted on.
label_encoder = LabelEncoder()

In [12]:
# Integer-encode each categorical predictor in turn, reusing the shared
# encoder (it is refit for every column).
for column in ('Fuel_Type', 'Seller_Type', 'Transmission'):
    X[column] = label_encoder.fit_transform(X[column])

In [13]:
from sklearn.model_selection import train_test_split,cross_val_score

In [14]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
from sklearn.ensemble import RandomForestClassifier

In [16]:
# 100-tree random forest with a fixed seed, fitted on the training split.
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X=X_train, y=y_train)

In [17]:
# Predicted car names for the held-out rows.
predictions = model.predict(X=X_test)

In [18]:
from sklearn.metrics import accuracy_score, f1_score

In [19]:
# Share of held-out rows whose predicted car name equals the true one.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("Accuracy : ", accuracy)

Accuracy :  0.47540983606557374


In [20]:
# F1 on the held-out split, averaged over classes weighted by their support.
f1 = f1_score(y_true=y_test, y_pred=predictions, average='weighted')
print(f"F1 Score: {f1}")

# Cross-validation: 5 folds over the full X / y, scored with the estimator's
# default scorer.
cv_scores = cross_val_score(estimator=model, X=X, y=y, cv=5)
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean Cross-validation score: {cv_scores.mean()}")

F1 Score: 0.45880822274264893




Cross-validation scores: [0.40983607 0.51666667 0.43333333 0.4        0.43333333]
Mean Cross-validation score: 0.43863387978142077


In [21]:
def get_recommendation(year, selling_price, present_price, kms_driven, fuel_type, seller_type, transmission):
    """Predict the car name for one set of listing attributes.

    Reads the module-level ``model`` (fitted classifier) and ``data``
    (training frame holding the raw categorical strings).

    Parameters
    ----------
    year, selling_price, present_price, kms_driven
        Numeric attributes, passed to the model unchanged.
    fuel_type, seller_type, transmission : str
        Categorical attributes; each must be a value that occurs in the
        corresponding column of ``data``.

    Returns
    -------
    The first (only) element of ``model.predict`` for the single input row.

    Raises
    ------
    ValueError
        If a categorical value was never seen in the training column
        (raised by ``LabelEncoder.transform``).
    """
    # Create DataFrame for user input; column order matches the training features.
    input_data = pd.DataFrame({
        'Year': [year],
        'Selling_Price': [selling_price],
        'Present_Price': [present_price],
        'Kms_Driven': [kms_driven],
        'Fuel_Type': [fuel_type],
        'Seller_Type': [seller_type],
        'Transmission': [transmission],
    })

    # Encode categoricals with the same mapping the model was trained on.
    # Fitting a fresh LabelEncoder on this single row would map every value
    # to 0, making e.g. "Petrol" and "Diesel" indistinguishable to the model.
    # LabelEncoder assigns codes by sorted class order, so fitting on the
    # training column reproduces the training-time codes.
    for column in ('Fuel_Type', 'Seller_Type', 'Transmission'):
        encoder = LabelEncoder().fit(data[column])
        input_data[column] = encoder.transform(input_data[column])

    # Make prediction
    prediction = model.predict(input_data)
    return prediction[0]

In [22]:
import ipywidgets as widgets
from IPython.display import display

In [23]:
# Numeric inputs.
year_widget = widgets.IntText(value=2020, description="Year")
selling_price_widget = widgets.FloatText(description="Selling Price")
present_price_widget = widgets.FloatText(description="Present Price")
kms_driven_widget = widgets.IntText(description="Kms Driven")

# Categorical inputs, each restricted to a fixed list of choices.
fuel_type_widget = widgets.Dropdown(description="Fuel Type", options=["Petrol", "Diesel", "CNG"])
seller_type_widget = widgets.Dropdown(description="Seller Type", options=["Dealer", "Individual"])
transmission_widget = widgets.Dropdown(description="Transmission", options=["Manual", "Automatic"])

# Area where the recommendation text is printed.
output_widget = widgets.Output()

def on_button_click(b):
    """Button callback: print a recommendation for the current widget values.

    ``b`` is the argument supplied by the button's click handler; it is not
    used. Output is written into ``output_widget``.
    """
    with output_widget:
        # Discard whatever the previous click printed.
        output_widget.clear_output()

        recommended_car = get_recommendation(
            year=year_widget.value,
            selling_price=selling_price_widget.value,
            present_price=present_price_widget.value,
            kms_driven=kms_driven_widget.value,
            fuel_type=fuel_type_widget.value,
            seller_type=seller_type_widget.value,
            transmission=transmission_widget.value,
        )

        print(f"{recommended_car.upper()} is a good option for you.")

# Button that triggers the recommendation callback.
button = widgets.Button(description="Get Recommendation")
button.on_click(on_button_click)

# Stack the inputs, the button and the output area vertically, then render.
input_widgets = widgets.VBox(
    children=[
        year_widget,
        selling_price_widget,
        present_price_widget,
        kms_driven_widget,
        fuel_type_widget,
        seller_type_widget,
        transmission_widget,
        button,
        output_widget,
    ]
)

display(input_widgets)


VBox(children=(IntText(value=2020, description='Year'), FloatText(value=0.0, description='Selling Price'), Flo…