<a href="https://colab.research.google.com/github/azrabano23/carquest/blob/main/germancar.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import sqlite3
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

# Load the raw car listings from CSV.
file_path = "germany_auto_industry_dataset.csv"  # Replace with your file path
data = pd.read_csv(file_path)

# Show the headers exactly as they appear in the file.
print("Column names in the dataset:", list(data.columns))

# Trim surrounding whitespace from every header, then map the
# euro-suffixed price header (when present) to plain "Price".
data.columns = data.columns.str.strip()
data = data.rename(columns={"Price (€)": "Price"})

print("Updated column names:", list(data.columns))

# Columns holding text categories that must be turned into integer codes.
categorical_columns = ['Brand', 'Model', 'Fuel Type', 'Transmission', 'City']

# Handle missing values BEFORE encoding: once the categorical columns are
# label-encoded a missing entry can no longer be told apart from a real
# category, and a NaN class would break the ', '.join(classes_) prompts.
# Numeric columns get their median; numeric_only=True keeps median() from
# failing on the still-textual categorical columns.
data.fillna(data.median(numeric_only=True), inplace=True)
# Categorical columns get an explicit placeholder category instead.
data[categorical_columns] = data[categorical_columns].fillna("Unknown")

# Encode each categorical column and keep its fitted encoder so the integer
# codes can be mapped back to the original labels later.
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Predictor columns fed to the regressor; 'Price' is the target.
feature_columns = [
    'Brand',
    'Year',
    'Mileage',
    'Fuel Type',
    'Fuel Consumption (L/100km)',
    'Horsepower (HP)',
    'Transmission',
]
X = data[feature_columns]
y = data['Price']

# Hold out 20% of the rows for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a random forest on the training rows (fit() returns the estimator).
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)

# Report the regression metrics.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

# Create the SQLite database and save the (label-encoded) data into it.
# The connection is left open because query_database() reads through it.
conn = sqlite3.connect('car_recommender.db')
data.to_sql('cars', conn, if_exists='replace', index=False)


def query_database(query, params=None):
    """Run a SQL query against the cars database and return a DataFrame.

    Args:
        query: SQL text. May contain ``?`` placeholders.
        params: Optional sequence of values bound to the placeholders.
            When omitted the query is executed as-is, exactly as before.

    Returns:
        A pandas DataFrame with the rows produced by the query.
    """
    return pd.read_sql_query(query, conn, params=params)


def _prompt_choice(label, encoder):
    """Ask until the user types one of the encoder's known labels; return it."""
    valid_labels = list(encoder.classes_)
    options = ', '.join(map(str, valid_labels))
    while True:
        choice = input(f"Select {label} ({options}): ")
        if choice in valid_labels:
            return choice
        # Re-prompt instead of calling exit(), which would terminate the
        # whole interpreter/kernel on a simple typo.
        print(f"Invalid {label.lower()}. Please select from: {options}")


def _prompt_int(message):
    """Ask until the user types a whole number; return it as an int."""
    while True:
        try:
            return int(input(message))
        except ValueError:
            print("Please enter a whole number.")


def _decode_labels(frame):
    """Return a copy of frame with encoded columns mapped back to labels."""
    decoded = frame.copy()
    for column, encoder in label_encoders.items():
        if column in decoded.columns:
            decoded[column] = encoder.inverse_transform(decoded[column].astype(int))
    return decoded


print("Welcome to the Car Recommender System!")

brand = _prompt_choice("Brand", label_encoders['Brand'])
min_price = _prompt_int("Enter Minimum Price (in €): ")
max_price = _prompt_int("Enter Maximum Price (in €): ")
city = _prompt_choice("City", label_encoders['City'])

# An inverted range can never match anything, so put the bounds in order.
if min_price > max_price:
    print("Minimum price exceeds maximum price; swapping the two values.")
    min_price, max_price = max_price, min_price

# Convert the validated labels to their integer codes. The cast to a
# built-in int matters: the encoder returns numpy scalars, which are not
# among the parameter types sqlite3 binds natively.
brand_encoded = int(label_encoders['Brand'].transform([brand])[0])
city_encoded = int(label_encoders['City'].transform([city])[0])

# Bind the values as parameters rather than formatting them into the SQL.
query = """
SELECT * FROM cars WHERE Brand = ? AND Price >= ? AND Price <= ? AND City = ?
"""
recommendations = query_database(
    query, params=(brand_encoded, min_price, max_price, city_encoded)
)

# Display recommendations
if not recommendations.empty:
    print("Recommended Cars:")
    # Show human-readable labels instead of the stored integer codes;
    # `recommendations` itself keeps the encoded values.
    print(_decode_labels(recommendations))
else:
    print("No cars match your criteria.")


prompt = input("Ask a question about the dataset (e.g., What is the most fuel-efficient car?): ")

# Accept both "fuel-efficient" and "fuel efficient".
if "fuel efficient" in prompt.lower().replace("-", " "):
    # Row with the lowest fuel consumption.
    fuel_efficient_car = data.loc[data['Fuel Consumption (L/100km)'].idxmin()]
    # 'Model' holds a label-encoded integer (surfaced as a float in this row),
    # so map it back to the original model name before printing.
    model_code = int(fuel_efficient_car['Model'])
    model_name = label_encoders['Model'].inverse_transform([model_code])[0]
    print(f"The most fuel-efficient car is {model_name} with {fuel_efficient_car['Fuel Consumption (L/100km)']} L/100km.")
else:
    print("Sorry, I can't answer that question yet.")


Column names in the dataset: ['Brand', 'Model', 'Year', 'Mileage', 'Fuel Type', 'Fuel Consumption (L/100km)', 'Horsepower (HP)', 'Transmission', 'Price ', 'City']
Updated column names: ['Brand', 'Model', 'Year', 'Mileage', 'Fuel Type', 'Fuel Consumption (L/100km)', 'Horsepower (HP)', 'Transmission', 'Price', 'City']
Mean Squared Error: 865257586.2863644
R-squared: -0.11411018942439632
Welcome to the Car Recommender System!
Select Brand (Audi, BMW, Mercedes, Opel, Porsche, Volkswagen): BMW
Enter Minimum Price (in €): 100000
Enter Maximum Price (in €): 1000000000
Select City (Berlin, Cologne, Frankfurt, Hamburg, Munich): Munich
No cars match your criteria.
Ask a question about the dataset (e.g., What is the most fuel-efficient car?): What is the most fuel-efficient car?
The most fuel-efficient car is 16.0 with 3.0 L/100km.
