In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Read the raw car listings from the CSV file (the path is relative to the
# notebook's working directory).
car_data = pd.read_csv(filepath_or_buffer='Downloads/car data.csv')

In [None]:
# Preview the first rows. Leaving the frame as the cell's last expression lets
# Jupyter render it as a formatted table instead of the plain-text dump that
# print() produces.
car_data.head()

In [None]:
# DataFrame.info() writes its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line after the report.
car_data.info()

In [None]:
# Count the missing values in every column.
missing_per_column = car_data.isna().sum()
print(missing_per_column)

In [None]:
# Integer codes for the categorical columns, kept in one table so the encoding
# is defined in a single place.
CATEGORY_CODES = {
    'Fuel_Type': {'Petrol': 0, 'Diesel': 1, 'CNG': 2},
    'Seller_Type': {'Dealer': 0, 'Individual': 1},
    'Transmission': {'Manual': 0, 'Automatic': 1},
}

# Encode each column with an element-wise lookup followed by an explicit cast.
# This replaces DataFrame.replace(..., inplace=True), which depended on pandas
# silently downcasting the replaced object column (recent pandas versions emit
# a FutureWarning for that).
# Values that are not in the table are passed through unchanged, so:
#   * re-running this cell on already-encoded integers is a no-op, and
#   * an unexpected category label still fails loudly at astype(int).
for column, codes in CATEGORY_CODES.items():
    car_data[column] = (
        car_data[column]
        .map(lambda value, codes=codes: codes.get(value, value))
        .astype(int)
    )


In [None]:
# `y` is the selling price; `x` is every remaining column except the
# free-text car name.
y = car_data['Selling_Price']
x = car_data.drop(columns=['Car_Name', 'Selling_Price'])

In [None]:
# Show the first rows of the x / y objects built in the previous cell.
print("Features (x):\n", x.head())
print("Target (y):\n", y.head())

# Inputs and target that the models below are actually trained on.
# NOTE(review): the target here is 'Present_Price', whereas `y` above holds
# 'Selling_Price' — confirm which price is meant to be predicted.
target = car_data['Present_Price']
features = car_data[
    [
        'Year',
        'Fuel_Type',
        'Seller_Type',
        'Transmission',
    ]
]

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Baseline: linear regression fitted on the raw (unscaled) training features.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Baseline predictions for the held-out rows (unscaled features, matching the
# data `model` was fitted on).
y_pred = model.predict(X=X_test)

In [None]:
# R2 of the baseline model on the test split.
baseline_r2 = r2_score(y_true=y_test, y_pred=y_pred)
print('\nR2 Score: ', baseline_r2)

In [None]:
# Linear Regression on the standardized features.
# (The estimator used to be created and fitted twice back to back; the second
# pass simply discarded the first fit, so only one fit is kept.)
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)

# Random Forest Regressor with 100 trees; the fixed random_state keeps the
# fitted forest the same from run to run.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# 8. Model Evaluation

# Predict the held-out rows with both fitted models.
y_pred_lr = lr_model.predict(X_test_scaled)
y_pred_rf = rf_model.predict(X_test_scaled)

# Print the same three metrics for each model; the second heading carries a
# leading newline so the two reports are separated by a blank line.
for heading, predictions in (
    ("Linear Regression Performance:", y_pred_lr),
    ("\nRandom Forest Performance:", y_pred_rf),
):
    print(heading)
    print(f"Mean Absolute Error: {mean_absolute_error(y_test, predictions)}")
    print(f"Mean Squared Error: {mean_squared_error(y_test, predictions)}")
    print(f"R-squared: {r2_score(y_test, predictions)}")

# 9. Making Predictions on New Data

# One hypothetical car, described with the same integer codes used for the
# training data.
new_data = dict(
    Year=[2020],
    Fuel_Type=[1],     # 1 = Diesel
    Seller_Type=[0],   # 0 = Dealer
    Transmission=[1],  # 1 = Automatic
)
new_df = pd.DataFrame(data=new_data)

# Apply the already-fitted scaler before querying either model.
new_scaled = scaler.transform(new_df)
predicted_price_lr = lr_model.predict(new_scaled)
predicted_price_rf = rf_model.predict(new_scaled)

print(f"Predicted Price (Random Forest): {predicted_price_rf[0]}")
print(f"Predicted Price (Linear Regression): {predicted_price_lr[0]}")

In [None]:
# Actual vs. predicted values for the baseline model's `y_pred`.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.set_xlabel('Actual Values')
ax.set_ylabel('Predicted Values')
ax.set_title('Car Price Prediction')
plt.show()

In [None]:
# Histogram (30 bins) with a KDE overlay of the selling price column.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=car_data, x='Selling_Price', bins=30, kde=True, color='blue', ax=ax)
ax.set_title('Distribution of Selling Price')
ax.set_xlabel('Selling Price')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Histogram (30 bins) with a KDE overlay of the kilometres-driven column.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=car_data, x='Kms_Driven', bins=30, kde=True, color='green', ax=ax)
ax.set_title('Distribution of Kms Driven')
ax.set_xlabel('Kms Driven')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Histogram with a KDE overlay of the present price column (default binning
# and default axis labels).
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=car_data, x='Present_Price', kde=True, ax=ax)
ax.set_title('Distribution of Car Prices')
plt.show()

In [None]:
# Function to predict the price of a car by its name
def predict_price_by_car_name(car_name):
    """Print the stored details and both models' price estimates for a car.

    The name is first matched exactly against ``car_data['Car_Name']``. If
    that finds nothing, a second pass ignores letter case and surrounding
    whitespace, so typed input such as ``" Ritz "`` still resolves to a stored
    ``"ritz"``. When several rows match, only the first one is reported.

    Relies on notebook globals prepared by earlier cells: ``car_data`` (with
    the categorical columns already integer-encoded), the fitted ``scaler``,
    and the fitted ``rf_model`` and ``lr_model``.

    Parameters
    ----------
    car_name : str
        Name of the car to look up.

    Returns
    -------
    None
        All results are reported through ``print``; a "not found" message is
        printed when no row matches.
    """
    names = car_data['Car_Name']

    # Exact match first, so names that already worked resolve exactly as before.
    matches = car_data[names == car_name]
    if matches.empty:
        # Fallback for hand-typed input: compare case-folded, stripped names.
        wanted = str(car_name).strip().casefold()
        normalized = names.map(lambda name: str(name).strip().casefold())
        matches = car_data[normalized == wanted]

    if matches.empty:
        print("Car name not found in the dataset.")
        return

    # Keep a one-row DataFrame (not a Series) so the scaler receives the same
    # column layout it was fitted on.
    car_details = matches.iloc[[0]]
    matched_name = car_details['Car_Name'].iloc[0]

    # Decode the integer-encoded columns back into labels for display.
    kms_driven = car_details['Kms_Driven'].iloc[0]
    transmission = "Automatic" if car_details['Transmission'].iloc[0] == 1 else "Manual"
    # Any fuel code other than 0 / 1 is shown as CNG.
    fuel_type = {0: "Petrol", 1: "Diesel"}.get(car_details['Fuel_Type'].iloc[0], "CNG")

    print(f"\nCar Details for '{matched_name}':")
    print(f"Kms Driven: {kms_driven}")
    print(f"Transmission: {transmission}")
    print(f"Fuel Type: {fuel_type}")

    # Same feature columns, in the same order, as used for training.
    car_features = car_details[['Year', 'Fuel_Type', 'Seller_Type', 'Transmission']]

    # Standardize with the already-fitted scaler, then query both models.
    car_features_scaled = scaler.transform(car_features)
    predicted_price_rf = rf_model.predict(car_features_scaled)
    predicted_price_lr = lr_model.predict(car_features_scaled)

    print(f"Predicted Price (Random Forest): {predicted_price_rf[0]:.2f} INR")
    print(f"Predicted Price (Linear Regression): {predicted_price_lr[0]:.2f} INR")

# Example: ask for a car name interactively and print its predicted price.
car_name_input = input("Enter car name to predict its price: ")
predict_price_by_car_name(car_name=car_name_input)