<a href="https://colab.research.google.com/github/daksh212004/Automated-Car-Dent-and-Prediction-system/blob/main/Predictionmodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Optional: Uncomment if running in a fresh Colab environment
# !pip install pandas scikit-learn

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error


In [None]:
 df = pd.read_csv("All_cars_dataset.csv")  # relative path: the CSV must sit in the working directory
# NOTE(review): the read_csv line above starts with a stray space; presumably tolerated by the
# notebook front end, but it would be an IndentationError in a plain script — consider removing.
# Preview the first rows to sanity-check the loaded columns.
df.head()


Unnamed: 0,Name,EMI,Price,Mileage,ENGINE,TRANSMISSION,FUEL TYPE,Price range,Length (mm),Wheelbase (mm),...,Fuel Capacity (L),Height (mm),Ground Clearance (mm),Colors,Seating Capacity,Displacement (cc),Peak Power,Peak Torque,Color Varients,Top_Speed
0,Porsche Macan,118950,69.98 Lakh,14 kmpl,1984 to 2995 cc,Automatic,Petrol,₹ 69.98 - 85.01 Lakh,4696,2807,...,75,1624,198,"Agate Grey, Black, Carmine Red, Carrara White...",5,1984,251 BHP@5000 RPM,370 NM @1600 RPM,24,254 Km/h
1,Porsche 718,145262,85.46 Lakh,9 kmpl,1988 to 3995 cc,"Automatic, Manual",Petrol,₹ 85.46 Lakh - 1.64 Crore,4379,2475,...,54,1281,128,"Agate Grey, Black, Carmine Red, Carrara White...",2,1988,295 BHP@6500 RPM,380 NM @1950 RPM,23,275km/h
2,Porsche Cayenne,202272,1.2 Crore,9 - 40 kmpl,2995 to 3996 cc,Automatic,Petrol,₹ 1.2 - 1.93 Crore,4918,2895,...,75,1696,210,"Biscay Blue, Black, Carrara White, Jet Black ...",5,2995,456 BHP@5250 RPM,700 NM,22,286 Km/h
3,Porsche Cayenne Coupe,224165,1.32 Crore,10.75 kmpl,2995 to 3996 cc,Automatic,Petrol,₹ 1.32 - 1.98 Crore,4931,2895,...,75,1676,190,"Biskay Blue , Carrara White, Crayon, Dolomite...",4,2995,335 BHP@5300 RPM,450 NM @1340 RPM,18,286 Km/h
4,Porsche Panamera,245599,1.44 Crore,8-37 kmpl,2894 to 4806 cc,Automatic,Petrol,₹ 1.44 - 2.43 Crore,5049,2950,...,75,1423,133,"Amethyst, Aventurine Green, Burgundy Red, Car...",4,2894,326 BHP@5400 RPM,450 NM @1750 RPM,28,310 Km/h


In [None]:
# Base repair cost for each damage location.
location_cost = dict(
    hood=2000,
    door=1500,
    bumper=1800,
    roof=2200,
    side_mirror=800,
    fender=1600,
)
locations = [*location_cost]

# Cost multiplier applied per brand.
brand_multiplier = dict(
    Toyota=1.0,
    Honda=1.05,
    Hyundai=0.95,
    BMW=1.8,
    Mercedes=2.0,
    Ford=1.1,
    Kia=0.9,
    Audi=1.9,
)
brands = [*brand_multiplier]

# Seed the global NumPy RNG, then draw one brand and one location per row.
# The brand draw happens first; keeping that order keeps the sampled values stable.
# NOTE(review): Brand is sampled at random and is not derived from the `Name` column,
# so a row's Brand need not match the make in its Name.
np.random.seed(42)
df['Brand'] = np.random.choice(brands, size=df.shape[0])
df['Scratch_Location'] = np.random.choice(locations, size=df.shape[0])

# Translate the sampled categories into their numeric cost factors.
df['Brand_Multiplier'] = df['Brand'].map(brand_multiplier)
df['Location_Base_Cost'] = df['Scratch_Location'].map(location_cost)



In [None]:
# Cost factor for each damage severity level.
severity_multiplier = dict(minor=1.0, moderate=1.5, severe=2.2)
severities = [*severity_multiplier]

# Draw a severity per row from the global NumPy RNG (this cell does not reseed it),
# then attach the matching cost factor.
df['Severity'] = np.random.choice(severities, size=df.shape[0])
df['Severity_Multiplier'] = df['Severity'].map(severity_multiplier)


In [None]:
# Synthetic car price in whole lakhs; randint's upper bound is exclusive, so values span 5..100.
df['Car_Price_Lakhs'] = np.random.randint(low=5, high=101, size=df.shape[0])

# Edges 0, 5, ..., 100 delimit twenty 5-lakh intervals; each label names its interval's two edges.
bins = [*range(0, 105, 5)]
labels = [f'{lower}-{upper}L' for lower, upper in zip(bins[:-1], bins[1:])]

# Tag every row with the label of the interval its price falls into.
# NOTE(review): pd.cut intervals are right-closed by default, so a price of 10 is labelled
# '5-10L', whereas get_price_multiplier starts its next tier at exactly 10.
df['Price_Range'] = pd.cut(df['Car_Price_Lakhs'], bins=bins, labels=labels)

# Extended price multiplier logic up to ₹1 crore
def get_price_multiplier(price):
    """Return the repair-cost multiplier for a car price given in lakhs.

    Prices below 10 map to 0.9. From 10 upwards every 5-lakh step raises the
    multiplier by 0.1 (10 to <15 gives 1.0, 15 to <20 gives 1.1, ...,
    95 to <100 gives 2.7). Anything else, i.e. 100 and above, maps to 2.8.
    """
    # Multipliers for the eighteen 5-lakh tiers covering [10, 100), lowest tier first.
    mid_tiers = (
        1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8,
        1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7,
    )

    if price < 10:
        return 0.9
    if not price < 100:
        # Phrased as a negation so a value that fails every comparison
        # (such as NaN) still ends up in the top tier.
        return 2.8

    # price lies in [10, 100): 10-14 -> slot 0, 15-19 -> slot 1, and so on.
    return mid_tiers[int(price // 5) - 2]

# Look up the price-tier multiplier for every row's synthetic price.
df['Price_Multiplier'] = df['Car_Price_Lakhs'].map(get_price_multiplier)


In [None]:
# Target value: the location's base cost scaled, in turn, by the brand,
# severity and price-tier factors.
df['Estimated_Cost'] = (
    df['Location_Base_Cost']
    .mul(df['Brand_Multiplier'])
    .mul(df['Severity_Multiplier'])
    .mul(df['Price_Multiplier'])
)

# Preview the inputs next to the resulting cost.
df.loc[:, ['Brand', 'Scratch_Location', 'Severity', 'Car_Price_Lakhs', 'Estimated_Cost']].head()


Unnamed: 0,Brand,Scratch_Location,Severity,Car_Price_Lakhs,Estimated_Cost
0,Kia,bumper,minor,67,3402.0
1,BMW,bumper,minor,90,8424.0
2,Mercedes,hood,severe,55,16720.0
3,Kia,bumper,moderate,29,3159.0
4,Hyundai,side_mirror,minor,62,1520.0


In [None]:
# One LabelEncoder per categorical input. Each encoder is kept as a global because
# predict_cost() reuses it to encode new inputs the same way as the training rows.
le_brand = LabelEncoder()
df['Brand_encoded'] = le_brand.fit_transform(df['Brand'])

le_location = LabelEncoder()
df['Location_encoded'] = le_location.fit_transform(df['Scratch_Location'])

le_severity = LabelEncoder()
df['Severity_encoded'] = le_severity.fit_transform(df['Severity'])

# NOTE(review): LabelEncoder numbers classes in sorted (string) order, so the codes for the
# price-range labels follow lexical order (e.g. '10-15L' before '5-10L'), not price order.
le_price = LabelEncoder()
df['Price_encoded'] = le_price.fit_transform(df['Price_Range'])


In [None]:
# Features are the four encoded categorical columns; the target is the synthetic cost.
y = df['Estimated_Cost']
X = df.loc[:, ['Brand_encoded', 'Location_encoded', 'Severity_encoded', 'Price_encoded']]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
print("Mean Absolute Error:", mean_absolute_error(y_true=y_test, y_pred=y_pred))


Mean Absolute Error: 1835.671066666667


In [None]:
def predict_cost(brand, location, severity, car_price_lakhs):
    """Predict the scratch repair cost for one car and format it as a message.

    Relies on the notebook globals ``le_brand``, ``le_location``,
    ``le_severity``, ``le_price``, ``bins``, ``labels`` and ``model``.

    Args:
        brand: Brand name; must be a class known to ``le_brand``.
        location: Damage location; must be a class known to ``le_location``.
        severity: Damage severity; must be a class known to ``le_severity``.
        car_price_lakhs: Car price in lakhs. It is binned with ``bins`` and
            ``labels``, so it has to fall inside ``(bins[0], bins[-1]]``.

    Returns:
        str: A human-readable estimate on success. Any ``Exception`` raised
        while encoding or predicting is caught and reported as a message
        starting with ``"Error:"`` instead of being propagated.
    """
    try:
        # Encode inputs
        brand_code = le_brand.transform([brand])[0]
        location_code = le_location.transform([location])[0]
        severity_code = le_severity.transform([severity])[0]

        # Bin and encode price
        price_range = pd.cut([car_price_lakhs], bins=bins, labels=labels)[0]
        if pd.isna(price_range):
            # pd.cut yields NaN for values outside the bin edges; say so explicitly
            # rather than letting the encoder complain about an unseen "nan" label.
            raise ValueError(
                f"car_price_lakhs={car_price_lakhs} is outside the supported range "
                f"({bins[0]}, {bins[-1]}] lakhs."
            )
        price_code = le_price.transform([price_range])[0]

        # When the model was fitted on a DataFrame it remembers the column names;
        # pass them back so scikit-learn does not warn about missing feature names.
        features = [[brand_code, location_code, severity_code, price_code]]
        feature_names = getattr(model, "feature_names_in_", None)
        if feature_names is not None:
            features = pd.DataFrame(features, columns=list(feature_names))

        # Predict
        predicted_cost = model.predict(features)[0]
        return f"Estimated Scratch Repair Cost for {brand} ({location}, {severity}, ₹{car_price_lakhs}L): ₹{round(predicted_cost, 2)}"
    except Exception as e:
        # Deliberate best-effort: callers always get a string back.
        return f"Error: {e}\nEnsure inputs are valid and in training set ranges."


In [None]:
# Example query: severe hood scratch on a BMW priced at 65 lakhs.
print(
    predict_cost(
        brand="BMW",
        location="hood",
        severity="severe",
        car_price_lakhs=65,
    )
)



Estimated Scratch Repair Cost for BMW (hood, severe, ₹65L): ₹8107.7


