**Model training**




In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle


# Load dataset
file_path = "ford.csv"  # Update this path if needed
df = pd.read_csv(file_path)

# Display dataset information
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()

# Handle missing values: drop every row that contains at least one NaN
df.dropna(inplace=True)

# Replace each categorical column with integer codes. One LabelEncoder is
# fitted per column and kept in a dict keyed by column name, so the same
# mapping can be re-applied later.
categorical_cols = ['model', 'transmission', 'fuelType']
label_encoders = {column: LabelEncoder() for column in categorical_cols}
for column, encoder in label_encoders.items():
    df[column] = encoder.fit_transform(df[column])

# Persist the fitted encoders to disk
with open('label_encoders.pkl', 'wb') as encoder_file:
    pickle.dump(label_encoders, encoder_file)

# Fit a StandardScaler on the numerical columns and pickle the fitted object
# so the same scaling can be reloaded elsewhere.
numerical_cols = ["year", "mileage", "tax", "mpg", "engineSize"]
X_numerical = df[numerical_cols].values
scaler = StandardScaler().fit(X_numerical)  # fit() returns the scaler itself
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Select features and target variable.
# The numerical columns come first so they can be scaled as one leading slice.
FEATURES = ["year", "mileage", "tax", "mpg", "engineSize", "model", "transmission", "fuelType"]
TARGET = "price"
# Ask for a float array explicitly: scaled values are written back into X
# below, and an integer-typed array would silently truncate them.
X = df[FEATURES].to_numpy(dtype="float64")
y = df[TARGET].values

# Normalize numerical features
numerical_cols = ["year", "mileage", "tax", "mpg", "engineSize"]  # Only numerical columns to scale
n_numerical = len(numerical_cols)
assert FEATURES[:n_numerical] == numerical_cols, "numerical columns must lead FEATURES"
# Reuse the scaler that was fitted on these same columns and pickled above,
# instead of fitting a second one: the matrix the model trains on is then
# guaranteed to be scaled by exactly the object stored in scaler.pkl.
X[:, :n_numerical] = scaler.transform(X[:, :n_numerical])  # Scale numerical columns only

# Seed the Python, NumPy and backend random generators so weight
# initialisation and batch shuffling repeat across runs (the split below is
# already seeded through random_state).
keras.utils.set_random_seed(42)

# Split dataset: 80% train / 20% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define model architecture
# The input shape is declared with an explicit Input object; passing
# `input_shape=` to the first Dense layer makes newer Keras versions emit a
# UserWarning.
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1)  # Regression output
])

# Compile model: mean squared error loss, mean absolute error reported as metric
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train model
# NOTE(review): the test split is also passed as validation data, so the
# val_* numbers are not an independent estimate if they are used for tuning.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Save trained model
MODEL_PATH = "car_price_model.h5"
model.save(MODEL_PATH)
print("Model saved as car_price_model.h5")

# Load trained model
try:
    # Explicitly specify 'mse' as a custom object
    model = keras.models.load_model(MODEL_PATH, custom_objects={'mse': tf.keras.losses.MeanSquaredError()})
    print(f"Model loaded successfully from {MODEL_PATH}")
except OSError:
    print(f"Error: Model file '{MODEL_PATH}' not found. Ensure the model is trained and saved correctly.")
    # Re-raise instead of calling exit(): exit() is an interactive-shell helper
    # that can shut a notebook kernel down, and it reports success (status 0)
    # when the code runs as a script. Raising stops execution with a traceback.
    raise



     model  year  price transmission  mileage fuelType  tax   mpg  engineSize
0   Fiesta  2017  12000    Automatic    15944   Petrol  150  57.7         1.0
1    Focus  2018  14000       Manual     9083   Petrol  150  57.7         1.0
2    Focus  2017  13000       Manual    12456   Petrol  150  57.7         1.0
3   Fiesta  2019  17500       Manual    10460   Petrol  145  40.3         1.5
4   Fiesta  2019  16500    Automatic     1482   Petrol  145  48.7         1.0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17966 entries, 0 to 17965
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   model         17966 non-null  object 
 1   year          17966 non-null  int64  
 2   price         17966 non-null  int64  
 3   transmission  17966 non-null  object 
 4   mileage       17966 non-null  int64  
 5   fuelType      17966 non-null  object 
 6   tax           17966 non-null  int64  
 7   mpg           17966 non-null  fl

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 164864112.0000 - mae: 11910.6729 - val_loss: 62394908.0000 - val_mae: 6908.0225
Epoch 2/50
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 38350072.0000 - mae: 4915.6025 - val_loss: 18702740.0000 - val_mae: 3060.4185
Epoch 3/50
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 18816598.0000 - mae: 3032.8672 - val_loss: 16455165.0000 - val_mae: 2864.1089
Epoch 4/50
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 16463259.0000 - mae: 2857.9246 - val_loss: 14429498.0000 - val_mae: 2677.7649
Epoch 5/50
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 13805301.0000 - mae: 2623.2654 - val_loss: 12622260.0000 - val_mae: 2512.3640
Epoch 6/50
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 12804717.0000 - mae: 2489.5679 - val_loss: 1



Model saved as car_price_model.h5
Model loaded successfully from car_price_model.h5


In [3]:
import os
import requests
import numpy as np
try:
    import tensorflow as tf
    from tensorflow import keras
except ModuleNotFoundError:
    print("Error: TensorFlow is not installed. Please install it using 'pip install tensorflow'.")
    exit()
from sklearn.preprocessing import StandardScaler
import json

# This cell unpickles the transformers, so import pickle here rather than
# relying on an import made by an earlier cell.
import pickle

# API URL
API_URL = "https://databases-peer-16.onrender.com/cars"

# Restore the label encoders and scaler from the pickle files.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
try:
    with open('label_encoders.pkl', 'rb') as f:
        label_encoders = pickle.load(f)
    with open('scaler.pkl', 'rb') as f:
        scaler = pickle.load(f)
    print("Loaded preprocessing transformers successfully")
except FileNotFoundError:
    print("Error: Preprocessing transformers not found. Please run the training script first.")
    # Re-raise instead of exit(): exit() is meant for interactive shells and
    # can shut a notebook kernel down rather than just stopping this cell.
    raise

# Fetch latest car data
def fetch_latest_car():
    """Fetch car data from ``API_URL``.

    Returns:
        The decoded JSON body when the API answers with HTTP 200; ``None`` when
        the request fails, the status is not 200, or the body is not valid JSON.
    """
    try:
        # requests waits indefinitely when no timeout is given; bound the wait
        # so an unresponsive server cannot hang the notebook.
        response = requests.get(API_URL, timeout=60)
    except requests.RequestException as exc:
        print(f"Failed to fetch data: {exc}")
        return None

    if response.status_code != 200:
        print("Failed to fetch data")
        return None

    try:
        return response.json()
    except ValueError as exc:  # body was not JSON
        print(f"Failed to fetch data: {exc}")
        return None

# Load trained model
MODEL_PATH = "car_price_model.h5"

# Check if model file exists
if not os.path.exists(MODEL_PATH):
    # Raise instead of print + exit(): exit() is an interactive-shell helper
    # that can shut a notebook kernel down; an exception stops execution and
    # carries the same message in its traceback.
    raise FileNotFoundError(
        f"Error: Model file '{MODEL_PATH}' not found. Please make sure it is in the correct directory."
    )

try:
    # Explicitly specify 'mse' as a custom object
    model = keras.models.load_model(MODEL_PATH, custom_objects={'mse': tf.keras.losses.MeanSquaredError()})
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    raise  # propagate the original error instead of exit()




Loaded preprocessing transformers successfully
Model loaded successfully.


In [4]:
# Load trained model
MODEL_PATH = "car_price_model.h5"

# Check if model file exists
if not os.path.exists(MODEL_PATH):
    # Raise instead of print + exit(): exit() is an interactive-shell helper
    # that can shut a notebook kernel down; an exception stops execution and
    # carries the same message in its traceback.
    raise FileNotFoundError(
        f"Error: Model file '{MODEL_PATH}' not found. Please make sure it is in the correct directory."
    )

try:
    # Load model
    model = keras.models.load_model(MODEL_PATH, custom_objects={'mse': tf.keras.losses.MeanSquaredError()})
    print("Model loaded successfully.")

    # Recompile model to avoid metrics warning
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    print("Model recompiled successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    raise  # propagate the original error instead of exit()




Model loaded successfully.
Model recompiled successfully.


***Data fetching***

In [5]:
API_URL = "https://databases-peer-16.onrender.com/cars/?limit=1"
# requests waits indefinitely when no timeout is given; bound the wait so an
# unresponsive server cannot hang the cell.
# NOTE(review): 60 s is a deliberately generous guess - tune for this service.
response = requests.get(API_URL, timeout=60)

if response.status_code == 200:
    data = response.json()
    print("Fetched Data:", data)
else:
    print(f"Failed to fetch data. Status code: {response.status_code}, Response: {response.text}")


Fetched Data: [{'model': ' Fiesta', 'year': 2017, 'price': 12000.0, 'mileage': 15944, 'tax': 150, 'mpg': 57.7, 'enginesize': 1.0, 'carid': 2, 'transmissionid': 2, 'fueltypeid': 2}]


***Prediction making***

In [7]:
"""***Prediction making***"""

import requests
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

# API URL to fetch the latest car data
API_URL = "https://databases-peer-16.onrender.com/cars/?limit=1"

# Load the fitted label encoders and scaler
# NOTE: pickle.load can execute arbitrary code; only load files you created.
try:
    with open('label_encoders.pkl', 'rb') as f:
        label_encoders = pickle.load(f)
    with open('scaler.pkl', 'rb') as f:
        scaler = pickle.load(f)
    print("Loaded preprocessing transformers successfully")
except FileNotFoundError:
    print("Error: Preprocessing transformers not found. Please run the training script first.")
    # Re-raise instead of exit(): exit() is meant for interactive shells and
    # can shut a notebook kernel down rather than just stopping this cell.
    raise

# Fetch latest car data
def fetch_latest_car():
    """Fetch the first car record returned by ``API_URL``.

    Returns:
        The first element of the JSON list sent by the API, or ``None`` when
        the request fails, the status is not 200, the body is not valid JSON,
        or the body is not a non-empty list.
    """
    try:
        # requests waits indefinitely when no timeout is given; bound the wait
        # so an unresponsive server cannot hang the notebook.
        response = requests.get(API_URL, timeout=60)
    except requests.RequestException as exc:
        print(f"Failed to fetch data. Request error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data. Status code: {response.status_code}")
        return None

    try:
        data = response.json()
    except ValueError as exc:  # body was not JSON
        print(f"Failed to fetch data. Response was not valid JSON: {exc}")
        return None

    # Only index into the payload when it really is a non-empty list; a dict or
    # an empty list would otherwise raise or be misreported as a bad status.
    if isinstance(data, list) and data:
        print("Raw API response:", data[0])  # Debug print
        return data[0]

    print("Failed to fetch data. API returned no car records.")
    return None

# Feature columns (ensure the order matches training data)
NUMERICAL_FEATURES = [
    "year",
    "mileage",
    "tax",
    "mpg",
    "engineSize",
]

# Key: name used to look up the fitted encoder.
# Value: field read from the API record.
# NOTE(review): the *id fields appear as integers in the sample API payload -
# confirm the pickled encoders were fitted on matching values.
CATEGORICAL_FEATURES = dict(
    model='model',
    transmission='transmissionid',
    fuelType='fueltypeid',
)

def safe_transform(encoder, value, feature_name):
    """Encode ``value`` with ``encoder`` without ever raising.

    Args:
        encoder: Object exposing ``classes_`` and ``transform`` (a fitted
            label encoder).
        value: Raw value to encode.
        feature_name: Name used in the printed warning/error messages.

    Returns:
        The encoded value. A value missing from ``encoder.classes_`` is
        replaced by the first known class before encoding; if anything raises,
        the error is printed and ``0`` is returned.
    """
    try:
        classes = encoder.classes_
        is_known = value in classes
        if not is_known:
            # Unseen value: substitute the first class the encoder knows
            fallback = classes[0]
            print(f"Warning: Unknown {feature_name} value '{value}'. Using fallback value '{fallback}'")
            value = fallback

        encoded = encoder.transform([value])
        return encoded[0]
    except Exception as err:
        print(f"Error transforming {feature_name}: {err}")
        # Fall back to code 0
        return 0

# Preprocess the data (categorical encoding and numerical scaling)
def preprocess_data(car_data):
    """Turn one car record into a model-ready feature row.

    Numerical features are scaled with the module-level ``scaler`` and
    categorical features are encoded with the module-level ``label_encoders``.
    The row is laid out as the scaled numerical values followed by the encoded
    categorical values.

    Args:
        car_data: Mapping of field name to value for a single car.

    Returns:
        A NumPy array of shape ``(1, n_features)``, or ``None`` when
        ``car_data`` is empty or the final scaling/assembly step fails.
    """
    if not car_data:
        print("No car data provided")
        return None

    # Debug print
    print("\nProcessing car data:")
    print("Numerical features expected:", NUMERICAL_FEATURES)
    print("Categorical features mapping:", CATEGORICAL_FEATURES)
    print("Available data fields:", car_data.keys())

    # Handle categorical features
    encoded_data = []
    for original_col, api_col in CATEGORICAL_FEATURES.items():
        try:
            value = car_data.get(api_col)
            if value is None:
                print(f"Warning: Missing {api_col} in API response")
                value = label_encoders[original_col].classes_[0]  # Use first known class as fallback

            encoded_value = safe_transform(label_encoders[original_col], value, api_col)
            encoded_data.append(encoded_value)
        except Exception as e:
            print(f"Error processing categorical feature {api_col}: {e}")
            encoded_data.append(0)  # Fallback value

    # Handle numerical features
    # A missing value is replaced by the scaler's fitted mean for that feature,
    # which scales to 0; a raw 0 (e.g. year 0) would be scaled to an extreme
    # outlier. Assumes scaler.mean_ is ordered like NUMERICAL_FEATURES - it
    # falls back to 0.0 when no mean is available.
    fitted_means = getattr(scaler, "mean_", None)
    numerical_data = []
    for position, feature in enumerate(NUMERICAL_FEATURES):
        if fitted_means is not None and position < len(fitted_means):
            fallback = float(fitted_means[position])
        else:
            fallback = 0.0
        try:
            # Try both original case and lowercase. Compare against None
            # explicitly so a legitimate 0 is not mistaken for "missing".
            value = car_data.get(feature)
            if value is None:
                value = car_data.get(feature.lower())
            if value is None:
                print(f"Warning: Missing numerical feature {feature}")
                value = fallback
            numerical_data.append(float(value))
        except (KeyError, TypeError, ValueError) as e:
            # TypeError covers values float() cannot accept at all (e.g. a list)
            print(f"Error processing numerical feature {feature}: {e}")
            numerical_data.append(fallback)

    try:
        # Scale numerical features
        normalized_data = scaler.transform([numerical_data])

        # Combine numerical and categorical features
        final_data = np.concatenate([normalized_data[0], encoded_data])
        return final_data.reshape(1, -1)
    except Exception as e:
        print(f"Error in final data preparation: {e}")
        return None

# Predict car price
def predict_price():
    """Fetch the latest car, preprocess it and print the predicted price.

    Nothing is returned; every outcome (prediction, preprocessing failure,
    missing data, unexpected error) is reported with ``print``.
    """
    latest_car = fetch_latest_car()
    if not latest_car:
        print("No data available for prediction.")
        return

    try:
        input_data = preprocess_data(latest_car)
        if input_data is None:
            print("Failed to preprocess data")
            return

        # predict() yields a nested result; the price is its first entry
        prediction = model.predict(input_data)
        predicted_price = prediction[0][0]
        print(f"\nPredicted Price: ${predicted_price:.2f}")
    except Exception as err:
        print(f"Error during prediction: {err}")
        import traceback
        print("Full error:", traceback.format_exc())

# Run the prediction only when this code executes as the main module
# (the recorded output below shows it running when the cell is executed).
if __name__ == "__main__":
    predict_price()

Loaded preprocessing transformers successfully
Raw API response: {'model': ' Fiesta', 'year': 2017, 'price': 12000.0, 'mileage': 15944, 'tax': 150, 'mpg': 57.7, 'enginesize': 1.0, 'carid': 2, 'transmissionid': 2, 'fueltypeid': 2}

Processing car data:
Numerical features expected: ['year', 'mileage', 'tax', 'mpg', 'engineSize']
Categorical features mapping: {'model': 'model', 'transmission': 'transmissionid', 'fuelType': 'fueltypeid'}
Available data fields: dict_keys(['model', 'year', 'price', 'mileage', 'tax', 'mpg', 'enginesize', 'carid', 'transmissionid', 'fueltypeid'])
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 225ms/step

Predicted Price: $11467.76
