In [1]:
#pip install streamlit

In [2]:
#pip install altair==4

In [3]:
import streamlit as st
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder

# Load the pre-trained XGBoost model once, at module import time. The path is
# relative, so it is resolved against the current working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# 'xgb_model.pkl' must only ever come from a trusted source.
with open('xgb_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Function to perform feature engineering
def perform_feature_engineering(df, reference_year=2023):
    """Turn raw listing columns into the feature frame handed to the model.

    Steps, in order: drop listing-metadata columns, one-hot encode the
    categorical columns, integer-encode ``brand``, replace
    ``yearOfRegistration`` with the vehicle age, coerce the numeric columns,
    and bin ``monthOfRegistration`` into quarters.

    Args:
        df: DataFrame holding the raw columns referenced below. It is not
            modified; a new frame is returned.
        reference_year: Year the vehicle age is measured from. Defaults to
            2023, the value that was previously hard-coded.

    Returns:
        A new DataFrame containing the engineered features.

    Raises:
        KeyError: If one of the columns that is actually used (the
            categorical columns, ``brand`` or the numeric columns) is absent.

    NOTE(review): every encoding here is derived from ``df`` alone. On a
    one-row frame (as built by ``predict_price``) ``drop_first=True`` removes
    the only level of each categorical column, so no dummy columns are
    produced for them, and the freshly fitted ``LabelEncoder`` always yields
    0 for ``brand``. Confirm how this lines up with the columns and encodings
    the model was trained on.
    """
    # Listing metadata that is never used as a feature. Absent columns are
    # ignored because they would be discarded anyway.
    df = df.drop(
        columns=['seller', 'offerType', 'model', 'name', 'dateCrawled',
                 'lastSeen', 'dateCreated', 'postalCode'],
        errors='ignore',
    )

    # One-hot encode the categorical columns (first level dropped).
    df = pd.get_dummies(
        df,
        columns=['abtest', 'vehicleType', 'fuelType', 'gearbox', 'notRepairedDamage'],
        drop_first=True,
    )

    # Integer-encode 'brand' with an encoder fitted on this frame only, then
    # discard the raw text column.
    df['brand_encoded'] = LabelEncoder().fit_transform(df['brand'])
    df = df.drop(columns='brand')

    # Vehicle age in years; values that cannot be parsed become NaN.
    # NOTE(review): the column keeps the name 'yearOfRegistration' even though
    # it now holds an age. An earlier rename to 'YearsFromRegistration' had its
    # mapping inverted and never took effect; confirm the feature name the
    # trained model expects before renaming.
    registration_year = pd.to_numeric(df['yearOfRegistration'], errors='coerce')
    df['yearOfRegistration'] = reference_year - registration_year

    # Coerce the remaining numeric inputs; unparseable values become NaN.
    for column in ('monthOfRegistration', 'powerPS', 'kilometer'):
        df[column] = pd.to_numeric(df[column], errors='coerce')

    # Bin the registration month into quarters. pd.cut uses right-closed
    # intervals by default, so 1-3 -> Q1, 4-6 -> Q2, 7-9 -> Q3, 10-12 -> Q4;
    # 0, values above 12 and NaN fall outside every bin and stay NaN.
    df['monthOfRegistration'] = pd.cut(
        df['monthOfRegistration'],
        bins=[0, 3, 6, 9, 12],
        labels=["Q1", "Q2", "Q3", "Q4"],
    )
    return pd.get_dummies(df, columns=['monthOfRegistration'], drop_first=True)

# Function to predict price
def predict_price(features):
    """Return the model's prediction for a single set of raw feature values.

    ``features`` is wrapped in a one-row DataFrame (index 0), run through
    ``perform_feature_engineering`` and passed to the module-level ``model``;
    the first element of the prediction result is returned.
    """
    engineered = perform_feature_engineering(pd.DataFrame(features, index=[0]))
    return model.predict(engineered)[0]

# Streamlit app
def main():
    """Render the page: one text box per raw feature and a predict button."""
    st.title("Car Price Prediction App")

    # Raw column names, in the order their input widgets are created.
    field_names = (
        'dateCrawled', 'name', 'seller', 'offerType', 'abtest', 'vehicleType',
        'yearOfRegistration', 'gearbox', 'powerPS', 'model', 'kilometer',
        'monthOfRegistration', 'fuelType', 'brand', 'notRepairedDamage',
        'dateCreated', 'postalCode', 'lastSeen',
    )
    # Every value is collected as free text, defaulting to an empty string.
    features = {field: st.text_input(field, '') for field in field_names}

    # Only predict once the button has been clicked.
    if not st.button("Predict Price"):
        return

    price = predict_price(features)
    st.success(f"Predicted Price: {price:.2f} USD")

# Start the app only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()


2023-12-14 15:13:11.847 INFO    numexpr.utils: NumExpr defaulting to 4 threads.
2023-12-14 15:13:23.028 
  command:

    streamlit run C:\Users\Jagadeeshilpi\anaconda3\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


SyntaxError: invalid syntax (Temp/ipykernel_12520/3126652622.py, line 1)