# In [None]:  (notebook cell prompt left over from the export)
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the property dataset; the path is relative to the working directory
# the script is launched from.
df11 = pd.read_csv('df11.csv')

# Features and target variable.
# Take an explicit copy of the selected columns: the encoding step below
# assigns into X, and doing that on a bare selection of df11 triggers pandas'
# SettingWithCopyWarning.
X = df11[['Location', 'Size', 'Rooms', 'Bathrooms']].copy()
y = df11['Price']

# Encode the categorical Location column as integer codes. The fitted encoder
# is kept in `le` so the same mapping can be applied to user input later.
# NOTE(review): integer codes impose an arbitrary ordering on locations; a
# tree model can still split on them, but confirm this is intended.
le = LabelEncoder()
X['Location'] = le.fit_transform(X['Location'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Decision Tree Regressor (seeded for reproducible tie-breaking).
dt = DecisionTreeRegressor(random_state=42)
dt.fit(X_train, y_train)

# Make predictions on the held-out test set.
y_pred = dt.predict(X_test)

# Performance metrics on the test set, displayed further down in the app.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Streamlit app title
st.title("KL Property Price Prediction")

# User inputs.
# Location is chosen from the labels the encoder was fitted on, so the user
# cannot submit a value the model has never seen (free text would have to
# match a training label exactly, including case and spacing).
location = st.selectbox("Location", list(le.classes_))
size = st.number_input("Size (in sqft)", min_value=500, max_value=10000, step=100)
rooms = st.number_input("Number of Rooms", min_value=1, max_value=10, step=1)
bathrooms = st.number_input("Number of Bathrooms", min_value=1, max_value=10, step=1)

# Prediction button
if st.button("Predict"):
    # Ignore accidental leading/trailing whitespace in a typed location so it
    # can still match a label known to the encoder.
    location_key = location.strip() if isinstance(location, str) else location

    # Build a one-row frame using the same column names as the training data.
    new_data_df = pd.DataFrame([{
        'Location': location_key,
        'Size': size,
        'Rooms': rooms,
        'Bathrooms': bathrooms,
    }])

    # Transform the Location column using the fitted label encoder.
    # LabelEncoder.transform raises ValueError for labels it was not fitted on.
    try:
        new_data_df['Location'] = le.transform(new_data_df['Location'])
    except ValueError:
        # Report the problem but let the script continue, so the rest of the
        # page (e.g. the metrics section) is still rendered.
        st.error(f"Location '{location}' is not recognized. Please enter a valid location.")
    else:
        # Ensure the new data has the same column order as the training data.
        input_data = new_data_df[X_train.columns]

        # Make prediction (a one-element array for the single input row).
        predicted_price = dt.predict(input_data)

        st.write(f"Predicted Price: RM{predicted_price[0]:,.2f}")

# Show how the trained model scored on the held-out test set.
st.subheader("Model Performance Metrics")
for metric_line in (
    f"Mean Absolute Error (MAE): {mae}",
    f"Mean Squared Error (MSE): {mse}",
    f"R-squared: {r2}",
):
    st.write(metric_line)





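# Notebook output — ModuleNotFoundError: No module named 'streamlit'
# Install the package (`pip install streamlit`) and launch the app with
# `streamlit run <script>.py` rather than executing it in a notebook cell.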