In [16]:
!pip install streamlit pyngrok pandas numpy scikit-learn



In [18]:
# ==========================================
# 🏠 HOUSE PRICE PREDICTION APP (Colab Final – Random Forest + HouseAge)
# ==========================================

import pandas as pd, numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from pyngrok import ngrok
import streamlit as st

# ---------------- Streamlit code ----------------
# Source of the Streamlit app; it is written verbatim to app.py further down.
# It is a raw string, so the text must not contain a triple single quote.
code = r'''
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

DATA_PATH = "housing_price_dataset.csv"
# Reference year for HouseAge. Training, the slider bound and the prediction
# input all use this one value so they cannot drift apart.
REFERENCE_YEAR = 2025


@st.cache_data
def load_data():
    """Read the housing CSV; cached so reruns do not hit the disk again."""
    return pd.read_csv(DATA_PATH)


@st.cache_resource
def train_model():
    """Prepare the data, fit the Random Forest and score it on a hold-out split.

    Streamlit re-executes this script on every widget interaction. Caching the
    result keeps the forest from being refit each time an input changes.

    Returns:
        A tuple (model, encoder, feature_columns, metrics) where metrics is
        (mae, rmse, r2) measured on the 20% test split.

    Raises:
        FileNotFoundError: if the dataset CSV is not present.
    """
    df = load_data()

    # ---- Clean & Prepare ----
    # NaN > 0 evaluates to False, so this filter also removes missing prices.
    df = df[df["Price"] > 0].reset_index(drop=True)
    df["HouseAge"] = REFERENCE_YEAR - df["YearBuilt"]
    df = df.drop(columns=["YearBuilt"])

    encoder = OneHotEncoder(drop="first", sparse_output=False)
    encoded = encoder.fit_transform(df[["Neighborhood"]])
    encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(["Neighborhood"]))
    # The index was reset above, so encoded_df's default index lines up row for row.
    df_encoded = pd.concat([df.drop(columns=["Neighborhood"]), encoded_df], axis=1)

    # ---- Split & Train ----
    X = df_encoded.drop(columns=["Price"])
    y = df_encoded["Price"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = Pipeline([
        ("imputer", SimpleImputer(strategy="median")),
        ("regressor", RandomForestRegressor(n_estimators=200, random_state=42)),
    ])
    model.fit(X_train, y_train)

    # ---- Evaluate ----
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    return model, encoder, list(X.columns), (mae, rmse, r2)


st.title("🏡 House Price Prediction (Random Forest + House Age)")
st.write("Predict house prices more accurately with Random Forest!")

try:
    model, encoder, feature_columns, (mae, rmse, r2) = train_model()
except FileNotFoundError:
    # Show a readable message instead of a traceback when the CSV is missing.
    st.error(f"Dataset file {DATA_PATH} was not found next to app.py. Upload it and reload the page.")
    st.stop()

st.subheader("📊 Model Performance")
st.write(f"MAE: {mae:.2f}")
st.write(f"RMSE: {rmse:.2f}")
st.write(f"R² Score: {r2:.2f}")

# ---- Prediction UI ----
st.subheader("🧮 Predict Your Own House Price")

sqft = st.number_input("Square Feet", 500, 10000, 1500)
bed = st.number_input("Bedrooms", 1, 10, 3)
bath = st.number_input("Bathrooms", 1, 10, 2)
year = st.slider("Year Built", 1900, REFERENCE_YEAR, 2015)
neigh = st.selectbox("Neighborhood", encoder.categories_[0])

if st.button("Predict Price"):
    inp = pd.DataFrame({
        "SquareFeet": [sqft],
        "Bedrooms": [bed],
        "Bathrooms": [bath],
        "HouseAge": [REFERENCE_YEAR - year],
        "Neighborhood": [neigh],
    })

    enc_inp = encoder.transform(inp[["Neighborhood"]])
    enc_inp_df = pd.DataFrame(enc_inp, columns=encoder.get_feature_names_out(["Neighborhood"]))
    final_inp = pd.concat([inp.drop(columns=["Neighborhood"]), enc_inp_df], axis=1)
    # Match the training layout: same column order, and 0 for any training
    # column the form does not supply.
    final_inp = final_inp.reindex(columns=feature_columns, fill_value=0)

    pred = model.predict(final_inp)[0]
    st.success(f"💰 Predicted Price: ${pred:,.2f}")
'''

# Save app
with open("app.py", "w") as f: f.write(code)

# ---- Start Streamlit via ngrok ----
ngrok.kill()          # prevents “too many tunnels” errors
public_url = ngrok.connect(8501)
print("🔗 Your Streamlit app is live here:", public_url)
!streamlit run app.py &>/dev/null &


🔗 Your Streamlit app is live here: NgrokTunnel: "https://4fa229492b3b.ngrok-free.app" -> "http://localhost:8501"
