In [None]:
# Import pandas
import pandas as pd

# Read the housing data from a CSV file located in the working directory
DATA_PATH = 'Housing.csv'
df = pd.read_csv(DATA_PATH)

# Preview the first five rows to confirm the file parsed as expected
df.head(n=5)


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [None]:
# Column names, dtypes and non-null counts (info() prints its report directly)
df.info()

# Summary statistics for the numeric columns.
# A notebook cell only auto-renders its LAST expression, so a bare
# `df.describe()` in the middle of the cell is silently discarded;
# display() (provided by IPython in notebook sessions) renders it explicitly.
display(df.describe())

# Number of missing values per column (rendered as the cell's result)
df.isnull().sum()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             545 non-null    int64 
 1   area              545 non-null    int64 
 2   bedrooms          545 non-null    int64 
 3   bathrooms         545 non-null    int64 
 4   stories           545 non-null    int64 
 5   mainroad          545 non-null    object
 6   guestroom         545 non-null    object
 7   basement          545 non-null    object
 8   hotwaterheating   545 non-null    object
 9   airconditioning   545 non-null    object
 10  parking           545 non-null    int64 
 11  prefarea          545 non-null    object
 12  furnishingstatus  545 non-null    object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB


Unnamed: 0,0
price,0
area,0
bedrooms,0
bathrooms,0
stories,0
mainroad,0
guestroom,0
basement,0
hotwaterheating,0
airconditioning,0


In [None]:
# Show all column names (the Index of column labels of df), useful for
# checking the exact spelling of each column before selecting or encoding
df.columns


Index(['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',
       'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
       'parking', 'prefarea', 'furnishingstatus'],
      dtype='object')

In [None]:
# Look at the five leading rows again to see example values for each column
df.head(n=5)


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [None]:
# Count the missing (NaN/None) entries in every column
df.isna().sum()


Unnamed: 0,0
price,0
area,0
bedrooms,0
bathrooms,0
stories,0
mainroad,0
guestroom,0
basement,0
hotwaterheating,0
airconditioning,0


In [None]:
# One-hot encode the string (object) columns so every feature is numeric/boolean.
# drop_first=True omits the first level of each categorical, leaving e.g. a single
# `mainroad_yes` indicator and two `furnishingstatus_*` indicators.
df_encoded = pd.get_dummies(data=df, drop_first=True)

# Preview the encoded frame and its new indicator columns
df_encoded.head(n=5)


Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking,mainroad_yes,guestroom_yes,basement_yes,hotwaterheating_yes,airconditioning_yes,prefarea_yes,furnishingstatus_semi-furnished,furnishingstatus_unfurnished
0,13300000,7420,4,2,3,2,True,False,False,False,True,True,False,False
1,12250000,8960,4,4,4,3,True,False,False,False,True,False,False,False
2,12250000,9960,3,2,2,2,True,False,True,False,False,True,True,False
3,12215000,7500,4,2,2,3,True,False,True,False,True,True,False,False
4,11410000,7420,4,1,2,2,True,True,True,False,True,False,False,False


In [None]:
# Target: the sale price column
y = df_encoded['price']

# Features: every remaining column of the encoded frame
X = df_encoded.drop(columns='price')

# Report the dimensions of both pieces
for label, frame in (("Features", X), ("Target", y)):
    print(f"{label} shape:", frame.shape)


Features shape: (545, 13)
Target shape: (545,)


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# same split is produced on every run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Report the shape of each partition
for label, part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{label}:", part.shape)


X_train: (436, 13)
X_test: (109, 13)
y_train: (436,)
y_test: (109,)


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Build and fit the linear regression baseline on the training split
# (fit() returns the fitted estimator itself)
lr_model = LinearRegression().fit(X_train, y_train)

# Predict prices for the held-out test rows
y_pred = lr_model.predict(X_test)

# Score the predictions against the true test prices
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Mean Squared Error:", mse)
print("R2 Score:", r2)


Mean Squared Error: 1754318687330.6638
R2 Score: 0.6529242642153184


In [None]:
from sklearn.linear_model import Ridge

# Build and fit a Ridge model on the training split; alpha sets the
# regularization strength.
# NOTE(review): no feature-scaling step is visible before this fit, so the
# penalty likely weighs columns of different scale unevenly -- confirm whether
# standardization is wanted.
ridge_model = Ridge(alpha=1.0).fit(X_train, y_train)

# Predict prices for the held-out test rows
y_ridge = ridge_model.predict(X_test)

# Score the predictions against the true test prices
mse_ridge = mean_squared_error(y_true=y_test, y_pred=y_ridge)
r2_ridge = r2_score(y_true=y_test, y_pred=y_ridge)

print("Ridge Regression MSE:", mse_ridge)
print("Ridge Regression R2:", r2_ridge)


Ridge Regression MSE: 1756474279575.2856
Ridge Regression R2: 0.6524978002155007


In [None]:
from sklearn.linear_model import Lasso

# Build and fit a Lasso model on the training split; alpha sets the
# regularization strength.
# NOTE(review): no feature-scaling step is visible before this fit, and the
# target is in the millions, so alpha=0.1 is presumably a very weak penalty --
# confirm the intended strength.
lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)

# Predict prices for the held-out test rows
y_lasso = lasso_model.predict(X_test)

# Score the predictions against the true test prices
mse_lasso = mean_squared_error(y_true=y_test, y_pred=y_lasso)
r2_lasso = r2_score(y_true=y_test, y_pred=y_lasso)

print("Lasso Regression MSE:", mse_lasso)
print("Lasso Regression R2:", r2_lasso)


Lasso Regression MSE: 1754318945367.533
Lasso Regression R2: 0.6529242131651091


In [None]:
!pip install streamlit
!pip install pyngrok


Collecting streamlit
  Downloading streamlit-1.51.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.51.0-py3-none-any.whl (10.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m64.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m86.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.51.0
Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.5.0


In [None]:
import pickle

# Persist the fitted linear regression to disk; app.py loads this file by name
with open('lr_model.pkl', mode='wb') as model_file:
    pickle.dump(lr_model, model_file)


In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import pickle

# Load trained model
model = pickle.load(open('lr_model.pkl', 'rb'))

st.title("House Price Prediction")

# User inputs
area = st.number_input("Area (sq ft)")
bedrooms = st.number_input("Bedrooms", min_value=1, step=1)
bathrooms = st.number_input("Bathrooms", min_value=1, step=1)
stories = st.number_input("Stories", min_value=1, step=1)
parking = st.number_input("Parking", min_value=0, step=1)
mainroad = st.selectbox("Main Road?", ["Yes", "No"])
guestroom = st.selectbox("Guest Room?", ["Yes", "No"])
basement = st.selectbox("Basement?", ["Yes", "No"])
hotwaterheating = st.selectbox("Hot Water Heating?", ["Yes", "No"])
airconditioning = st.selectbox("Air Conditioning?", ["Yes", "No"])
prefarea = st.selectbox("Preferred Area?", ["Yes", "No"])
furnishingstatus = st.selectbox("Furnishing Status", ["Furnished", "Semi-Furnished", "Unfurnished"])

# Convert categorical to numeric
input_data = pd.DataFrame({
    'area':[area],
    'bedrooms':[bedrooms],
    'bathrooms':[bathrooms],
    'stories':[stories],
    'parking':[parking],
    'mainroad_yes':[1 if mainroad=="Yes" else 0],
    'guestroom_yes':[1 if guestroom=="Yes" else 0],
    'basement_yes':[1 if basement=="Yes" else 0],
    'hotwaterheating_yes':[1 if hotwaterheating=="Yes" else 0],
    'airconditioning_yes':[1 if airconditioning=="Yes" else 0],
    'prefarea_yes':[1 if prefarea=="Yes" else 0],
    'furnishingstatus_semi-furnished':[1 if furnishingstatus=="Semi-Furnished" else 0],
    'furnishingstatus_unfurnished':[1 if furnishingstatus=="Unfurnished" else 0]
})

if st.button("Predict Price"):
    prediction = model.predict(input_data)[0]
    st.success(f"Predicted House Price: ₦{prediction:,.0f}")


Writing app.py


In [None]:
# Register the ngrok authtoken without storing it in the notebook.
# SECURITY: the token that used to be hard-coded in this cell has been exposed
# to anyone with access to the notebook and should be revoked and regenerated.
import os
from getpass import getpass

from pyngrok import ngrok

# Prefer the NGROK_AUTHTOKEN environment variable; otherwise prompt for the
# token interactively so it is never echoed or saved in the cell source.
ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: "))


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
from pyngrok import ngrok
# Launch the Streamlit app as a background shell job, discarding its output
!streamlit run app.py &>/dev/null&
# Open an ngrok tunnel to local port 8501 (presumably the port the Streamlit
# server listens on -- confirm if a custom port is configured)
url = ngrok.connect(8501)
# Display the tunnel object, which shows the public URL
url


<NgrokTunnel: "https://pucka-pulingly-ileen.ngrok-free.dev" -> "http://localhost:8501">

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import pickle

# Load all trained models
lr_model = pickle.load(open('lr_model.pkl', 'rb'))
ridge_model = pickle.load(open('ridge_model.pkl', 'rb'))
lasso_model = pickle.load(open('lasso_model.pkl', 'rb'))

st.title("House Price Prediction")

# User inputs
area = st.number_input("Area (sq ft)")
bedrooms = st.number_input("Bedrooms", min_value=1, step=1)
bathrooms = st.number_input("Bathrooms", min_value=1, step=1)
stories = st.number_input("Stories", min_value=1, step=1)
parking = st.number_input("Parking", min_value=0, step=1)
mainroad = st.selectbox("Main Road?", ["Yes", "No"])
guestroom = st.selectbox("Guest Room?", ["Yes", "No"])
basement = st.selectbox("Basement?", ["Yes", "No"])
hotwaterheating = st.selectbox("Hot Water Heating?", ["Yes", "No"])
airconditioning = st.selectbox("Air Conditioning?", ["Yes", "No"])
prefarea = st.selectbox("Preferred Area?", ["Yes", "No"])
furnishingstatus = st.selectbox("Furnishing Status", ["Furnished", "Semi-Furnished", "Unfurnished"])

# Choose model
model_choice = st.selectbox("Choose Model", ["Linear Regression", "Ridge Regression", "Lasso Regression"])

# Convert categorical to numeric
input_data = pd.DataFrame({
    'area':[area],
    'bedrooms':[bedrooms],
    'bathrooms':[bathrooms],
    'stories':[stories],
    'parking':[parking],
    'mainroad_yes':[1 if mainroad=="Yes" else 0],
    'guestroom_yes':[1 if guestroom=="Yes" else 0],
    'basement_yes':[1 if basement=="Yes" else 0],
    'hotwaterheating_yes':[1 if hotwaterheating=="Yes" else 0],
    'airconditioning_yes':[1 if airconditioning=="Yes" else 0],
    'prefarea_yes':[1 if prefarea=="Yes" else 0],
    'furnishingstatus_semi-furnished':[1 if furnishingstatus=="Semi-Furnished" else 0],
    'furnishingstatus_unfurnished':[1 if furnishingstatus=="Unfurnished" else 0]
})

if st.button("Predict Price"):
    if model_choice == "Linear Regression":
        model = lr_model
    elif model_choice == "Ridge Regression":
        model = ridge_model
    else:
        model = lasso_model

    prediction = model.predict(input_data)[0]
    st.success(f"Predicted House Price: ₦{prediction:,.0f}")


Overwriting app.py


In [None]:
import pickle

# Persist each fitted model under the file name that app.py loads it from
# (linear regression first, then ridge, then lasso)
for filename, fitted_model in {
    "lr_model.pkl": lr_model,
    "ridge_model.pkl": ridge_model,
    "lasso_model.pkl": lasso_model,
}.items():
    with open(filename, "wb") as model_file:
        pickle.dump(fitted_model, model_file)

print("Models saved successfully!")


Models saved successfully!


In [None]:
# Kill previous Streamlit
!kill $(pgrep streamlit)

# Start app again
!streamlit run app.py &>/dev/null&
from pyngrok import ngrok
url = ngrok.connect(8501)
url


<NgrokTunnel: "https://pucka-pulingly-ileen.ngrok-free.dev" -> "http://localhost:8501">

In [None]:
# Kill any running Streamlit processes
!kill $(pgrep streamlit)


kill: usage: kill [-s sigspec | -n signum | -sigspec] pid | jobspec ... or kill -l [sigspec]
