In [None]:
# 1. Environment Setup

!pip install pandas numpy scikit-learn sqlalchemy psycopg2-binary joblib flask

import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sqlalchemy import create_engine




In [None]:
# 2. Load Dataset
# Mounts Google Drive (Colab only) and reads the raw house listing CSV into `df`.
from google.colab import drive

# Keep the locations in one place so a moved/renamed file needs a single edit.
DRIVE_MOUNT_POINT = '/content/drive'
DATA_PATH = '/content/drive/MyDrive/Data Mimining 1st LAB/house_info.csv'

drive.mount(DRIVE_MOUNT_POINT)
df = pd.read_csv(DATA_PATH)
print("✅ Dataset Loaded Successfully!")
print("Shape:", df.shape)
display(df.head())

# Check missing values
print("\nMissing Values per Column:\n", df.isnull().sum())

# 3. Data Cleaning
# Step 1: Drop rows that have no target value.
# Imputing House_Price would invent labels for the model to learn from, so such
# rows are removed instead. The index is reset so later column assignments work
# on a fresh frame.
df = df.dropna(subset=['House_Price']).reset_index(drop=True)

# Step 2: Median imputation for the numeric feature columns
numeric_feature_cols = ['Area (sqft)', 'Number_of_rooms', 'Number_of_Bedrooms',
                        'Number_of_Floors', 'Year_Built', 'Garage_Size']
for col in numeric_feature_cols:
    df[col] = df[col].fillna(df[col].median())

# Step 3: Fill categorical missing values with the most frequent value
for col in ['Location', 'Property_Type', 'Furnishing_Status']:
    df[col] = df[col].fillna(df[col].mode()[0])

# Step 4: Remove duplicates
df = df.drop_duplicates()

# Step 5: Handle Outliers (using IQR)
# The filters are applied one after another, so the House_Price quartiles are
# computed on the rows that survived the Area (sqft) filter.
for col in ['Area (sqft)', 'House_Price']:
    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)
    IQR = Q3 - Q1
    within_fences = df[col].between(Q1 - 1.5*IQR, Q3 + 1.5*IQR)  # inclusive on both ends
    # Reset the index after filtering so it is a clean 0..n-1 range again.
    df = df[within_fences].reset_index(drop=True)

print("\n✅ Cleaned Dataset Shape:", df.shape)

# 4. Feature Encoding
# One-hot encode the categorical columns and join them to the remaining columns
# to form the feature matrix X; House_Price is the target y.
categorical_cols = ['Location', 'Property_Type', 'Furnishing_Status']
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded = encoder.fit_transform(df[categorical_cols])
# Reuse df's index: concat(axis=1) aligns on index labels, so a fresh 0..n-1
# index here would silently misalign (and NaN-pad) rows whenever df's index is
# not a clean range.
encoded_df = pd.DataFrame(
    encoded,
    columns=encoder.get_feature_names_out(categorical_cols),
    index=df.index,
)

# Merge with numeric columns AFTER outlier removal
X = pd.concat([df.drop(columns=['House_Price'] + categorical_cols), encoded_df], axis=1)
y = df['House_Price']

print("\n✅ Features Prepared:", X.shape)

# 5. Train-Test Split
# Named so the hold-out fraction and seed are easy to find and tune.
TEST_SIZE = 0.25
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# 6. Model Training (Linear Regression)
model = LinearRegression()
model.fit(X_train, y_train)

print("\n✅ Model Trained Successfully!")

# 7. Model Evaluation
# Scores the fitted model on the held-out split.
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# r2_score can be negative: that means the model predicts the test set worse
# than a constant equal to the mean of y_test would.
r2 = r2_score(y_test, y_pred)

print("\n📊 Model Evaluation Results:")
print(f"MAE:  {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R²:   {r2:.4f}")

# 8. Save Model and Encoder
# Persists the fitted model and encoder under ./model (relative to the current
# working directory) so the serving cell can load them.
import os

MODEL_DIR = "model"
MODEL_PATH = os.path.join(MODEL_DIR, "house_model.pkl")
ENCODER_PATH = os.path.join(MODEL_DIR, "encoder.pkl")

os.makedirs(MODEL_DIR, exist_ok=True)
joblib.dump(model, MODEL_PATH)
joblib.dump(encoder, ENCODER_PATH)

print("\n💾 Model and Encoder Saved Successfully!")
# The paths are relative, so show where the files actually landed.
print("Saved to:", os.path.abspath(MODEL_DIR))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Dataset Loaded Successfully!
Shape: (25, 10)


Unnamed: 0,Area (sqft),Number_of_rooms,Number_of_Bedrooms,Number_of_Floors,Location,Year_Built,Garage_Size,Property_Type,Furnishing_Status,House_Price
0,1660.0,2,5,2,Dhanmondi,2017,19.0,Apartment,Semi-Furnished,3528729
1,1580.0,30,25,6,Mirpur-10,2025,30.0,House,Furnished,28000000
2,1930.0,4,2,3,Mirpur-10,1998,28.0,House,Furnished,3053291
3,1895.0,4,4,2,Mirpur-10,1997,16.0,House,,2199657
4,,8,1,2,Dhanmondi,2001,26.0,House,Furnished,3959343



Missing Values per Column:
 Area (sqft)           3
Number_of_rooms       0
Number_of_Bedrooms    0
Number_of_Floors      0
Location              3
Year_Built            0
Garage_Size           3
Property_Type         0
Furnishing_Status     2
House_Price           0
dtype: int64

✅ Cleaned Dataset Shape: (24, 10)

✅ Features Prepared: (24, 15)

✅ Model Trained Successfully!

📊 Model Evaluation Results:
MAE:  2119404.92
RMSE: 2662066.11
R²:   -8.8711

💾 Model and Encoder Saved Successfully!


In [None]:
# 1. PostgreSQL Connection (Insert + Query)
import os

# Placeholder URI for deployment:
# postgresql://username:password@host:port/database
# Read from the environment so real credentials never have to be typed into the
# notebook; the default is only the non-functional placeholder.
POSTGRES_URI = os.environ.get(
    "POSTGRES_URI",
    "postgresql://username:password@hostname:5432/house_db",
)

# For simulation in Colab, we'll use SQLite (local DB)
engine = create_engine("sqlite:///house_prices_raw.db")
# NOTE: despite the table name, `df` at this point is the cleaned frame
# (imputed, de-duplicated, outliers removed), not the raw CSV contents.
# if_exists='replace' drops and recreates the table on every run.
df.to_sql('house_prices_raw', engine, if_exists='replace', index=False)

print("\n✅ Data inserted into database (simulation).")

# Query check
pd.read_sql("SELECT * FROM house_prices_raw LIMIT 5;", engine)



✅ Data inserted into database (simulation).


Unnamed: 0,Area (sqft),Number_of_rooms,Number_of_Bedrooms,Number_of_Floors,Location,Year_Built,Garage_Size,Property_Type,Furnishing_Status,House_Price
0,1660.0,2,5,2,Dhanmondi,2017,19.0,Apartment,Semi-Furnished,3528729
1,1930.0,4,2,3,Mirpur-10,1998,28.0,House,Furnished,3053291
2,1895.0,4,4,2,Mirpur-10,1997,16.0,House,Furnished,2199657
3,1957.0,8,1,2,Dhanmondi,2001,26.0,House,Furnished,3959343
4,2969.0,3,4,2,Mirpur-10,2023,29.0,Apartment,Semi-Furnished,4369469


In [None]:
# ============================================
# 🔹 10. Run Flask API inside Google Colab
# ============================================

!pip install flask-ngrok > /dev/null 2>&1

from flask import Flask, request, jsonify
from flask_ngrok import run_with_ngrok
import joblib
import pandas as pd
from sqlalchemy import create_engine

# -----------------------------
# Load model + encoder
# -----------------------------
import os

MODEL_PATH = "model/house_model.pkl"
ENCODER_PATH = "model/encoder.pkl"

# Fail early with an actionable message: the artifacts are written by the
# training cell to a path relative to the working directory, so they are absent
# if that cell has not been run in the current runtime.
missing_artifacts = [p for p in (MODEL_PATH, ENCODER_PATH) if not os.path.exists(p)]
if missing_artifacts:
    raise FileNotFoundError(
        f"Missing artifact(s) {missing_artifacts} relative to {os.getcwd()!r}. "
        "Run the cell that trains and saves the model and encoder first."
    )

# joblib.load unpickles arbitrary objects; only load files this notebook produced.
model = joblib.load(MODEL_PATH)
encoder = joblib.load(ENCODER_PATH)

# -----------------------------
# Create Flask App
# -----------------------------
app = Flask(__name__)
run_with_ngrok(app)   # enables public URL in Colab

# SQLite for simulation (replace with PostgreSQL URI later)
engine = create_engine("sqlite:///colab_house_log.db")

@app.route('/predict', methods=['POST'])
def predict():
    """Predict a house price from a JSON object of feature values.

    The request body must be a JSON object containing the categorical fields
    ``Location``, ``Property_Type`` and ``Furnishing_Status`` plus the numeric
    feature fields the model was fitted on.

    Returns:
        200 with ``{"Predicted_House_Price": <float>}`` on success,
        400 with ``{"error": <message>}`` when the payload is not a JSON object
        or lacks required fields,
        500 with ``{"error": <message>}`` on any other failure.

    Side effects:
        Appends the request payload and the prediction to the ``api_logs`` table.
    """
    import json  # local import keeps this cell runnable on its own

    categorical_cols = ['Location', 'Property_Type', 'Furnishing_Status']

    # silent=True yields None instead of raising on a missing/invalid JSON body,
    # so malformed requests get the same structured error as other bad input.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object of feature values."}), 400

    absent = [c for c in categorical_cols if c not in data]
    if absent:
        return jsonify({"error": f"Missing required fields: {absent}"}), 400

    try:
        input_df = pd.DataFrame([data])

        encoded = encoder.transform(input_df[categorical_cols])
        encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(categorical_cols))
        X_input = pd.concat([input_df.drop(columns=categorical_cols), encoded_df], axis=1)

        # JSON key order is up to the client; put the columns in the order the
        # model was fitted with so each value is multiplied by the right
        # coefficient.
        expected = getattr(model, "feature_names_in_", None)
        if expected is not None:
            absent = [c for c in expected if c not in X_input.columns]
            if absent:
                return jsonify({"error": f"Missing required fields: {absent}"}), 400
            X_input = X_input[list(expected)]

        # Cast to a plain float so the value serializes and stores predictably.
        pred = float(model.predict(X_input)[0])
        result = {"Predicted_House_Price": round(pred, 2)}

        # Log request + response (payload stored as real JSON, not a Python repr)
        pd.DataFrame([{
            "input_json": json.dumps(data),
            "predicted_price": pred
        }]).to_sql("api_logs", engine, if_exists='append', index=False)

        return jsonify(result)

    except Exception as e:
        # Report the failure with an error status rather than a 200 response.
        return jsonify({"error": str(e)}), 500

# -----------------------------
# Start Flask Server
# -----------------------------
# Starts serving `app`, which was passed to run_with_ngrok() when it was created.
# NOTE(review): this call presumably blocks the cell until the server is
# stopped, so keep it as the last statement of the cell — confirm.
app.run()


FileNotFoundError: [Errno 2] No such file or directory: 'model/house_model.pkl'