In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.27.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.9.0 (from gradio)
  Downloading gradio_client-1.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
import pickle
import gradio as gr

# Load the housing data and discard the 'furnishingstatus' column, which is not used as a feature
data = pd.read_csv('Housing.csv').drop('furnishingstatus', axis=1)

# Separate features and target
X = data.drop('price', axis=1)
y = data['price']

# Encode categorical variables.
# NOTE: `le` is re-fitted on every column, so after the loop it only remembers
# the classes of the last column in `categorical_cols`.
categorical_cols = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
le = LabelEncoder()

for col in categorical_cols:
    X[col] = le.fit_transform(X[col])

# Decide which rows belong to the training and test sets BEFORE fitting any
# scaler. Fitting MinMaxScaler on the full dataset would let the test rows'
# min/max influence the training features and inflate the reported metrics.
# Split parameters are unchanged (test_size=0.2, random_state=42).
train_rows, test_rows = train_test_split(list(range(len(X))), test_size=0.2, random_state=42)

# Scale numerical features: fit on the training rows only, then transform every row
numerical_cols = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']
X_scaler = MinMaxScaler()
X_scaler.fit(X.iloc[train_rows][numerical_cols])
X[numerical_cols] = X_scaler.transform(X[numerical_cols])

# Scale the target variable (price) the same way: fit on training rows, transform all rows
y_scaler = MinMaxScaler()
y_scaler.fit(y.iloc[train_rows].values.reshape(-1, 1))
y = y_scaler.transform(y.values.reshape(-1, 1)).ravel()

# Materialise the training and testing sets from the row positions chosen above
X_train, X_test = X.iloc[train_rows], X.iloc[test_rows]
y_train, y_test = y[train_rows], y[test_rows]

print("Data preprocessing completed.")

# Fit an RBF-kernel support vector regressor on the training split
# (SVR.fit returns the fitted estimator, so construction and fitting are chained)
svm_model = SVR(kernel='rbf', C=100, gamma='auto').fit(X_train, y_train)

# Score the SVM on the held-out split; both metrics are in scaled-price units
y_pred_svm = svm_model.predict(X_test)
r2_svm = r2_score(y_test, y_pred_svm)
mse_svm = mean_squared_error(y_test, y_pred_svm)

print(f"SVM Mean Squared Error: {mse_svm}")
print(f"SVM R² Score: {r2_svm}")

# Base XGBoost regressor whose hyperparameters are tuned below
xgb_model = XGBRegressor(objective='reg:squarederror', random_state=42)

# Search space: 3 x 3 x 3 = 27 combinations, each evaluated with 3-fold CV
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.1, 0.2],
)

# Exhaustive grid search scored by (negated) MSE, using all available cores
grid_search = GridSearchCV(
    xgb_model,
    param_grid,
    scoring='neg_mean_squared_error',
    cv=3,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Best configuration found by the search
best_xgb_model = grid_search.best_estimator_

# Score the tuned model on the held-out split
y_pred_xgb = best_xgb_model.predict(X_test)
r2_xgb = r2_score(y_test, y_pred_xgb)
mse_xgb = mean_squared_error(y_test, y_pred_xgb)

print(f"XGBoost Mean Squared Error: {mse_xgb}")
print(f"XGBoost R² Score: {r2_xgb}")

# Persist everything the prediction app needs, so inference can run from the
# pickle files alone.

# Save the optimized XGBoost model
with open('optimized_xgb_model.pkl', 'wb') as f:
    pickle.dump(best_xgb_model, f)

# Save the feature scaler (X_scaler)
with open('minmax_X_scaler.pkl', 'wb') as f:
    pickle.dump(X_scaler, f)

# Save the target scaler (y_scaler)
with open('minmax_y_scaler.pkl', 'wb') as f:
    pickle.dump(y_scaler, f)

# Save one label encoder PER categorical column.
# The single shared `le` is re-fitted for each column during preprocessing, so
# storing `{col: le ...}` would map every column to the same object, fitted
# only on the last column. `data` still holds the raw (un-encoded) categorical
# values, so a dedicated encoder is fitted for each column here; LabelEncoder
# orders classes deterministically, so the codes match those used for training.
label_encoders = {col: LabelEncoder().fit(data[col]) for col in categorical_cols}
with open('label_encoders.pkl', 'wb') as f:
    pickle.dump(label_encoders, f)

print("Model and scalers saved successfully.")

# Read back the four artifacts written above, in a fixed order:
# tuned model, feature scaler, target scaler, label encoders.
# NOTE: pickle.load executes arbitrary code from the file; only load files
# produced by this notebook.
_loaded_artifacts = []
for _artifact_path in (
    'optimized_xgb_model.pkl',
    'minmax_X_scaler.pkl',
    'minmax_y_scaler.pkl',
    'label_encoders.pkl',
):
    with open(_artifact_path, 'rb') as f:
        _loaded_artifacts.append(pickle.load(f))

loaded_model, loaded_X_scaler, loaded_y_scaler, loaded_encoders = _loaded_artifacts

def preprocess_input(user_input):
    """Turn one raw user record into a model-ready single-row DataFrame.

    Args:
        user_input: dict with one value per feature column; the yes/no
            columns hold strings and the numeric columns hold numbers.

    Returns:
        A one-row DataFrame in which the six categorical columns have been
        passed through their entry in ``loaded_encoders`` and the five
        numerical columns through ``loaded_X_scaler``.
    """
    frame = pd.DataFrame([user_input])

    # Encode each yes/no column with the encoder stored under its name
    for name in ('mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea'):
        encoder = loaded_encoders[name]
        frame[name] = encoder.transform(frame[name])

    # Scale the numeric columns together, in the order the scaler was fitted with
    scaled_cols = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']
    scaled_values = loaded_X_scaler.transform(frame[scaled_cols])
    frame[scaled_cols] = scaled_values

    return frame

def predict_house_price(area, bedrooms, bathrooms, stories, mainroad, guestroom,
                       basement, hotwaterheating, airconditioning, parking, prefarea):
    """Gradio callback: predict a house price from the form inputs.

    Args:
        area: area as typed by the user (free text); surrounding whitespace
            and thousands separators such as "7,420" are accepted.
        bedrooms, bathrooms, stories, parking: counts, converted with ``int``.
        mainroad, guestroom, basement, hotwaterheating, airconditioning,
        prefarea: "yes"/"no" strings (case-insensitive).

    Returns:
        A display string: the formatted prediction, or a short message asking
        for a valid area when the area text is empty, non-numeric, non-finite
        or not positive.
    """
    # The area comes from a free-text box, so it may be empty or malformed.
    # Validate it up front instead of letting float() raise inside the callback.
    try:
        area_value = float(str(area).replace(",", "").strip())
    except ValueError:
        area_value = float("nan")
    # NaN fails both comparisons, so this also rejects "nan" and "inf".
    if not 0 < area_value < float("inf"):
        return "Please enter the area as a positive number, e.g. 7420."

    # Create dictionary from user inputs (key order defines the column order
    # of the DataFrame built by preprocess_input)
    user_input = {
        'area': area_value,
        'bedrooms': int(bedrooms),
        'bathrooms': int(bathrooms),
        'stories': int(stories),
        'mainroad': mainroad.lower(),
        'guestroom': guestroom.lower(),
        'basement': basement.lower(),
        'hotwaterheating': hotwaterheating.lower(),
        'airconditioning': airconditioning.lower(),
        'parking': int(parking),
        'prefarea': prefarea.lower()
    }

    # Preprocess the input
    processed_input = preprocess_input(user_input)

    # Predict the price (in scaled form)
    predicted_price_scaled = loaded_model.predict(processed_input)

    # Reverse-scale the prediction to original units
    predicted_price = loaded_y_scaler.inverse_transform(predicted_price_scaled.reshape(-1, 1))[0][0]

    return f"Predicted house price: {predicted_price:,.2f}"

# NOTE: the imports, preprocessing, model training, artifact save/load steps and
# the preprocess_input / predict_house_price definitions are all provided
# earlier in this cell. An identical second copy of that code used to sit here;
# it re-trained both models, rewrote the pickle files and redefined both
# functions without changing any result, so it is intentionally not repeated.

# Build the Gradio form. The order of `inputs` must match the positional
# parameters of predict_house_price.
def _yes_no_radio(label, default="no"):
    """Return a yes/no radio button with the given label and default choice."""
    return gr.Radio(label=label, choices=["yes", "no"], value=default)


def _count_dropdown(label, lowest, highest, default):
    """Return a dropdown offering every integer from `lowest` to `highest` inclusive."""
    return gr.Dropdown(label=label, choices=list(range(lowest, highest + 1)), value=default)


_form_inputs = [
    gr.Textbox(label="Area (in sq.ft)", placeholder="e.g., 7420"),
    _count_dropdown("Number of bedrooms", 1, 6, 3),
    _count_dropdown("Number of bathrooms", 1, 4, 1),
    _count_dropdown("Number of stories", 1, 4, 1),
    _yes_no_radio("Main road", default="yes"),
    _yes_no_radio("Guest room"),
    _yes_no_radio("Basement"),
    _yes_no_radio("Hot water heating"),
    _yes_no_radio("Air conditioning"),
    _count_dropdown("Number of parking spaces", 0, 3, 0),
    _yes_no_radio("Preferred area"),
]

interface = gr.Interface(
    fn=predict_house_price,
    inputs=_form_inputs,
    outputs=gr.Textbox(label="Prediction"),
    title="House Price Prediction",
    description="Enter the house details to predict its price using an XGBoost model."
)

# Start the app
interface.launch()

Data preprocessing completed.
SVM Mean Squared Error: 0.014630938756165703
SVM R² Score: 0.6138536587111296
XGBoost Mean Squared Error: 0.015263399794639771
XGBoost R² Score: 0.5971614614376223
Model and scalers saved successfully.
Data preprocessing completed.
SVM Mean Squared Error: 0.014630938756165703
SVM R² Score: 0.6138536587111296
XGBoost Mean Squared Error: 0.015263399794639771
XGBoost R² Score: 0.5971614614376223
Model and scalers saved successfully.
It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://34619a68fdc03ae1ea.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy



# New Section