In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import statsmodels.api as sm
import scipy.stats as stats


from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [None]:
# Google Drive share link for the dataset CSV.
csv_link = 'https://drive.google.com/file/d/1FUdTrrP8FEutHOMV6mPlgkoLl8yX4bfd/view?usp=sharing'

# The file id is the second-to-last '/'-separated segment of the share link.
file_id = csv_link.rsplit('/', 2)[1]

# URL handed to pandas, built from the extracted file id.
download_link = 'https://drive.google.com/uc?id=' + file_id

# Read the CSV, then discard every row that contains a missing value.
data = pd.read_csv(download_link).dropna()

In [None]:
# 'price' is the target; every other column of `data` is used as a feature.
y = data['price']
X = data.drop(columns='price')

# Hold out 30% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=15,
)

# Gradient-boosted regressor fitted on the training portion only.
gb_regressor = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=15,
)
gb_regressor.fit(X_train, y_train)

# Predictions for the held-out rows.
y_pred = gb_regressor.predict(X_test)

In [None]:
def get_value(input_col):
    """Return a formatted price estimate for one property.

    The inputs are label-encoded or min-max scaled with hard-coded bounds,
    arranged in ``columns_order``, and passed to the module-level
    ``gb_regressor``. The prediction is then rescaled to the hard-coded price
    range, offset by a size-dependent bias and rounded to the nearest 100,000.

    Args:
        input_col: Object indexable by column name that holds numeric values
            for 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
            'condition', 'grade', 'sqft_above', 'sqft_basement', 'yr_built',
            'yr_renovated', 'lat', 'long', 'sqft_living15' and 'sqft_lot15'.
            It is only read, never modified.

    Returns:
        str: The rounded price with thousands separators, e.g. '3,500,000'.

    Raises:
        Lookup and conversion errors (for a dict, ``KeyError`` on a missing
        key; ``ValueError``/``TypeError`` from ``int()`` or the arithmetic)
        propagate to the caller.
    """
    # Hard-coded bounds for min-max scaling, aligned index-by-index with
    # `scaled_cols` below.
    min_list = [290, 520, 290, 0, 1900, 0, 47.1559, -122.387, 399, 651]
    max_list = [4230, 19141, 3740, 4820, 2015, 2015, 47.7776, -121.979, 3660, 17550]
    scaled_cols = [
        'sqft_living', 'sqft_lot', 'sqft_above', 'sqft_basement', 'yr_built',
        'yr_renovated', 'lat', 'long', 'sqft_living15', 'sqft_lot15',
    ]
    encoded_cols = ['floors', 'bedrooms', 'bathrooms', 'grade', 'condition']

    # Size-based bias: the factors compound, so every threshold that is
    # exceeded multiplies the running bias. Uses the raw (un-multiplied)
    # living area.
    bias = 1000000
    for threshold, factor in ((2000, 2), (3000, 2), (3500, 4), (5000, 2), (7000, 2)):
        if input_col['sqft_living'] > threshold or input_col['sqft_above'] > threshold:
            bias *= factor

    # Living area is scaled by the number of floors before normalisation.
    # Done on a local copy so the caller's object is left untouched and
    # repeated calls with the same input give the same answer.
    raw_values = {col: input_col[col] for col in scaled_cols}
    raw_values['sqft_living'] = input_col['sqft_living'] * input_col['floors']

    # Every feature is stored under its column name so the final ordering is
    # done by name rather than by position.
    features = {col: int(input_col[col]) - 1 for col in encoded_cols}
    for col, min_val, max_val in zip(scaled_cols, min_list, max_list):
        features[col] = (raw_values[col] - min_val) / (max_val - min_val)

    # NOTE(review): presumably the column order of the training frame --
    # verify against the columns `gb_regressor` was fitted on.
    columns_order = [
        'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
        'condition', 'grade', 'sqft_above', 'sqft_basement', 'yr_built',
        'yr_renovated', 'lat', 'long', 'sqft_living15', 'sqft_lot15'
    ]
    result = [features[col] for col in columns_order]

    result_array = np.array(result).reshape(1, -1)
    predicted_price = gb_regressor.predict(result_array)

    # Adjusting the predicted price within the specified range
    adjusted_price = ((predicted_price * (37207500 - 2475000)) + 2475000 + bias)[0]
    # Rounding to the nearest hundred thousand and converting to int
    rounded_price = int(round(adjusted_price / 100000) * 100000)

    # Formatting the price with commas
    formatted_price = '{:,}'.format(rounded_price)

    return formatted_price

In [None]:
!pip install anvil-uplink
import anvil.server

anvil.server.connect("server_D7GASJG7U7YKXXA7FXX5VGGX-SGP5HQIIJUYSFPJP")

Collecting argparse (from anvil-uplink)
  Using cached argparse-1.4.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: argparse
Successfully installed argparse-1.4.0


In [None]:
from anvil import *

import anvil.media

@anvil.server.callable
def pred_val(input_col):
    """Anvil-callable wrapper around ``get_value``.

    Args:
        input_col: Property attributes, passed through unchanged to
            ``get_value``.

    Returns:
        str: "Estimated Value: Rs.<price>" on success, or "Invalid Data" if
        ``get_value`` raises any exception. The failure is logged with its
        traceback so it stays diagnosable on the notebook side.
    """
    import logging  # local import keeps this cell self-contained

    try:
        result = get_value(input_col)
    except Exception:
        # Best-effort by design: the caller always gets a string back, but
        # the underlying error is recorded instead of being silently dropped.
        logging.exception("pred_val failed for input %r", input_col)
        return "Invalid Data"
    return f"Estimated Value: Rs.{result}"