In [99]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder
from sklearn.tree import DecisionTreeRegressor
from imblearn.metrics import sensitivity_specificity_support
from sklearn import svm
from sklearn import datasets
import pickle

In [92]:
def readCSV():
    data = pd.read_csv('Sold_And_Stats_Edited_New_a.csv')
    return data

def cleanData(data):
    data.drop(['Listing Number', 'Street Number', 'Street Number Modifier', 'Street Direction', 'Street Name', 'Street Suffix', 'Street Post Direction', 'City', 'State', 'Area', 'Selling Date', 'Style Code', 'Sold to List Price Percentage'], axis=1,inplace=True)
    data.replace([np.inf, -np.inf], np.nan, inplace=True)
    data.dropna(subset=["Bathrooms", "Bedrooms", "Listing Price"], how="all", inplace=True)
    data.dropna(how='any', inplace=True)
    data = data.fillna(0)
    data[["Bathrooms", "Bedrooms", "Listing Price", "Selling Price"]] = data[["Bedrooms", "Bathrooms", "Listing Price", "Selling Price"]].apply(pd.to_numeric)
    data = data.astype({"Bathrooms": int, "Bedrooms": int, "Listing Price": int, "Selling Price": int})
    PropertyType = {'House': 1, 'Condo': 2, 'Townhouse': 3,}
    data.replace({"Property Type": PropertyType}, regex=True, inplace=True)
    data = data.astype({"Property Type": int})
    data['Listing Price'] = data['Listing Price'].round(decimals=2)
    data.loc[(data['Bedrooms'] == 0)]
    data.loc[(data['Bathrooms'] == 0)]
    data.sort_values(by=['Listing Price'],ascending=False)
    data.loc[(data['Listing Price'] > 0)].sort_values(by=['Listing Price'])
    data.sort_values(by=['Square Footage'],ascending=False)
    missing = data.loc[(data['Listing Price'] == 0)].append(data.loc[(data['Bathrooms'] == 0)])
    missing_index_list = missing.reset_index()['index'].to_list()
    missing_index_list.sort(reverse = True)
    data = data.drop(missing_index_list,axis=0)

    return data

def trainModel(data):

    y = data['Selling Price']
    X = data.drop(['Selling Price'],axis=1)


    dt = DecisionTreeRegressor(max_depth=18)
    dt.fit(X, y)
    
    return dt


def saveModel(dt):
    with open('OfferAidmodel.pkl','wb') as f:
        pickle.dump(dt,f)

def getresponse(userDF, aggressionLevel, dt):
    offerEstimate = dt.predict(userDF)
    if aggressionLevel == "low":
        offerEstimate = offerEstimate*.95
    elif aggressionLevel == "high":
        offerEstimate = offerEstimate*1.05

    return f"Using the information provided, {offerEstimate} would be a reasonable offer for this property"

def execute():
    data = readCSV()
    data = cleanData(data)
    dtpred = trainModel(data)
    saveModel(dtpred)

In [93]:
execute()

In [100]:
userData = [{'Zip Code': 98056, 'Bathrooms': 1, 'Bedrooms': 3, 'Lot Square Footage ': 6000, 'Listing Price': 160000, 'Square Footage': 900, 'Property Type': 1}]
userDF = pd.DataFrame(userData)

# load
with open('OfferAidmodel.pkl', 'rb') as f:
    clf2 = pickle.load(f)

clf2.predict(userDF)

#aggressionLevel = "high"

#getresponse(userDF, aggressionLevel, dtpred)

array([182500.])

In [115]:

import boto3

cred = boto3.Session().get_credentials()
ACCESS_KEY = cred.access_key
SECRET_KEY = cred.secret_key

s3client = boto3.client('s3', 
                        aws_access_key_id = ACCESS_KEY, 
                        aws_secret_access_key = SECRET_KEY, 
                       )

response = s3client.get_object(Bucket='offeraiddataset', Key='OfferAidmodel.pkl')

body = response['Body'].read()
data = pickle.loads(body)

data.predict(userDF)

array([182500.])