In [None]:
import numpy as np
import pandas as pd
import pickle
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import explained_variance_score

import warnings  
warnings.filterwarnings('ignore')

In [None]:
#load the SDK to create a TrainingSet
#from domino_data_sdk.trainingset import client, model # for older version of domino_data
from domino_data.training_sets import client, model

In [None]:
# Load the raw data from kc_house_data.csv (path is relative to the working directory).
dataset_raw = pd.read_csv(filepath_or_buffer="kc_house_data.csv")

In [None]:
# Preview the first five rows of the raw data.
dataset_raw.head(n=5)

In [None]:
# Drop the identifier and date columns before fitting the regression.
# Derive from `dataset_raw` (the frame loaded above) rather than from `dataset`
# itself: `dataset` does not exist yet on a fresh kernel, and building it from
# the raw frame keeps this cell safe to re-run.
# NOTE(review): this comment previously said "id" while the code drops
# 'event_id' — confirm the column name against this copy of the CSV.
dataset = dataset_raw.drop(columns=['event_id', 'date'])
dataset.head()

### Code demonstrating creation of a training set

In [None]:
training_set_name = ""     # Fill in a unique name for your training set 

# Monitoring metadata: no categorical, timestamp or ordinal columns are declared.
monitoring_meta = model.MonitoringMeta(
    categorical_columns=[],
    timestamp_columns=[],
    ordinal_columns=[],
)

# Create a training set version from `dataset`, with "price" as the target
# column, under the project named by the DOMINO_PROJECT_NAME environment variable.
tsv = client.create_training_set_version(
    training_set_name=training_set_name,
    df=dataset,
    target_columns=["price"],
    exclude_columns=[],
    monitoring_meta=monitoring_meta,
    project_name=os.environ['DOMINO_PROJECT_NAME'],
)

print(f"TrainingSetVersion {tsv.training_set_name}:{tsv.number}")

### Example code to read the training set just created

In [None]:
# Read back the version that was just created. Using tsv.number instead of a
# hard-coded 1 keeps this correct when the training set already has earlier
# versions (e.g. after this notebook has been run more than once).
read_tsv = client.get_training_set_version(training_set_name, number=tsv.number)
print(f"TrainingSetVersion {read_tsv.training_set_name}:{read_tsv.number}")
read_tsv.load_training_pandas().head()

In [None]:
# Separate the target column ('price') from the remaining feature columns.
y = dataset['price']
X = dataset.drop(columns=['price'])

In [None]:
# Hold out 40% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=0,
)

In [None]:
# Fit a decision tree on the training split and evaluate it on the held-out split.
tr_regressor = DecisionTreeRegressor(random_state=0)
tr_regressor.fit(X_train, y_train)

pred_tr = tr_regressor.predict(X_test)
# For a regressor, score() returns the R^2 coefficient on the given data.
decision_score = tr_regressor.score(X_test, y_test)
# explained_variance_score expects (y_true, y_pred). The metric is not
# symmetric in its arguments, so the ground truth must be passed first.
expl_tr = explained_variance_score(y_test, pred_tr)

In [None]:
# Collect the evaluation metrics into a one-row summary table and display it.
summary_row = {
    'Model': 'Decision Tree Regressor',
    'Score': decision_score,
    'Explained Variance Score': expl_tr,
}
models_score = pd.DataFrame([summary_row])
models_score

In [None]:
# Save the model to disk
filename = 'price_dt_py.sav'
# The context manager closes (and therefore flushes) the file even if pickling
# raises, instead of leaving the handle open until garbage collection.
with open(filename, 'wb') as model_file:
    pickle.dump(tr_regressor, model_file)

In [None]:
# For testing: reload the saved model and run a prediction on one sample.
import pickle

# Load the pickle file
# NOTE: pickle.load can execute arbitrary code — only load files you created or trust.
model_file_name = "price_dt_py.sav"
# Bind the estimator to `loaded_model` rather than `model`: `model` is the
# module imported from domino_data.training_sets, and overwriting it breaks
# model.MonitoringMeta(...) if the training-set cell is run again.
with open(model_file_name, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# A single sample with 18 values.
# NOTE(review): presumably in the same column order as X used for training — confirm.
X_new = [[1.417779575520277,1.2235334622079015,1376.2808414176138,-44282.09192720655,
1.0663144705090994,-0.016425213918793445,-0.12412711151320194,3.010860708305122,
6.446308650295107,1134.2601420161861,83.01687506148825,1950.9720074899026,38.33997291730212,
98187.71575450613,47.512356777412954,-122.29738607549102,1162.588195657737,-15814.149590757263]]

loaded_model.predict(X_new)