Copyright © 2020, SAS Institute Inc., Cary, NC, USA.  All Rights Reserved.
SPDX-License-Identifier: Apache-2.0

### Python Package Imports

In [None]:
# Dataframes for data manipulations
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
# Mathematical calculations and array handling
import numpy as np

# Data partitioning for TRAIN and TEST data sets
from sklearn.model_selection import train_test_split

# Embedded plotting
import matplotlib.pyplot as plt 
plt.rc("font", size=14)

# Pathing support
from pathlib import Path

# sasctl interface for importing models
import sasctl.pzmm as pzmm
from sasctl import Session
from sasctl.services import model_repository as modelRepo

### Import and Review Data Set

In [None]:
# Load the USA housing CSV into a DataFrame, then display its (rows, columns) shape
csvSource = 'data/USA_Housing.csv'
housingData = pd.read_csv(csvSource, sep=',')
housingData.shape

In [None]:
# The 'Address' column is removed before modeling; preview the first rows of what remains
housingData = housingData.drop(columns=['Address'])
housingData.head()

In [None]:
# Display the column labels that remain in the data set
housingData.columns

### Preprocess Data

In [None]:
# Input variables (predictors)
predictorColumns = ['Avg_Area_Income', 'Avg_Area_House_Age', 'Avg_Area_Number_of_Rooms',
                    'Avg_Area_Number_of_Bedrooms', 'Area_Population']

# Target
targetColumn = 'Price'
x = housingData[predictorColumns]
y = housingData[targetColumn]

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size=0.3, random_state=42)

# For missing values, impute the TRAINING partition's column means into both
# partitions. Using the test partition's own mean would leak test-set statistics
# into preprocessing and would not match what is available at scoring time.
# fillna() is used without inplace=True so no derived frame is mutated in place.
trainMeans = xTrain.mean()
xTrain = xTrain.fillna(trainMeans)
xTest = xTest.fillna(trainMeans)
print(xTest.shape)
print(xTrain.shape)

### Create, Train, and Assess Model

In [None]:
# Linear Regression Training
from sklearn.linear_model import LinearRegression
# The `normalize` argument was deprecated in scikit-learn 1.0 and removed in 1.2,
# where passing it raises TypeError. For ordinary least squares with an intercept,
# rescaling the predictors does not change the fitted predictions, so it is
# simply omitted here.
linReg = LinearRegression()
linReg.fit(xTrain, yTrain)

In [None]:
# Score the held-out TEST partition and report the R^2 of the predictions
from sklearn import metrics
LRPredict = linReg.predict(xTest)
r2Score = metrics.r2_score(yTest, LRPredict)
print(r2Score)

### Zip file for registering into SAS Model Manager

In [None]:
# Name prefix for the model files and the folder (under the current working
# directory) that collects everything to be zipped for registration
modelPrefix = 'LinearRegression'
zipFolder = Path.cwd().joinpath('data/USAHousingModels/LinearRegression')

# Serialize the trained estimator into that folder
pzmm.PickleModel.pickle_trained_model(linReg, modelPrefix, zipFolder)

In [None]:
def writeJSONFiles(data, predict, target, zipFolder, modelPrefix):
    """Write the JSON metadata files for a model into ``zipFolder``.

    Calls, in order, ``writeVarJSON`` for the input variables,
    ``writeVarJSON`` for the output variable, ``writeModelPropertiesJSON``
    and ``writeFileMetadataJSON`` on a ``pzmm.JSONFiles`` instance.

    Parameters
    ----------
    data : pandas.DataFrame
        Data set that contains the predictor columns.
    predict : list of str
        Names of the predictor columns in ``data``.
    target : str
        Name of the target variable.
    zipFolder : str or pathlib.Path
        Folder passed as ``jPath`` to every JSON writer.
    modelPrefix : str
        Model name; also passed to the file metadata writer.
    """
    jsonFiles = pzmm.JSONFiles()

    # Write input variable mapping to a json file
    jsonFiles.writeVarJSON(data[predict], isInput=True, jPath=zipFolder)

    # Describe the single output variable with a one-row frame holding a sample
    # numeric value. The frame is built directly rather than through the chained
    # assignment `outputVar['EM_PREDICTION'].loc[1] = 0.5`, which may leave the
    # frame empty (object dtype) depending on the pandas version. The row keeps
    # index label 1, as in the original assignment.
    # NOTE(review): presumably writeVarJSON derives the variable type from the
    # column dtype -- confirm against the sasctl documentation.
    outputVar = pd.DataFrame({'EM_PREDICTION': [0.5]}, index=[1])
    jsonFiles.writeVarJSON(outputVar, isInput=False, jPath=zipFolder)

    # Write model properties to a json file
    jsonFiles.writeModelPropertiesJSON(modelName=modelPrefix,
                                       modelDesc='',
                                       targetVariable=target,
                                       modelType='',
                                       modelPredictors=predict,
                                       targetEvent=None,
                                       numTargetCategories=1,
                                       eventProbVar='EM_PREDICTION',
                                       jPath=zipFolder,
                                       modeler='sasdemo')

    # Write model metadata to a json file
    jsonFiles.writeFileMetadataJSON(modelPrefix, jPath=zipFolder)

# Generate the JSON metadata files for the linear regression model
writeJSONFiles(data=housingData,
               predict=predictorColumns,
               target=targetColumn,
               zipFolder=zipFolder,
               modelPrefix=modelPrefix)

In [None]:
import getpass
# Prompt for the user name visibly and with a label. Previously both values were
# read through getpass.getpass(), which showed two identical hidden "Password:"
# prompts. Only the password is read without echo.
username = input('Username: ')
password = getpass.getpass('Password: ')
host = 'demo.sas.com'
# NOTE(review): protocol='http' sends the credentials unencrypted; consider
# 'https' if the server supports it.
sess = Session(host, username, password, protocol='http')

In [None]:
# Import the prepared model files from zipFolder through the open session.
# NOTE(review): the third argument is presumably the target project name and
# '{}.predict({})' the template for the scoring call -- confirm against the
# sasctl documentation.
I = pzmm.ImportModel()
I.pzmmImportModel(
    zipFolder,
    modelPrefix,
    'LinearRegressionModelExample',
    x,
    y,
    '{}.predict({})',
    force=True,
    metrics=['EM_PREDICTION', 'EM_PREDICTION'],
)