In [3]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Exports scikit-learn models to JSON
from sklearn_model.export import Model

In [6]:
# Read the regression dataset from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer="assets/regr.csv")

In [7]:
# Fit a StandardScaler on every column of df and rebuild a DataFrame that
# keeps the original column names.
# NOTE(review): the scaler is fitted on the full dataset (target column and
# future test rows included) before the train/test split further down;
# confirm this is intended.
scaler = StandardScaler()
df_scaled = pd.DataFrame(data=scaler.fit_transform(df), columns=[*df.columns])
df_scaled.head()

Unnamed: 0,X1 house age,X2 distance to the nearest MRT station,X3 number of convenience stores,X4 latitude,X5 longitude,Y house price of unit area
0,1.255628,-0.792495,2.007407,1.12543,0.448762,-0.005901
1,0.157086,-0.616612,1.667503,0.912444,0.401139,0.310507
2,-0.387791,-0.414015,0.307885,1.48686,0.688183,0.685782
3,-0.387791,-0.414015,0.307885,1.48686,0.688183,1.237656
4,-1.117223,-0.549997,0.307885,0.834188,0.592937,0.376732


In [9]:
# Independent variables: every scaled column except the target
dfX = df_scaled.drop(columns=["Y house price of unit area"])
dfX.head()

Unnamed: 0,X1 house age,X2 distance to the nearest MRT station,X3 number of convenience stores,X4 latitude,X5 longitude
0,1.255628,-0.792495,2.007407,1.12543,0.448762
1,0.157086,-0.616612,1.667503,0.912444,0.401139
2,-0.387791,-0.414015,0.307885,1.48686,0.688183
3,-0.387791,-0.414015,0.307885,1.48686,0.688183
4,-1.117223,-0.549997,0.307885,0.834188,0.592937


In [10]:
# Target: the scaled house-price column, kept as a single-column DataFrame
dfY = df_scaled.loc[:, ["Y house price of unit area"]]
dfY.head()

Unnamed: 0,Y house price of unit area
0,-0.005901
1,0.310507
2,0.685782
3,1.237656
4,0.376732


In [11]:
# Convert the feature and target frames to NumPy arrays.
# DataFrame.to_numpy() is the accessor pandas recommends over .values.
# y stays two-dimensional here because dfY is a single-column DataFrame.
X = dfX.to_numpy()
y = dfY.to_numpy()

In [12]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

In [13]:
# Collapse both target arrays into 1-D vectors (flatten() returns copies)
y_train, y_test = y_train.flatten(), y_test.flatten()

In [15]:
# Create the linear regression estimator, then train it on the training split
model_reg = LinearRegression()
model_reg = model_reg.fit(X=X_train, y=y_train)

In [16]:
# R-squared goodness of fit on the training dataset
model_reg.score(X=X_train, y=y_train)

0.5372179466923446

In [18]:
# R-squared goodness of fit on the held-out test dataset
model_reg.score(X=X_test, y=y_test)

0.7007314553053123

In [19]:
# Create the export object that collects the pieces of the fitted pipeline
mdl = Model()

# Register the input (feature) and output (target) fields from the two frames
mdl.add_fields(dfX, dfY)

# Register the fitted StandardScaler together with the names of the columns
# it was fitted on (this list still includes the target column)
mdl.add_transformer(scaler, list(df_scaled.columns))

# Register the fitted linear regression model
mdl.add_model(model_reg)

In [21]:
# Show the JSON produced for the exported model
exported_json = mdl.exportJSON()
print(exported_json)

{
    "input": {
        "X1 house age": {
            "type": "float"
        },
        "X2 distance to the nearest MRT station": {
            "type": "float"
        },
        "X3 number of convenience stores": {
            "type": "float"
        },
        "X4 latitude": {
            "type": "float"
        },
        "X5 longitude": {
            "type": "float"
        }
    },
    "output": {
        "Y house price of unit area": {
            "type": "float"
        }
    },
    "transformer": {
        "type": "Standard",
        "scale_fields": {
            "X1 house age": {
                "mean": 17.71256038647343,
                "stddev": 11.378717175302857
            },
            "X2 distance to the nearest MRT station": {
                "mean": 1083.8856889130436,
                "stddev": 1260.5843868803893
            },
            "X3 number of convenience stores": {
                "mean": 4.094202898550725,
                "stddev": 2.94200221305731
    

In [22]:
# Write the exported model to the file regr.json
mdl.exportJSON("regr.json")