In [1]:
# INSPIRED BY: https://docs.microsoft.com/en-us/sql/advanced-analytics/tutorials/python-ski-rental-linear-regression?view=sql-server-ver15

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from revoscalepy import RxComputeContext, RxInSqlServer, RxSqlServerData
from revoscalepy import rx_import

conn_str = 'Driver=SQL Server;Server=DEVNOTEBOOK\MSSQLSERVER01;Database=ML;Trusted_Connection=True;'

column_info = {
     "ConditionCode" : { "type" : "integer" },
     "Sqft" : { "type" : "integer" },
     "NumBedrooms" : { "type" : "integer" },                  
     "Age" : { "type" : "integer" },         
     "TotalAV" : { "type" : "integer" }
 }
# Create data source object
data_source = RxSqlServerData(
    table="dbo.AssessmentTrainingDataMultiVarLinReg", 
    connection_string=conn_str, 
    column_info=column_info
)
# Specify compute context
RxInSqlServer(
     connection_string = conn_str,
     num_tasks = 1,
     auto_cleanup = False
)
RxInSqlServer(
    connection_string=conn_str, 
    num_tasks=1, 
    auto_cleanup=False
)

# Convert DB data to data-frame
data_frame = pd.DataFrame(
    rx_import(input_data = data_source)
)

# Create list of column names contained in data
columns = data_frame.columns.tolist()

# Specify the field we want to predict
prediction_target = "TotalAV"

# Specify how much data to use in the training set. 
training_data = data_frame.sample(
    frac=0.8, 
    random_state=1
)

# Select anything not in the training set and drop it in a testing set.
testing_data = data_frame.loc[~data_frame.index.isin(training_data.index)]

print(
    "Training data shape:", 
    training_data.shape
)
print(
    "Testing data shape:", 
    testing_data.shape
)

linear_regression_model = LinearRegression()

linear_regression_model.fit(
    training_data[columns], 
    training_data[prediction_target]
)

predictions = linear_regression_model.predict(testing_data[columns])

error = mean_squared_error(predictions, testing_data[prediction_target])
print(
    "Error:", 
    error
)



Express Edition will continue to be enforced.
Rows Read: 10410, Total Rows Processed: 10410, Total Chunk Time: 0.016 seconds 
Training data shape: (8328, 5)
Testing data shape: (2082, 5)
Error: 3.29309568099e-26
