In [1]:
# --- Workspace coordinates (consumed when connecting to the workspace) ---
subscription_id = "a2c669ee-7c2d-4ba9-8069-b865a02705e2"
resource_group = "JYSK_DEMO_2"
workspace = "JYSK_MLSERVICE2"

# --- Experiment tracking and model artifact naming ---
experiment_name = "Marketing_effects"
# The file name may be changed, but it must keep the ".mml" extension.
model_name = "databricksmodel.mml"

In [2]:
import azureml.core
from azureml.core import Workspace
from azureml.core.run import Run
from azureml.core.experiment import Experiment
import shutil

In [3]:
# Build the workspace handle from the configuration values defined above.
ws = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace,
)
# Query the workspace details; the returned value is not used here.
ws.get_details()

# Report the identifying attributes of the workspace, one per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

In [4]:
# pandas is imported in this cell because the notebook's main import cell comes
# later; without this line `pd` is undefined when the notebook is run top to
# bottom on a fresh kernel.
import pandas as pd

# Read the advertising CSV straight from its raw GitHub URL and preview it.
url = 'https://raw.githubusercontent.com/bachzwergius/JYSK_POC/master/advertising.csv'
df = pd.read_csv(url)
df.head(5)

In [5]:
# Suppress warnings

import warnings
warnings.filterwarnings('ignore')

# Import the numpy and pandas packages

import numpy as np
import pandas as pd

# Data visualisation
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# scikit-learn deprecated its vendored copy of joblib (sklearn.externals.joblib)
# in 0.21 and removed it in 0.23. Prefer the standalone package and fall back
# to the vendored copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [6]:
# Target: the 'Sales' column, kept as a one-column DataFrame.
y = df[['Sales']]
# Predictors: every column except 'Sales'.
X = df.drop(columns='Sales')

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=1, test_size=0.3)

In [7]:
# Bind the experiment name to the workspace, then start the parent run that
# the training cell creates its child run from.
myexperiment = Experiment(workspace=ws, name=experiment_name)
root_run = myexperiment.start_logging()

In [8]:
import os

# Train a linear regression inside a child run so that its metrics and model
# file are recorded against that run.
with root_run.child_run("test") as run:
    reg = LinearRegression()
    model = reg.fit(X_train, y_train)
    y_predicted = model.predict(X_test)

    # Score the predictions on the held-out split and log both metrics.
    MSE = mean_squared_error(y_test, y_predicted)
    RSQ = r2_score(y_test, y_predicted)

    run.log("MSE", MSE)
    run.log("RSQ", RSQ)

    # Save the model to the outputs directory for capture. The directory is
    # created first: joblib.dump does not create missing parent directories,
    # so the dump would otherwise fail on a clean working directory.
    os.makedirs('outputs', exist_ok=True)
    model_file_name = 'outputs/' + model_name

    joblib.dump(value=model, filename=model_file_name)

    # Upload the model file explicitly into the run's artifacts.
    run.upload_file(name=model_file_name, path_or_stream=model_file_name)

    # Complete the run
    run.complete()

In [9]:
# Remove the local outputs directory, then close the parent run.
# A missing directory is tolerated (e.g. when this cell is re-run), and the
# run is completed in `finally` so that a cleanup failure cannot leave it open.
try:
    shutil.rmtree('outputs/')
except FileNotFoundError:
    pass
finally:
    root_run.complete()

In [10]:
# Rebind `reg` to a fresh estimator and fit it on the training split.
# The fit call is left as the final expression so the cell displays the
# fitted estimator.
reg = LinearRegression()
reg.fit(X=X_train, y=y_train)

In [11]:
# Predict on the held-out split, compute both metrics, then report them
# rounded to two decimals.
y_predicted = reg.predict(X_test)
test_mse = mean_squared_error(y_test, y_predicted)
test_r2 = r2_score(y_test, y_predicted)

print("Mean squared error: %.2f" % test_mse)
print('R²: %.2f' % test_r2)