In [11]:
# Imports for this cell: Azure ML handles plus the label encoder.
from azureml.core import Workspace, Run, Experiment
from sklearn.preprocessing import LabelEncoder

# Connect to the workspace via its config and pull the registered dataset
# into a pandas DataFrame.
ws = Workspace.from_config()
df = ws.datasets['mammographic _data'].to_pandas_dataframe()

# Open an interactive run under the "mamo-exp" experiment and record how many
# rows were loaded.
experiment = Experiment(
    workspace=ws,
    name="mamo-exp",
)
run = experiment.start_logging()
run.log('total observations', df.shape[0])

# Rename the target column to "Label", then encode its values with
# LabelEncoder; `y` keeps the raw labels and `Y` the encoded ones.
df = df.rename({'diagnosis': 'Label'}, axis='columns')
y = df["Label"].values
labelencoder = LabelEncoder()
Y = labelencoder.fit_transform(y)


In [12]:
from sklearn.preprocessing import MinMaxScaler

# Feature matrix: every column except the target ("Label") and "id".
feature_frame = df.drop(columns=["Label", "id"])

# Rescale the features with MinMaxScaler; `scaler` stays bound so the fitted
# transform remains available after this cell.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so held-out rows influence the scaling parameters — consider fitting on the
# training rows only.
scaler = MinMaxScaler()
X = scaler.fit_transform(feature_frame)
print(X)  # Scaled values

[[0.52103744 0.0226581  0.54598853 ... 0.91202749 0.59846245 0.41886396]
 [0.64314449 0.27257355 0.61578329 ... 0.63917526 0.23358959 0.22287813]
 [0.60149557 0.3902604  0.59574321 ... 0.83505155 0.40370589 0.21343303]
 ...
 [0.45525108 0.62123774 0.44578813 ... 0.48728522 0.12872068 0.1519087 ]
 [0.64456434 0.66351031 0.66553797 ... 0.91065292 0.49714173 0.45231536]
 [0.03686876 0.50152181 0.02853984 ... 0.         0.25744136 0.10068215]]


In [13]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed seed keeps the partition
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=42,
    test_size=0.25,
)

# Report the dimensions of each partition.
print("Shape of training data is: ", X_train.shape)
print("Shape of testing data is: ", X_test.shape)

Shape of training data is:  (426, 30)
Shape of testing data is:  (143, 30)


In [14]:
from sklearn.linear_model import LogisticRegression

# Train a logistic-regression classifier with the library's default settings
# on the training partition (fit returns the estimator itself).
logisticRegr = LogisticRegression().fit(X_train, y_train)
logisticRegr  # show the fitted estimator as the cell output

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [15]:
# Evaluate the fitted classifier on the held-out partition and print the
# resulting score.
score = logisticRegr.score(X=X_test, y=y_test)
print(score)

0.986013986013986


In [16]:
from sklearn.metrics import confusion_matrix

# Predict on the held-out rows, then tabulate true labels against predicted
# labels; the matrix is left as the last expression so the cell displays it.
Y_predict = logisticRegr.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=Y_predict)
cm

array([[89,  0],
       [ 2, 52]])

In [17]:
import os

import joblib

# Serialize the fitted classifier to disk. The target directory is created
# first because joblib.dump does not create missing parent directories; on a
# fresh checkout the dump would otherwise fail with FileNotFoundError.
os.makedirs('outputs', exist_ok=True)
joblib.dump(logisticRegr, 'outputs/model.sav')

['outputs/model.sav']

In [None]:
# Attach the serialized model file to the run, using the same relative path
# for the artifact name as the file has on disk.
model_artifact = "outputs/model.sav"
run.upload_file(model_artifact, model_artifact)

In [None]:
import sklearn
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

# Register the run's model artifact in the workspace as "sklearn-mamo".
# The framework version is read from the installed scikit-learn package
# instead of being hard-coded, so the registry metadata matches the library
# that actually trained and serialized the model.
model = run.register_model(model_name='sklearn-mamo',
                           model_path='outputs/model.sav',
                           model_framework=Model.Framework.SCIKITLEARN,
                           model_framework_version=sklearn.__version__,
                           resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5))

In [22]:
# Mark the interactive run opened with experiment.start_logging() as finished.
run.complete()