In [2]:
import pkg_resources
import pprint

# Record the kernel's environment: one "name==version" entry for every
# distribution in the working set, sorted alphabetically and pretty-printed.
installed_packages = pkg_resources.working_set
installed_packages_list = [
    "%s==%s" % (dist.key, dist.version) for dist in installed_packages
]
installed_packages_list.sort()
pprint.pprint(installed_packages_list)

['aiobotocore==2.3.4',
 'aiohttp==3.8.1',
 'aioitertools==0.10.0',
 'aiosignal==1.2.0',
 'alabaster==0.7.12',
 'anaconda-client==1.7.2',
 'anaconda-project==0.8.3',
 'ansi2html==1.8.0',
 'argcomplete==2.0.0',
 'argh==0.26.2',
 'argon2-cffi-bindings==21.2.0',
 'argon2-cffi==21.3.0',
 'asn1crypto==1.3.0',
 'astroid==2.12.5',
 'astropy==4.0',
 'async-timeout==4.0.2',
 'asynctest==0.13.0',
 'atomicwrites==1.3.0',
 'attrs==21.4.0',
 'autopep8==1.4.4',
 'autovizwidget==0.20.0',
 'awscli==1.25.63',
 'babel==2.10.3',
 'backcall==0.1.0',
 'backports.shutil-get-terminal-size==1.0.0',
 'beautifulsoup4==4.8.2',
 'bitarray==1.2.1',
 'bkcharts==0.2',
 'bleach==5.0.1',
 'bokeh==1.4.0',
 'boto3==1.24.62',
 'boto==2.49.0',
 'botocore==1.27.62',
 'bottleneck==1.3.2',
 'brotli==1.0.9',
 'brotlipy==0.7.0',
 'cached-property==1.5.2',
 'certifi==2022.6.15',
 'cffi==1.15.0',
 'chardet==3.0.4',
 'charset-normalizer==2.0.4',
 'click==7.0',
 'cloudpickle==2.1.0',
 'clyent==1.2.2',
 'colorama==0.4.3',
 'colorlog

In [3]:
import sys

# Show the directories the interpreter searches when importing modules.
module_search_path = sys.path
print(module_search_path)

['/root/aws-studio-lifecycle-config-for-iris-orig/notebooks', '/opt/conda/lib/python37.zip', '/opt/conda/lib/python3.7', '/opt/conda/lib/python3.7/lib-dynload', '', '/root/.local/lib/python3.7/site-packages', '/opt/conda/lib/python3.7/site-packages', '/opt/conda/lib/python3.7/site-packages/IPython/extensions', '/root/.ipython']


Logistic Regression on the Iris Dataset
using the scikit-learn library.

In [None]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets

In [None]:
# Load the iris dataset and unpack it into a feature matrix and class labels.
iris = datasets.load_iris()
# Columns 0-3 of the feature matrix are kept as X; the targets become y.
X, y = iris.data[:, :4], iris.target

In [None]:
# Keep the dataset's `filename` attribute so the tracking cell can log it as
# the experiment input; the bare name on the last line displays it as the
# cell output.
dataset_file = iris.filename
dataset_file

In [None]:
# Report the shape of the feature matrix and the number of labels.
print(f"X {X.shape}", f"y {len(y)}", sep="\n")

In [None]:
# Peek at the first five rows of the feature matrix (all columns).
print(X[:5])

In [None]:
# Hold out a quarter of the samples for testing; the fixed random_state keeps
# the split the same from run to run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Report the size of each piece of the train/test split.
print(
    f"X_train {X_train.shape}",
    f"X_test {X_test.shape}",
    f"y_train {len(y_train)}",
    f"y_test {len(y_test)}",
    sep="\n",
)

In [None]:
# Standardize the features. The scaler is fitted on the training split only,
# and that same fitted scaler is then applied to both splits.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Peek at the first five rows of the training features (all columns).
print(X_train[:5])

In [None]:
# Train a logistic-regression classifier on the training split.
from sklearn.linear_model import LogisticRegression

solver = 'lbfgs'  # kept in a variable so the tracking cell can log it
classifier = LogisticRegression(
    solver=solver,
    multi_class='auto',
    random_state=0,
)
# Last expression of the cell: fits the model and displays the estimator.
classifier.fit(X_train, y_train)

In [None]:
# Class predictions for the held-out samples.
y_pred = classifier.predict(X_test)

# Per-class probabilities for the same samples, plus a copy rounded to two
# decimals for display.
probs_y = classifier.predict_proba(X_test)
probs_y_rounded = probs_y.round(2)

In [None]:
# Tabulate the true label, the predicted label and the per-class
# probabilities for every test sample.
# The probability columns are headed "(%)", so the values are rendered as
# percentages (e.g. 97%) rather than as raw 0-1 fractions.
header_fmt = "{:<10} | {:<10} | {:<10} | {:<13} | {:<5}"
row_fmt = "{:<10} | {:<10} | {:<10.0%} | {:<13.0%} | {:<10.0%}"
rule = "-" * 65

rows = [
    row_fmt.format(actual, predicted, *class_probs)
    for actual, predicted, class_probs in zip(y_test, y_pred, probs_y_rounded)
]
# Layout: header, rule, one line per sample, rule, trailing newline.
res = "\n".join([
    header_fmt.format("y_test", "y_pred", "Setosa(%)", "versicolor(%)", "virginica(%)"),
    rule,
    *rows,
    rule,
]) + "\n"
print(res)

In [None]:
# Mean accuracy on the held-out split. The score has to be computed against
# the true labels (y_test): scoring against y_pred compares the model with its
# own predictions and always yields 1.0.
accuracy = classifier.score(X_test, y_test)

In [None]:
# Report the accuracy value held in `accuracy`.
print(f"accuracy {accuracy}")

In [None]:
# confusion matrix of the true test labels against the predicted labels
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)


In [None]:
# Plot confusion matrix
import seaborn as sns
import pandas as pd
# Draw the confusion matrix as an annotated heatmap on an explicit
# figure/axes pair instead of relying on pyplot's implicit current axes.
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, annot_kws={"size": 30}, fmt='d', cmap="Blues", ax=ax)
ax.set_title('Confusion Matrix')
# scikit-learn's confusion_matrix puts true classes on the rows and predicted
# classes on the columns.
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')

# Write the image before showing it: the tracking cell uploads cm.png, and
# saving first does not depend on the figure surviving plt.show().
fig.savefig('cm.png')
plt.show()

In [None]:
# Plot the dataset points using the first two feature columns, colored by
# class label. A dedicated figure/axes pair is used so the plot does not
# depend on pyplot's implicit current figure.
scatter_fig, scatter_ax = plt.subplots()
scatter_ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors="k", cmap=plt.cm.Paired)

# Label the figure so it can be read on its own; the axis names are taken
# from the dataset's feature_names.
scatter_ax.set_title('Iris samples: first two features')
scatter_ax.set_xlabel(iris.feature_names[0])
scatter_ax.set_ylabel(iris.feature_names[1])

# Tick marks are hidden.
scatter_ax.set_xticks(())
scatter_ax.set_yticks(())

plt.show()

In [1]:
# log trial

from smexperiments import trial, experiment, tracker
import uuid

import sagemaker
from sagemaker import get_execution_role

# NOTE: this cell reads `solver`, `dataset_file` and `accuracy` and uploads
# cm.png, all of which are produced by earlier cells. Run the notebook from
# top to bottom; executed on its own in a fresh kernel this cell fails with a
# NameError.
role = get_execution_role()
sess = sagemaker.Session()

# Create the experiment on first use and fall back to loading it when
# creation fails (e.g. because it already exists). `Exception` is caught
# instead of using a bare `except:` so that KeyboardInterrupt and SystemExit
# still propagate.
try:
    my_experiment = experiment.Experiment.create(experiment_name='iris-LR')
except Exception:
    my_experiment = experiment.Experiment.load(experiment_name='iris-LR')

# Give every run its own trial name.
tid = uuid.uuid1()
print(tid)
my_trial = my_experiment.create_trial(trial_name=f'LR-{tid}')

# use `with` statement to ensure `my_tracker.close()` is called
with tracker.Tracker.create(display_name="Training") as my_tracker:
    # log the solver hyperparameter
    my_tracker.log_parameter('solver', solver)
    # log dataset
    my_tracker.log_input(name='input', value=dataset_file)
    # log the accuracy metric as a single data point
    my_tracker.log_metric(metric_name='accuracy', value=accuracy, iteration_number=1)

    # log the confusion-matrix figure
    my_tracker.log_artifact('cm.png', name='confusion-matrix-plot')

    # associate the trial component with the trial
    my_trial.add_trial_component(my_tracker)

INFO:numexpr.utils:NumExpr defaulting to 2 threads.


206268d8-56dd-11ed-a51f-3125bfbcccb3


NameError: name 'solver' is not defined