In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals
import pandas as pd
import seaborn as sb
import tensorflow as tf
from tensorflow import keras
from tensorflow.estimator import LinearClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
# Print the installed TensorFlow version so it is recorded alongside the cell outputs.
print(tf.__version__)

2.4.1


In [5]:
# Column names applied to the CSV files when they are read; 'Species' is the label column.
col_names = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
    'Species',
]

# Human-readable class names (not referenced again in the visible cells).
target_dimensions = ['Setosa', 'Versicolor', 'Virginica']

# Fetch the training CSV through the Keras download helper and keep the returned path.
training_data_path = tf.keras.utils.get_file(
    "iris_training.csv",
    "https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv",
)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv


In [7]:
# Fetch the test CSV the same way as the training file and keep its local path.
test_data_path = tf.keras.utils.get_file(
    "iris_test.csv",
    "https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv",
)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv


In [8]:
# Read the training CSV, replacing its header row with our own column names.
training = pd.read_csv(training_data_path, names=col_names, header=0)
# Keep only rows whose Species code is >= 1, turning the task into a two-class problem.
# .copy() makes the result an independent frame, so the column assignment below
# does not write into a slice of the original (avoids SettingWithCopyWarning).
training = training[training['Species'] >= 1].copy()
# Remap the remaining codes 1 -> 0 and 2 -> 1 to obtain a binary label.
training['Species'] = training['Species'].replace([1,2], [0,1])
# Read the test CSV with the same column names; it is filtered in the next cell.
test = pd.read_csv(test_data_path, names=col_names, header=0)

In [9]:
# Apply the same filtering and relabelling to the test split as was done for training.
# .copy() makes the filtered frame independent so the assignment below does not
# write into a slice (avoids SettingWithCopyWarning).
test = test[test['Species'] >= 1].copy()
test['Species'] = test['Species'].replace([1,2],[0,1])
# Renumber rows from 0 after filtering. Reassigning instead of using inplace=True
# avoids mutating objects that earlier cells may still reference.
training = training.reset_index(drop=True)
test = test.reset_index(drop=True)
# Stack both splits into one frame; ignore_index gives the result a unique
# 0..n-1 index instead of repeating the labels of the two inputs.
iris_dataset = pd.concat([training, test], axis=0, ignore_index=True)
iris_dataset.describe()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Species
count,100.0,100.0,100.0,100.0,100.0
mean,6.262,2.872,4.906,1.676,0.5
std,0.662834,0.332751,0.825578,0.424769,0.502519
min,4.9,2.0,3.0,1.0,0.0
25%,5.8,2.7,4.375,1.3,0.0
50%,6.3,2.9,4.9,1.6,0.5
75%,6.7,3.025,5.525,2.0,1.0
max,7.9,3.8,6.9,2.5,1.0


In [10]:
# Features are every column except the label; the label is kept as a one-column DataFrame.
X_data = iris_dataset.drop(columns=['Species'])
Y_data = iris_dataset.loc[:, ['Species']]

In [11]:
# Hold out 20% of the rows for testing. A fixed random_state makes the split,
# and therefore every score computed from it, reproducible across kernel restarts.
training_features, test_features, training_labels, test_labels = train_test_split(
    X_data, Y_data, test_size=0.2, random_state=42
)

In [12]:
def norm(x, stats=None):
  """Standardize each column of ``x`` by subtracting a mean and dividing by a std.

  Args:
    x: DataFrame of numeric feature columns.
    stats: Optional per-column statistics indexed by column name, with 'mean'
      and 'std' columns (for example ``train.describe().transpose()``). Pass
      the training-set statistics when normalizing held-out data so both sets
      go through the same transformation. When omitted, the statistics are
      computed from ``x`` itself.

  Returns:
    DataFrame with the same columns and index as ``x`` holding the
    standardized values.
  """
  if stats is None:
    stats = x.describe().transpose()
  # A constant column has std == 0; dividing by it would produce NaN/inf.
  # Using 1 instead leaves such a column at (x - mean), i.e. all zeros.
  std = stats['std'].replace(0, 1)
  return (x - stats['mean'])/std

normed_train_features = norm(training_features)
# Standardize the test features with the TRAINING mean/std rather than their own.
# Using test-set statistics would apply a different transformation than the one
# the model was trained on. DataFrame.std() uses the same sample std (ddof=1)
# that describe() reports, so this matches what norm() does for the training set.
train_feature_mean = training_features.mean()
train_feature_std = training_features.std()
normed_test_features = (test_features - train_feature_mean) / train_feature_std

In [21]:
def feed_input(features_dataframe, target_dataframe, num_of_epochs=10, shuffle=True,batch_size=32):
  """Build a zero-argument input function that yields a batched tf.data.Dataset.

  Args:
    features_dataframe: Feature table; converted with ``dict(...)`` so each
      column is passed under its column name.
    target_dataframe: Labels sliced alongside the features.
    num_of_epochs: Number of times the batched dataset is repeated.
    shuffle: Whether to shuffle (buffer of 2000) before batching.
    batch_size: Number of rows per batch.

  Returns:
    A callable taking no arguments that constructs and returns the dataset.
  """
  def input_feed_function():
    slices = (dict(features_dataframe), target_dataframe)
    ds = tf.data.Dataset.from_tensor_slices(slices)
    ds = ds.shuffle(2000) if shuffle else ds
    return ds.batch(batch_size).repeat(num_of_epochs)
  return input_feed_function

In [22]:
# Training feed: default settings (shuffled, repeated for the default number of epochs).
train_feed_input = feed_input(normed_train_features, training_labels)

# Evaluation feeds: a single unshuffled pass, so predictions come back in row order.
train_feed_input_testing = feed_input(
    normed_train_features,
    training_labels,
    num_of_epochs=1,
    shuffle=False,
)
test_feed_input = feed_input(
    normed_test_features,
    test_labels,
    num_of_epochs=1,
    shuffle=False,
)

In [23]:
# One numeric feature column per input feature, keyed by the DataFrame column name.
feature_columns_numeric = [
    tf.feature_column.numeric_column(column_name)
    for column_name in training_features.columns
]

In [24]:
# NOTE(review): this import would normally live with the other imports in the first cell.
from tensorflow.estimator import BoostedTreesClassifier

# NOTE(review): the training feed uses the default batch_size of 32, so the
# training rows span more than one batch; n_batches_per_layer=1 presumably means
# each tree layer is built from a single batch - confirm this is intended.
btree_model = BoostedTreesClassifier(
    feature_columns=feature_columns_numeric,
    n_batches_per_layer=1,
)
btree_model.train(train_feed_input)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmppbfwqtw2', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:ten

<tensorflow_estimator.python.estimator.canned.boosted_trees.BoostedTreesClassifier at 0x7f4a48e663d0>

In [25]:
# Predict on the unshuffled single-pass feeds for the training and test rows.
train_predictions = btree_model.predict(train_feed_input_testing)
test_predictions = btree_model.predict(test_feed_input)

# Take the first entry of each prediction's 'classes' value and decode it from bytes to str.
train_predictions_series = pd.Series(
    [pred['classes'][0].decode("utf-8") for pred in train_predictions]
)
test_predictions_series = pd.Series(
    [pred['classes'][0].decode("utf-8") for pred in test_predictions]
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmppbfwqtw2/model.ckpt-29
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmppbfwqtw2/model.ckpt-29
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [27]:
 def calculate_binary_class_scores(y_true, y_pred):
   """Compute accuracy, precision and recall for a set of binary predictions.

   Args:
     y_true: Ground-truth labels.
     y_pred: Predicted labels; cast to int64 before scoring.

   Returns:
     Tuple ``(accuracy, precision, recall)``.
   """
   # Convert once and reuse the integer predictions for all three metrics.
   predicted = y_pred.astype('int64')
   return (
     accuracy_score(y_true, predicted),
     precision_score(y_true, predicted),
     recall_score(y_true, predicted),
   )

In [28]:
# Score the model's predictions on the training rows.
(
    train_accuracy_score,
    train_precision_score,
    train_recall_score,
) = calculate_binary_class_scores(training_labels, train_predictions_series)

In [29]:
 # Score the model's predictions on the held-out test rows.
 (
     test_accuracy_score,
     test_precision_score,
     test_recall_score,
 ) = calculate_binary_class_scores(test_labels, test_predictions_series)

In [30]:
 # Report training accuracy as a percentage rounded to two decimals.
 print('Training Data Accuracy (%) = ', round(train_accuracy_score*100,2))

Training Data Accuracy (%) =  97.5


In [31]:
# Report training precision as a percentage rounded to two decimals.
print('Training Data Precision (%) = ', round(train_precision_score*100,2))

Training Data Precision (%) =  95.45
