In [30]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [15]:
# Load the dataset from the CSV file in the current working directory.
diabetes = pd.read_csv(filepath_or_buffer="pima-indians-diabetes.csv")

In [16]:
# List the column labels of the loaded frame.
diabetes.columns

Index(['Number_pregnant', 'Glucose_concentration', 'Blood_pressure', 'Triceps',
       'Insulin', 'BMI', 'Pedigree', 'Age', 'Class', 'Group'],
      dtype='object')

In [17]:
# Preview the first five rows of the loaded frame.
diabetes.head(n=5)

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class,Group
0,6,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,1,B
1,1,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,0,C
2,8,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,1,B
3,1,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,0,B
4,0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,1,C


In [18]:
# Columns that get min-max scaled; 'Age', 'Class' and 'Group' are left out.
col_to_norm = [
    'Number_pregnant',
    'Glucose_concentration',
    'Blood_pressure',
    'Triceps',
    'Insulin',
    'BMI',
    'Pedigree',
]

##### Normalising data

In [19]:
# Min-max scale every column listed in col_to_norm to the [0, 1] range.
# A column whose values are all equal has a zero range; dividing by it would
# fill the column with NaN, so a zero range is swapped for 1 and such a column
# scales to 0.0 instead.
# NOTE(review): min/max are computed on the full frame, i.e. before the
# train/test split further down — confirm that is acceptable for this analysis.
features_to_scale = diabetes[col_to_norm]
col_min = features_to_scale.min()
col_range = (features_to_scale.max() - col_min).replace(0, 1)
diabetes[col_to_norm] = (features_to_scale - col_min) / col_range

In [20]:
# Preview the first five rows again, now that the col_to_norm columns were rescaled.
diabetes.head(n=5)

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class,Group
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,1,B
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,0,C
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,1,B
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,0,B
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,1,C


In [48]:
# Numeric feature columns, one per continuous input; each is keyed by the
# DataFrame column name it reads from.
(
    num_preg,
    plasma_gluc,
    dias_press,
    tricep,
    insulin,
    bmi,
    diabetes_pedigree,
    age,
) = [
    tf.feature_column.numeric_column(column_name)
    for column_name in (
        'Number_pregnant',
        'Glucose_concentration',
        'Blood_pressure',
        'Triceps',
        'Insulin',
        'BMI',
        'Pedigree',
        'Age',
    )
]

In [50]:
# Categorical feature for the 'Group' column, restricted to a fixed vocabulary.
assigned_group = tf.feature_column.categorical_column_with_vocabulary_list(
    key='Group',
    vocabulary_list=['A', 'B', 'C', 'D'],
)

### Bucketizing the numeric age into categorical ranges so the model can weight each age band separately

In [51]:
# Bucketize the numeric age column at the decade boundaries 20 through 70.
age_buckets = tf.feature_column.bucketized_column(
    source_column=age,
    boundaries=[20, 30, 40, 50, 60, 70],
)

In [52]:
# Feature columns handed to the estimator. Age enters only through its
# bucketized form (age_buckets), not as the raw numeric column.
feat_cols = [
    num_preg,
    plasma_gluc,
    dias_press,
    tricep,
    insulin,
    bmi,
    diabetes_pedigree,
    assigned_group,
    age_buckets,
]

In [53]:
# Feature frame: every column except the 'Class' target.
x_data = diabetes.drop(columns='Class')

In [54]:
# Target series: the 'Class' column.
labels = diabetes.loc[:, 'Class']

In [55]:
# Hold out 33% of the rows for evaluation; the fixed random_state keeps the
# split the same across runs.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    labels,
    test_size=0.33,
    random_state=101,
)

In [56]:
# Training input function: shuffled batches of 10 rows from the training
# split, allowing up to 1000 passes over the data.
input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=10,
    num_epochs=1000,
    shuffle=True,
)

In [57]:
# Two-class linear classifier over the feature columns in feat_cols.
# No model_dir is passed, so the estimator picks its own checkpoint directory.
model = tf.estimator.LinearClassifier(
    n_classes=2,
    feature_columns=feat_cols,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\Dell\\AppData\\Local\\Temp\\tmp5y6xfwoj', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000287B7B7C630>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [58]:
# Train for 1000 steps on batches produced by input_func.
model.train(
    steps=1000,
    input_fn=input_func,
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\Dell\AppData\Local\Temp\tmp5y6xfwoj\model.ckpt.
INFO:tensorflow:loss = 6.931472, step = 1
INFO:tensorflow:global_step/sec: 136.6
INFO:tensorflow:loss = 5.798682, step = 101 (0.732 sec)
INFO:tensorflow:global_step/sec: 319.249
INFO:tensorflow:loss = 4.3921375, step = 201 (0.313 sec)
INFO:tensorflow:global_step/sec: 370.251
INFO:tensorflow:loss = 6.825761, step = 301 (0.270 sec)
INFO:tensorflow:global_step/sec: 352.516
INFO:tensorflow:loss = 5.4334965, step = 401 (0.284 sec)
INFO:tensorflow:global_step/sec: 355.462
INFO:tensorflow:loss = 4.8047433, step = 501 (0.281 sec)
INFO:tensorflow:global_step/sec: 351.671
INFO:ten

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifier at 0x287b83d00b8>

In [62]:
# Evaluation input function: one unshuffled pass over the held-out split,
# labels included, in batches of 10.
eval_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test,
    y=y_test,
    batch_size=10,
    num_epochs=1,
    shuffle=False,
)

In [63]:
# Score the trained model on the held-out split; the metrics come back as a dict.
results = model.evaluate(input_fn=eval_input_func)

INFO:tensorflow:Calling model_fn.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-05-29T17:12:47Z
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from C:\Users\Dell\AppData\Local\Temp\tmp5y6xfwoj\model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-05-29-17:12:49
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.72440946, accuracy_baseline = 0.65748036, auc = 0.7836052, auc_precision_recall = 0.61829674, average_loss = 0.5349781, global_step = 1000, label/mean = 0.34251967, loss = 5.226324, precision = 0.61333334, prediction/mean = 0.36406332, recall = 0.52873564
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: C:\Users\Dell\AppData\Local\Te

In [64]:
# Show the evaluation metrics dict as the cell's output value; a bare final
# expression uses the notebook's display instead of plain print().
results

{'accuracy': 0.72440946, 'accuracy_baseline': 0.65748036, 'auc': 0.7836052, 'auc_precision_recall': 0.61829674, 'average_loss': 0.5349781, 'label/mean': 0.34251967, 'loss': 5.226324, 'precision': 0.61333334, 'prediction/mean': 0.36406332, 'recall': 0.52873564, 'global_step': 1000}


## Predictions

In [65]:
# Prediction input function: one unshuffled pass over the held-out features.
# No labels are supplied, and the row order of X_test is preserved.
pred_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test,
    batch_size=10,
    num_epochs=1,
    shuffle=False,
)

In [72]:
# model.predict() returns a one-shot generator. Materialise it into a list
# here so later cells can be re-run (or index into the predictions) without
# finding the generator already exhausted.
predictions = list(model.predict(input_fn=pred_input_func))

In [73]:
# Display only the first five prediction dicts rather than one entry per
# test row; list() accepts either a generator or an already-built list.
list(predictions)[:5]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\Dell\AppData\Local\Temp\tmp5y6xfwoj\model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


[{'logits': array([0.09215713], dtype=float32),
  'logistic': array([0.523023], dtype=float32),
  'probabilities': array([0.47697702, 0.523023  ], dtype=float32),
  'class_ids': array([1], dtype=int64),
  'classes': array([b'1'], dtype=object)},
 {'logits': array([0.4224236], dtype=float32),
  'logistic': array([0.60406303], dtype=float32),
  'probabilities': array([0.39593694, 0.60406303], dtype=float32),
  'class_ids': array([1], dtype=int64),
  'classes': array([b'1'], dtype=object)},
 {'logits': array([-0.44208878], dtype=float32),
  'logistic': array([0.39124337], dtype=float32),
  'probabilities': array([0.60875666, 0.39124337], dtype=float32),
  'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object)},
 {'logits': array([-0.7793871], dtype=float32),
  'logistic': array([0.314452], dtype=float32),
  'probabilities': array([0.685548, 0.314452], dtype=float32),
  'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object)},
 {'logits': arra