In [1]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
%matplotlib inline

In [2]:
dataset = pd.read_csv("census_data.csv")

In [3]:
dataset.tail()

Unnamed: 0,age,workclass,education,education_num,marital_status,occupation,relationship,race,gender,capital_gain,capital_loss,hours_per_week,native_country,income_bracket
32556,27,Private,Assoc-acdm,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
32557,40,Private,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,>50K
32558,58,Private,HS-grad,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
32559,22,Private,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K
32560,52,Self-emp-inc,HS-grad,9,Married-civ-spouse,Exec-managerial,Wife,White,Female,15024,0,40,United-States,>50K


In [4]:
dataset['income_bracket'].unique()

array([' <=50K', ' >50K'], dtype=object)

In [5]:
def label_fix(label):
    if label == ' <=50K':
        return 0
    else: return 1

In [6]:
dataset['income_bracket'] = dataset['income_bracket'].apply(label_fix)

In [7]:
dataset['income_bracket'].unique()

array([0, 1])

In [8]:
x_data = dataset.drop('income_bracket', axis=1)

In [9]:
y_label = dataset['income_bracket']

In [10]:
X_train, X_test, y_train, y_test = train_test_split(x_data, y_label, test_size=0.33, random_state=42)

In [11]:
dataset.columns

Index(['age', 'workclass', 'education', 'education_num', 'marital_status',
       'occupation', 'relationship', 'race', 'gender', 'capital_gain',
       'capital_loss', 'hours_per_week', 'native_country', 'income_bracket'],
      dtype='object')

In [12]:
age = tf.feature_column.numeric_column('age')
workclass = tf.feature_column.categorical_column_with_hash_bucket('workclass', hash_bucket_size=100)
education = tf.feature_column.categorical_column_with_hash_bucket('education',hash_bucket_size=1000)
education_num = tf.feature_column.numeric_column('education_num')
marital_status = tf.feature_column.categorical_column_with_hash_bucket('marital_status', hash_bucket_size=100)
occupation = tf.feature_column.categorical_column_with_hash_bucket('occupation',hash_bucket_size=1000)
relationship = tf.feature_column.categorical_column_with_hash_bucket('relationship',hash_bucket_size=1000)
race = tf.feature_column.categorical_column_with_hash_bucket('race',hash_bucket_size=100)
gender = tf.feature_column.categorical_column_with_vocabulary_list('gender',['Male','Female'])
capital_gain = tf.feature_column.numeric_column('capital_gain')
capital_loss = tf.feature_column.numeric_column('capital_loss')
hours_per_week = tf.feature_column.numeric_column('hours_per_week')
native_country = tf.feature_column.categorical_column_with_hash_bucket('native_country', hash_bucket_size=1000)

In [13]:
feat_cols = [age,workclass,education, education_num,
             marital_status, occupation, relationship,
             race, gender, capital_gain, capital_loss, 
             hours_per_week, native_country]

In [14]:
input_func = tf.estimator.inputs.pandas_input_fn(batch_size=100,shuffle=True, x=X_train,y=y_train, num_epochs=None)

In [15]:
model = tf.estimator.LinearClassifier(feature_columns=feat_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_save_summary_steps': 100, '_log_step_count_steps': 100, '_model_dir': '/var/folders/j2/_gcbj9cn7qv_9m6yr80zlh3m0000gn/T/tmp98aqnb0o', '_tf_random_seed': 1, '_keep_checkpoint_every_n_hours': 10000, '_keep_checkpoint_max': 5}


In [16]:
model.train(input_fn=input_func, steps=100)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/j2/_gcbj9cn7qv_9m6yr80zlh3m0000gn/T/tmp98aqnb0o/model.ckpt.
INFO:tensorflow:loss = 69.3147, step = 1
INFO:tensorflow:global_step/sec: 135.676
INFO:tensorflow:loss = 288.643, step = 101 (0.740 sec)
INFO:tensorflow:global_step/sec: 231.966
INFO:tensorflow:loss = 195.201, step = 201 (0.431 sec)
INFO:tensorflow:global_step/sec: 185.01
INFO:tensorflow:loss = 286.448, step = 301 (0.538 sec)
INFO:tensorflow:global_step/sec: 231.163
INFO:tensorflow:loss = 88.9643, step = 401 (0.433 sec)
INFO:tensorflow:global_step/sec: 183.908
INFO:tensorflow:loss = 215.381, step = 501 (0.544 sec)
INFO:tensorflow:global_step/sec: 189.752
INFO:tensorflow:loss = 40.778, step = 601 (0.527 sec)
INFO:tensorflow:global_step/sec: 187.614
INFO:tensorflow:loss = 606.099, step = 701 (0.533 sec)
INFO:tensorflow:global_step/sec: 182.295
INFO:tensorflow:loss = 222.168, step = 801 (0.549 sec)
INFO:tensorflow:global_step/se

INFO:tensorflow:global_step/sec: 174.633
INFO:tensorflow:loss = 31.3335, step = 8401 (0.573 sec)
INFO:tensorflow:global_step/sec: 133.72
INFO:tensorflow:loss = 51.4815, step = 8501 (0.750 sec)
INFO:tensorflow:global_step/sec: 207.072
INFO:tensorflow:loss = 54.1248, step = 8601 (0.484 sec)
INFO:tensorflow:global_step/sec: 231.339
INFO:tensorflow:loss = 43.3552, step = 8701 (0.431 sec)
INFO:tensorflow:global_step/sec: 166.058
INFO:tensorflow:loss = 29.0829, step = 8801 (0.604 sec)
INFO:tensorflow:global_step/sec: 189.018
INFO:tensorflow:loss = 61.4074, step = 8901 (0.530 sec)
INFO:tensorflow:global_step/sec: 149.941
INFO:tensorflow:loss = 66.7411, step = 9001 (0.663 sec)
INFO:tensorflow:global_step/sec: 171.156
INFO:tensorflow:loss = 41.8449, step = 9101 (0.584 sec)
INFO:tensorflow:global_step/sec: 190.858
INFO:tensorflow:loss = 41.815, step = 9201 (0.527 sec)
INFO:tensorflow:global_step/sec: 162.773
INFO:tensorflow:loss = 268.762, step = 9301 (0.612 sec)
INFO:tensorflow:global_step/sec:

INFO:tensorflow:global_step/sec: 188.984
INFO:tensorflow:loss = 31.0489, step = 16801 (0.529 sec)
INFO:tensorflow:global_step/sec: 234.932
INFO:tensorflow:loss = 31.5865, step = 16901 (0.426 sec)
INFO:tensorflow:global_step/sec: 192.485
INFO:tensorflow:loss = 26.5096, step = 17001 (0.519 sec)
INFO:tensorflow:global_step/sec: 189.581
INFO:tensorflow:loss = 33.6597, step = 17101 (0.528 sec)
INFO:tensorflow:global_step/sec: 226.873
INFO:tensorflow:loss = 21.5472, step = 17201 (0.443 sec)
INFO:tensorflow:global_step/sec: 255.882
INFO:tensorflow:loss = 84.9479, step = 17301 (0.391 sec)
INFO:tensorflow:global_step/sec: 193.3
INFO:tensorflow:loss = 44.1421, step = 17401 (0.514 sec)
INFO:tensorflow:global_step/sec: 190.152
INFO:tensorflow:loss = 28.6281, step = 17501 (0.526 sec)
INFO:tensorflow:global_step/sec: 190.637
INFO:tensorflow:loss = 35.6638, step = 17601 (0.524 sec)
INFO:tensorflow:global_step/sec: 220.808
INFO:tensorflow:loss = 42.0779, step = 17701 (0.455 sec)
INFO:tensorflow:global

INFO:tensorflow:global_step/sec: 164.276
INFO:tensorflow:loss = 29.9469, step = 25201 (0.607 sec)
INFO:tensorflow:global_step/sec: 210.005
INFO:tensorflow:loss = 38.7741, step = 25301 (0.479 sec)
INFO:tensorflow:global_step/sec: 191.758
INFO:tensorflow:loss = 39.7837, step = 25401 (0.518 sec)
INFO:tensorflow:global_step/sec: 194.655
INFO:tensorflow:loss = 46.4466, step = 25501 (0.512 sec)
INFO:tensorflow:global_step/sec: 190.45
INFO:tensorflow:loss = 36.4049, step = 25601 (0.525 sec)
INFO:tensorflow:global_step/sec: 198.19
INFO:tensorflow:loss = 32.1329, step = 25701 (0.505 sec)
INFO:tensorflow:global_step/sec: 194.931
INFO:tensorflow:loss = 26.9706, step = 25801 (0.515 sec)
INFO:tensorflow:global_step/sec: 198.689
INFO:tensorflow:loss = 33.2054, step = 25901 (0.501 sec)
INFO:tensorflow:global_step/sec: 178.167
INFO:tensorflow:loss = 45.517, step = 26001 (0.561 sec)
INFO:tensorflow:global_step/sec: 192.397
INFO:tensorflow:loss = 26.8624, step = 26101 (0.520 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 246.551
INFO:tensorflow:loss = 39.2935, step = 33601 (0.407 sec)
INFO:tensorflow:global_step/sec: 216.441
INFO:tensorflow:loss = 32.5751, step = 33701 (0.460 sec)
INFO:tensorflow:global_step/sec: 179.639
INFO:tensorflow:loss = 32.2967, step = 33801 (0.559 sec)
INFO:tensorflow:global_step/sec: 185.364
INFO:tensorflow:loss = 36.3369, step = 33901 (0.538 sec)
INFO:tensorflow:global_step/sec: 178.998
INFO:tensorflow:loss = 26.384, step = 34001 (0.563 sec)
INFO:tensorflow:global_step/sec: 183.997
INFO:tensorflow:loss = 35.6806, step = 34101 (0.539 sec)
INFO:tensorflow:global_step/sec: 190.22
INFO:tensorflow:loss = 30.1453, step = 34201 (0.528 sec)
INFO:tensorflow:global_step/sec: 178.063
INFO:tensorflow:loss = 50.8633, step = 34301 (0.562 sec)
INFO:tensorflow:global_step/sec: 163.321
INFO:tensorflow:loss = 50.0581, step = 34401 (0.609 sec)
INFO:tensorflow:global_step/sec: 195.092
INFO:tensorflow:loss = 44.6828, step = 34501 (0.514 sec)
INFO:tensorflow:global

INFO:tensorflow:global_step/sec: 208.093
INFO:tensorflow:loss = 33.0154, step = 42001 (0.480 sec)
INFO:tensorflow:global_step/sec: 212.481
INFO:tensorflow:loss = 45.6215, step = 42101 (0.471 sec)
INFO:tensorflow:global_step/sec: 221.639
INFO:tensorflow:loss = 20.2598, step = 42201 (0.453 sec)
INFO:tensorflow:global_step/sec: 214.514
INFO:tensorflow:loss = 33.199, step = 42301 (0.465 sec)
INFO:tensorflow:global_step/sec: 203.83
INFO:tensorflow:loss = 24.9195, step = 42401 (0.491 sec)
INFO:tensorflow:global_step/sec: 212.375
INFO:tensorflow:loss = 42.7038, step = 42501 (0.471 sec)
INFO:tensorflow:global_step/sec: 204.38
INFO:tensorflow:loss = 23.1406, step = 42601 (0.489 sec)
INFO:tensorflow:global_step/sec: 211.609
INFO:tensorflow:loss = 37.4546, step = 42701 (0.473 sec)
INFO:tensorflow:global_step/sec: 232.32
INFO:tensorflow:loss = 31.935, step = 42801 (0.430 sec)
INFO:tensorflow:global_step/sec: 211.626
INFO:tensorflow:loss = 35.5741, step = 42901 (0.473 sec)
INFO:tensorflow:global_st

INFO:tensorflow:global_step/sec: 203.8
INFO:tensorflow:loss = 35.775, step = 50401 (0.490 sec)
INFO:tensorflow:global_step/sec: 204.282
INFO:tensorflow:loss = 26.9007, step = 50501 (0.490 sec)
INFO:tensorflow:global_step/sec: 210.255
INFO:tensorflow:loss = 33.0855, step = 50601 (0.476 sec)
INFO:tensorflow:global_step/sec: 214.152
INFO:tensorflow:loss = 26.6833, step = 50701 (0.467 sec)
INFO:tensorflow:global_step/sec: 208.737
INFO:tensorflow:loss = 33.3651, step = 50801 (0.479 sec)
INFO:tensorflow:global_step/sec: 208.878
INFO:tensorflow:loss = 39.9729, step = 50901 (0.479 sec)
INFO:tensorflow:global_step/sec: 219.457
INFO:tensorflow:loss = 35.8348, step = 51001 (0.458 sec)
INFO:tensorflow:global_step/sec: 210.546
INFO:tensorflow:loss = 33.3702, step = 51101 (0.473 sec)
INFO:tensorflow:global_step/sec: 234.344
INFO:tensorflow:loss = 34.9918, step = 51201 (0.428 sec)
INFO:tensorflow:global_step/sec: 211.303
INFO:tensorflow:loss = 46.3932, step = 51301 (0.471 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 245.162
INFO:tensorflow:loss = 25.7083, step = 58801 (0.407 sec)
INFO:tensorflow:global_step/sec: 239.868
INFO:tensorflow:loss = 23.9649, step = 58901 (0.415 sec)
INFO:tensorflow:global_step/sec: 188.324
INFO:tensorflow:loss = 30.9668, step = 59001 (0.531 sec)
INFO:tensorflow:global_step/sec: 226.9
INFO:tensorflow:loss = 31.6615, step = 59101 (0.444 sec)
INFO:tensorflow:global_step/sec: 186.134
INFO:tensorflow:loss = 28.7538, step = 59201 (0.534 sec)
INFO:tensorflow:global_step/sec: 166.374
INFO:tensorflow:loss = 33.4339, step = 59301 (0.601 sec)
INFO:tensorflow:global_step/sec: 181.36
INFO:tensorflow:loss = 23.0238, step = 59401 (0.551 sec)
INFO:tensorflow:global_step/sec: 183.637
INFO:tensorflow:loss = 31.5095, step = 59501 (0.545 sec)
INFO:tensorflow:global_step/sec: 256.115
INFO:tensorflow:loss = 26.5077, step = 59601 (0.390 sec)
INFO:tensorflow:global_step/sec: 191.485
INFO:tensorflow:loss = 42.8013, step = 59701 (0.522 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 260.484
INFO:tensorflow:loss = 37.4275, step = 67201 (0.386 sec)
INFO:tensorflow:global_step/sec: 214.273
INFO:tensorflow:loss = 26.5038, step = 67301 (0.466 sec)
INFO:tensorflow:global_step/sec: 192.267
INFO:tensorflow:loss = 30.7397, step = 67401 (0.520 sec)
INFO:tensorflow:global_step/sec: 189.496
INFO:tensorflow:loss = 33.0707, step = 67501 (0.526 sec)
INFO:tensorflow:global_step/sec: 193.484
INFO:tensorflow:loss = 28.5947, step = 67601 (0.517 sec)
INFO:tensorflow:global_step/sec: 216.384
INFO:tensorflow:loss = 28.1989, step = 67701 (0.463 sec)
INFO:tensorflow:global_step/sec: 194.374
INFO:tensorflow:loss = 28.024, step = 67801 (0.514 sec)
INFO:tensorflow:global_step/sec: 226.083
INFO:tensorflow:loss = 32.4436, step = 67901 (0.444 sec)
INFO:tensorflow:global_step/sec: 195.391
INFO:tensorflow:loss = 28.8405, step = 68001 (0.510 sec)
INFO:tensorflow:global_step/sec: 192.651
INFO:tensorflow:loss = 26.3181, step = 68101 (0.519 sec)
INFO:tensorflow:globa

INFO:tensorflow:global_step/sec: 174.448
INFO:tensorflow:loss = 35.0149, step = 75601 (0.574 sec)
INFO:tensorflow:global_step/sec: 175.617
INFO:tensorflow:loss = 30.1573, step = 75701 (0.571 sec)
INFO:tensorflow:global_step/sec: 159.664
INFO:tensorflow:loss = 33.0222, step = 75801 (0.624 sec)
INFO:tensorflow:global_step/sec: 217.821
INFO:tensorflow:loss = 36.3222, step = 75901 (0.460 sec)
INFO:tensorflow:global_step/sec: 271.752
INFO:tensorflow:loss = 28.3699, step = 76001 (0.368 sec)
INFO:tensorflow:global_step/sec: 179.875
INFO:tensorflow:loss = 21.3048, step = 76101 (0.557 sec)
INFO:tensorflow:global_step/sec: 198.826
INFO:tensorflow:loss = 26.6196, step = 76201 (0.500 sec)
INFO:tensorflow:global_step/sec: 216.296
INFO:tensorflow:loss = 28.2808, step = 76301 (0.465 sec)
INFO:tensorflow:global_step/sec: 179.183
INFO:tensorflow:loss = 51.1151, step = 76401 (0.555 sec)
INFO:tensorflow:global_step/sec: 192.463
INFO:tensorflow:loss = 34.1187, step = 76501 (0.519 sec)
INFO:tensorflow:glob

INFO:tensorflow:global_step/sec: 212.062
INFO:tensorflow:loss = 32.4127, step = 84001 (0.475 sec)
INFO:tensorflow:global_step/sec: 212.96
INFO:tensorflow:loss = 30.4556, step = 84101 (0.468 sec)
INFO:tensorflow:global_step/sec: 158.628
INFO:tensorflow:loss = 32.5517, step = 84201 (0.628 sec)
INFO:tensorflow:global_step/sec: 192.372
INFO:tensorflow:loss = 47.7376, step = 84301 (0.519 sec)
INFO:tensorflow:global_step/sec: 223.894
INFO:tensorflow:loss = 39.6176, step = 84401 (0.450 sec)
INFO:tensorflow:global_step/sec: 226.727
INFO:tensorflow:loss = 23.3307, step = 84501 (0.438 sec)
INFO:tensorflow:global_step/sec: 176.602
INFO:tensorflow:loss = 27.52, step = 84601 (0.566 sec)
INFO:tensorflow:global_step/sec: 231.342
INFO:tensorflow:loss = 29.9394, step = 84701 (0.432 sec)
INFO:tensorflow:global_step/sec: 213.445
INFO:tensorflow:loss = 26.9458, step = 84801 (0.471 sec)
INFO:tensorflow:global_step/sec: 182.495
INFO:tensorflow:loss = 27.3111, step = 84901 (0.550 sec)
INFO:tensorflow:global_

INFO:tensorflow:global_step/sec: 196.329
INFO:tensorflow:loss = 40.4497, step = 92401 (0.509 sec)
INFO:tensorflow:global_step/sec: 213.854
INFO:tensorflow:loss = 32.4666, step = 92501 (0.470 sec)
INFO:tensorflow:global_step/sec: 227.235
INFO:tensorflow:loss = 25.2751, step = 92601 (0.439 sec)
INFO:tensorflow:global_step/sec: 180.333
INFO:tensorflow:loss = 33.2723, step = 92701 (0.555 sec)
INFO:tensorflow:global_step/sec: 201.21
INFO:tensorflow:loss = 31.2265, step = 92801 (0.497 sec)
INFO:tensorflow:global_step/sec: 176.446
INFO:tensorflow:loss = 28.6471, step = 92901 (0.565 sec)
INFO:tensorflow:global_step/sec: 181.915
INFO:tensorflow:loss = 74.3471, step = 93001 (0.553 sec)
INFO:tensorflow:global_step/sec: 139.161
INFO:tensorflow:loss = 25.7192, step = 93101 (0.715 sec)
INFO:tensorflow:global_step/sec: 197.467
INFO:tensorflow:loss = 24.2556, step = 93201 (0.508 sec)
INFO:tensorflow:global_step/sec: 208.64
INFO:tensorflow:loss = 30.0157, step = 93301 (0.480 sec)
INFO:tensorflow:global

<tensorflow.python.estimator.canned.linear.LinearClassifier at 0x121975eb8>

In [17]:
eval_func = tf.estimator.inputs.pandas_input_fn(x=X_test, y=y_test, shuffle=False, num_epochs=1)

In [18]:
results = list(model.predict(input_fn=eval_func))

INFO:tensorflow:Restoring parameters from /var/folders/j2/_gcbj9cn7qv_9m6yr80zlh3m0000gn/T/tmp98aqnb0o/model.ckpt-100000


In [19]:
results

[{'class_ids': array([0]),
  'classes': array([b'0'], dtype=object),
  'logistic': array([ 0.02769974], dtype=float32),
  'logits': array([-3.55824184], dtype=float32),
  'probabilities': array([ 0.97230023,  0.02769973], dtype=float32)},
 {'class_ids': array([0]),
  'classes': array([b'0'], dtype=object),
  'logistic': array([ 0.48355702], dtype=float32),
  'logits': array([-0.0657956], dtype=float32),
  'probabilities': array([ 0.51644295,  0.48355702], dtype=float32)},
 {'class_ids': array([1]),
  'classes': array([b'1'], dtype=object),
  'logistic': array([ 0.68369406], dtype=float32),
  'logits': array([ 0.77080059], dtype=float32),
  'probabilities': array([ 0.31630597,  0.68369406], dtype=float32)},
 {'class_ids': array([0]),
  'classes': array([b'0'], dtype=object),
  'logistic': array([ 0.03784166], dtype=float32),
  'logits': array([-3.23576832], dtype=float32),
  'probabilities': array([ 0.96215832,  0.03784166], dtype=float32)},
 {'class_ids': array([0]),
  'classes': array

In [20]:
final_results = [pred['class_ids'][0] for pred in results]

In [21]:
print(classification_report(y_test, final_results))

             precision    recall  f1-score   support

          0       0.88      0.94      0.91      8196
          1       0.74      0.59      0.66      2550

avg / total       0.85      0.85      0.85     10746

