In [9]:
import tensorflow as tf
import pandas as pd

# Read data 
census = pd.read_csv("0.csv", skiprows=1, header=None) # Skip row 1 & Give header None

# Naming the headers
census.columns = ['age', 'workclass', 'education', 'education_num', 'marital_status',
                 'occupation', 'relationship', 'race', 'gender', 'capital_gain',
                 'capital_loss', 'hours_per_week', 'native_country', 'income_bracket']

# Show first 5 rows
census.head()

Unnamed: 0,age,workclass,education,education_num,marital_status,occupation,relationship,race,gender,capital_gain,capital_loss,hours_per_week,native_country,income_bracket
0,39,State-gov,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


In [15]:
# Show unique value
census['income_bracket'].unique()
census['gender'].unique()

array([' Male', ' Female'], dtype=object)

In [12]:
# To replace value to 0 and 1
def label_fix(label):
    if label == ' <=50K':
        return 0
    else:
        return 1

# Making the change
census['income_bracket'] = census['income_bracket'].apply(label_fix)
census['income_bracket'].unique()
census.head()

Unnamed: 0,age,workclass,education,education_num,marital_status,occupation,relationship,race,gender,capital_gain,capital_loss,hours_per_week,native_country,income_bracket
0,39,State-gov,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,1
1,50,Self-emp-not-inc,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,1
2,38,Private,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,1
3,53,Private,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,1
4,28,Private,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,1


In [13]:
from sklearn.model_selection import train_test_split

x_data = census.drop('income_bracket', axis=1)
y_labels = census['income_bracket']

x_train, x_test, y_train, y_test = train_test_split(x_data, y_labels, test_size=0.3, random_state=101)

In [14]:
# Inserting the numerical features
age = tf.feature_column.numeric_column('age')
education_num = tf.feature_column.numeric_column('education_num')
capital_gain = tf.feature_column.numeric_column('capital_gain')
capital_loss = tf.feature_column.numeric_column('capital_loss')
hours_per_week = tf.feature_column.numeric_column('hours_per_week')

In [16]:
# If I know that feature is only known things Use "categorical.."
gender = tf.feature_column.categorical_column_with_vocabulary_list('gender', ['Male', 'Female'])

# If I have feature have a lot of or unknown things Use 'categorical..._with_hash_bucket'
occupation = tf.feature_column.categorical_column_with_hash_bucket('occupation', hash_bucket_size=1000) # For 1000 value maximum
marital_status = tf.feature_column.categorical_column_with_hash_bucket('marital_status', hash_bucket_size=1000)
relationship = tf.feature_column.categorical_column_with_hash_bucket('relationship', hash_bucket_size=1000)
education = tf.feature_column.categorical_column_with_hash_bucket('education', hash_bucket_size=1000)
workclass = tf.feature_column.categorical_column_with_hash_bucket('workclass', hash_bucket_size=1000)
native_country = tf.feature_column.categorical_column_with_hash_bucket('native_country', hash_bucket_size=1000)

In [18]:
# In the order
feat_cols = [gender, occupation, marital_status, relationship, education, workclass, native_country,
            age, education_num, capital_gain, capital_loss, hours_per_week]

# Input Function
# input_func = tf.estimator.inputs.pandas_input_fn(x=x_train, y=y_train, batch_size=100, num_epochs=None, shuffle=True)