# German Credit Analysis, Using DNN

In [93]:
import pandas as pd

### Read in the data

In [94]:
german = pd.read_csv('MGMT635_GermanCreditData.csv')

In [95]:
german.head()

Unnamed: 0,checking,duration,history,purpose,amount,savings,employment,rate,status,guarantor,...,propert,age,other_debt,housing_type,existing_credit_bank,job,dependents,phone,foreign,target
0,11,6,34,43,1169,65,75,4,93,101,...,121,67,143,152,2,173,1,192,201,1
1,12,48,32,43,5951,61,73,2,92,101,...,121,22,143,152,1,173,1,191,201,2
2,14,12,34,46,2096,61,74,2,93,101,...,121,49,143,152,1,172,2,191,201,1
3,11,42,32,42,7882,61,74,2,93,103,...,122,45,143,153,1,173,2,191,201,1
4,11,24,33,40,4870,61,73,3,93,101,...,124,53,143,153,2,173,2,191,201,2


In [96]:
attribute_dict = {1:['checking', 'history', 'purpose', 'savings',
                    'employment', 'status', 'guarantor','propert', 'other_debt', 'housing_type', 
                    'job', 'phone', 'foreign'], 
                  2:['duration', 'amount', 'rate', 'residence_time','age', 'existing_credit_bank',
                     'dependents',]}

In [97]:
german.describe()

Unnamed: 0,checking,duration,history,purpose,amount,savings,employment,rate,status,guarantor,...,propert,age,other_debt,housing_type,existing_credit_bank,job,dependents,phone,foreign,target
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,...,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,12.577,20.903,32.545,47.148,3271.258,62.105,73.384,2.973,92.682,101.145,...,122.358,35.546,142.675,151.929,1.407,172.904,1.155,191.404,201.037,1.3
std,1.257638,12.058814,1.08312,40.095333,2822.736876,1.580023,1.208306,1.118715,0.70808,0.477706,...,1.050209,11.375469,0.705601,0.531264,0.577654,0.653614,0.362086,0.490943,0.188856,0.458487
min,11.0,4.0,30.0,40.0,250.0,61.0,71.0,1.0,91.0,101.0,...,121.0,19.0,141.0,151.0,1.0,171.0,1.0,191.0,201.0,1.0
25%,11.0,12.0,32.0,41.0,1365.5,61.0,73.0,2.0,92.0,101.0,...,121.0,27.0,143.0,152.0,1.0,173.0,1.0,191.0,201.0,1.0
50%,12.0,18.0,32.0,42.0,2319.5,61.0,73.0,3.0,93.0,101.0,...,122.0,33.0,143.0,152.0,1.0,173.0,1.0,191.0,201.0,1.0
75%,14.0,24.0,34.0,43.0,3972.25,63.0,75.0,4.0,93.0,101.0,...,123.0,42.0,143.0,152.0,2.0,173.0,1.0,192.0,201.0,2.0
max,14.0,72.0,34.0,410.0,18424.0,65.0,75.0,4.0,94.0,103.0,...,124.0,75.0,143.0,153.0,4.0,174.0,2.0,192.0,202.0,2.0


##### Noted as most relevant attributes 

In [98]:
german2 = german[['duration', 'history','amount', 'savings',
       'status','housing_type', 'foreign','target']]

In [99]:
german2.head()


Unnamed: 0,duration,history,amount,savings,status,housing_type,foreign,target
0,6,34,1169,65,93,152,201,1
1,48,32,5951,61,92,152,201,2
2,12,34,2096,61,93,152,201,1
3,42,32,7882,61,93,153,201,1
4,24,33,4870,61,93,153,201,2


### Chop up the data (training, testing)

In [100]:
from sklearn.model_selection import train_test_split

In [101]:
x_data = german2.drop('target',axis=1)

In [102]:
y_labels = german2['target']

In [103]:
X_train, X_test, y_train, y_test = train_test_split(x_data, y_labels, test_size=0.02, random_state=101)

In [104]:
german.columns

Index(['checking', 'duration', 'history', 'purpose', 'amount', 'savings',
       'employment', 'rate', 'status', 'guarantor', 'residence_time',
       'propert', 'age', 'other_debt', 'housing_type', 'existing_credit_bank',
       'job', 'dependents', 'phone', 'foreign', 'target'],
      dtype='object')

In [105]:
import tensorflow as tf

### Assign feature columns for TensorFlow (numeric, categorical, etc.)

In [106]:
history = tf.feature_column.numeric_column("history")
savings = tf.feature_column.numeric_column("savings")
status = tf.feature_column.numeric_column("status")
housing_type = tf.feature_column.numeric_column("housing_type")
foreign = tf.feature_column.numeric_column("foreign")
duration = tf.feature_column.numeric_column("duration")
amount = tf.feature_column.numeric_column("amount")

In [107]:
for attribute in attribute_dict.values():
    print(attribute)

['checking', 'history', 'purpose', 'savings', 'employment', 'status', 'guarantor', 'propert', 'other_debt', 'housing_type', 'job', 'phone', 'foreign']
['duration', 'amount', 'rate', 'residence_time', 'age', 'existing_credit_bank', 'dependents']


In [108]:
feat_cols = [duration, history,amount, savings,
            status,housing_type, foreign]

### Create model and Input the data

In [109]:
input_func = tf.estimator.inputs.pandas_input_fn(x=X_train,y=y_train,batch_size=10,num_epochs=1000,shuffle=True)

In [110]:
dnn_model = tf.estimator.DNNClassifier(hidden_units=[3,3,3],feature_columns=feat_cols, n_classes=10)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_tf_random_seed': 1, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_model_dir': 'C:\\Users\\JMORR_~1\\AppData\\Local\\Temp\\tmp_jxhcgjd', '_log_step_count_steps': 100, '_save_summary_steps': 100, '_keep_checkpoint_every_n_hours': 10000}


In [111]:
dnn_model.train(input_fn=input_func,steps=5000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\JMORR_~1\AppData\Local\Temp\tmp_jxhcgjd\model.ckpt.
INFO:tensorflow:loss = 7350.23, step = 1
INFO:tensorflow:global_step/sec: 134.756
INFO:tensorflow:loss = 12.8651, step = 101 (0.759 sec)
INFO:tensorflow:global_step/sec: 189.119
INFO:tensorflow:loss = 10.2232, step = 201 (0.526 sec)
INFO:tensorflow:global_step/sec: 148.152
INFO:tensorflow:loss = 9.98891, step = 301 (0.669 sec)
INFO:tensorflow:global_step/sec: 139.462
INFO:tensorflow:loss = 9.37596, step = 401 (0.721 sec)
INFO:tensorflow:global_step/sec: 147.933
INFO:tensorflow:loss = 9.65888, step = 501 (0.696 sec)
INFO:tensorflow:global_step/sec: 139.268
INFO:tensorflow:loss = 9.41948, step = 601 (0.745 sec)
INFO:tensorflow:global_step/sec: 130.7
INFO:tensorflow:loss = 6.93528, step = 701 (0.723 sec)
INFO:tensorflow:global_step/sec: 139.462
INFO:tensorflow:loss = 7.52826, step = 801 (0.717 sec)
INFO:tensorflow:global_step/sec: 123.582
I

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0xfa0b1acb38>

### Evaluate the predictions

In [112]:
eval_input_func = tf.estimator.inputs.pandas_input_fn(x=X_test,y=y_test,batch_size=1,shuffle=False)

In [113]:
dnn_model.evaluate(eval_input_func)

INFO:tensorflow:Starting evaluation at 2019-02-18-20:11:15
INFO:tensorflow:Restoring parameters from C:\Users\JMORR_~1\AppData\Local\Temp\tmp_jxhcgjd\model.ckpt-5000
INFO:tensorflow:Finished evaluation at 2019-02-18-20:11:17
INFO:tensorflow:Saving dict for global step 5000: accuracy = 0.85, average_loss = 0.509462, global_step = 5000, loss = 0.509462


{'accuracy': 0.85000002,
 'average_loss': 0.50946212,
 'global_step': 5000,
 'loss': 0.50946212}

In [124]:
X_test.shape


(20, 7)