In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from IPython.display import clear_output
from sklearn.model_selection import train_test_split

In [2]:
# Location of the mushrooms CSV. Set the MUSHROOMS_CSV environment variable to point the
# notebook at another copy of the file; when it is unset, the original local path below
# is used unchanged.
dataset_path = os.environ.get(
    'MUSHROOMS_CSV',
    r'C:\Users\camer\OneDrive - University of Waterloo (1)\mushrooms.csv',
)

In [3]:
# Column labels used with the mushroom data: the 'class' target comes first,
# followed by the 22 categorical feature names (column_names[1:] are the features).
column_names = [
    'class',
    # cap
    'cap-shape', 'cap-surface', 'cap-color',
    'bruises', 'odor',
    # gill
    'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
    # stalk
    'stalk-shape', 'stalk-root',
    'stalk-surface-above-ring', 'stalk-surface-below-ring',
    'stalk-color-above-ring', 'stalk-color-below-ring',
    # veil and ring
    'veil-type', 'veil-color',
    'ring-number', 'ring-type',
    # remaining attributes
    'spore-print-color', 'population', 'habitat',
]

In [4]:
# Load the mushroom dataset from dataset_path into a DataFrame.
data = pd.read_csv(dataset_path)

In [5]:
# Encode the target as integers: 'e' -> 0, any other value -> 1.
# The comparison is vectorized over the whole column, so it does not depend on the
# DataFrame having a default 0..n-1 index the way positional-looking `data['class'][i]`
# lookups do. The result stays a plain list of Python ints.
classes = (data['class'] != 'e').astype(int).tolist()

In [6]:
# Wrap the 0/1 labels in a single-column DataFrame so they can be split alongside
# the feature frame.
classes = pd.DataFrame(classes)

In [7]:
# Remove the label column from the feature frame in place; pop() also returns the
# removed column, which is what this cell displays.
# NOTE(review): not idempotent - re-running this cell without reloading `data` fails
# because the 'class' column is already gone.
data.pop('class')

0       p
1       e
2       e
3       p
4       e
       ..
8119    e
8120    e
8121    e
8122    p
8123    e
Name: class, Length: 8124, dtype: object

In [8]:
# Hold out 20% of the rows for evaluation. train_df/test_df are the feature frames and
# train_val/test_val are the matching label frames. A fixed random_state makes the
# partition repeatable across kernel restarts instead of changing on every run.
train_df, test_df, train_val, test_val = train_test_split(
    data, classes, test_size=0.2, random_state=42
)

In [9]:
# Report how many rows ended up in each partition.
n_train, n_test = len(train_df), len(test_df)
print(f"Training Examples: {n_train}")
print(f"Test Examples: {n_test}")

Training Examples: 6499
Test Examples: 1625


In [10]:
# One categorical column per input feature (every name in column_names after the
# 'class' label), with its vocabulary taken from the distinct values present in the
# training split.
feature_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        feature_name, train_df[feature_name].unique()
    )
    for feature_name in column_names[1:]
]

In [11]:
#Taken from an example on https://www.tensorflow.org/tutorials/estimator/linear
def make_input_fn(data_df, label_df, num_epochs=5, shuffle=True, batch_size=32,
                  shuffle_buffer_size=1000):
    """Build a zero-argument input function that returns a batched tf.data.Dataset.

    Args:
        data_df: Feature DataFrame; it is converted with dict(), so each column
            becomes one named entry of the feature dictionary.
        label_df: Labels sliced together with data_df (must have the same number
            of rows).
        num_epochs: How many times the batched dataset is repeated.
        shuffle: Whether to shuffle the examples before batching.
        batch_size: Number of examples per batch.
        shuffle_buffer_size: Buffer size passed to Dataset.shuffle(). Defaults to
            1000, the value that used to be hard-coded; a buffer at least as large
            as the dataset is needed for a full uniform shuffle.

    Returns:
        A callable taking no arguments that constructs and returns the dataset.
        The dataset is only built when that callable is invoked.
    """
    def input_function():
        ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
        if shuffle:
            ds = ds.shuffle(shuffle_buffer_size)
        # Batch first, then repeat, matching the original ordering of the pipeline.
        ds = ds.batch(batch_size).repeat(num_epochs)
        return ds
    return input_function

# Training input: relies on make_input_fn's defaults (5 epochs, shuffled, batches of 32).
train_input_fn = make_input_fn(data_df=train_df, label_df=train_val)
# Evaluation input: a single, unshuffled pass over the held-out rows.
val_input_fn = make_input_fn(data_df=test_df, label_df=test_val, shuffle=False, num_epochs=1)

In [12]:
# Sanity check: print the encoded 0/1 label frame.
print(classes)

      0
0     1
1     0
2     0
3     1
4     0
...  ..
8119  0
8120  0
8121  0
8122  1
8123  0

[8124 rows x 1 columns]


In [13]:
#Used https://www.tensorflow.org/tutorials/estimator/linear
# Build a linear classifier over the categorical feature columns, fit it on the
# training input function, then score it on the held-out input function.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(input_fn=train_input_fn)
result = linear_est.evaluate(input_fn=val_input_fn)

# Clear whatever the cell has emitted so far so that only the metrics dict remains.
clear_output()
print(result)

{'accuracy': 0.9987692, 'accuracy_baseline': 0.5021539, 'auc': 0.9999909, 'auc_precision_recall': 0.9999909, 'average_loss': 0.017982932, 'label/mean': 0.49784616, 'loss': 0.017948532, 'precision': 1.0, 'prediction/mean': 0.49880755, 'recall': 0.99752784, 'global_step': 1020}
