In [None]:
# Install into the environment of the running kernel: %pip (unlike !pip)
# always targets the interpreter the notebook is executing in.
# TensorFlow is bounded on both sides:
#   * >= 2.1 because Dataset.as_numpy_iterator() is used later on,
#   * <  2.16 because from 2.16 on tf.keras is Keras 3, which no longer
#     provides tf.keras.layers.DenseFeatures used by the feature layer.
%pip install -q "tensorflow>=2.1,<2.16" matplotlib pandas

In [None]:
import os

import tensorflow as tf

In [None]:
# read dataset

# dataset origin https://archive.ics.uci.edu/ml/datasets/covertype
# The location of the data file is machine-specific, so it can be overridden
# through the COVTYPE_DATA_PATH environment variable. The original local path
# stays the default, so existing setups keep working unchanged.
file_path = os.environ.get("COVTYPE_DATA_PATH", "/home/adrian/Pobrane/covtype.data")

# One int32 entry per CSV column: all 55 columns are read as int32.
defaults = [tf.int32] * 55 # number of columns
dataset = tf.data.experimental.CsvDataset(file_path, defaults)

In [None]:
# split to features and labels 

# Feature names, in the order in which _parse_csv_data pairs them with values:
# ten scalar columns first, then the packed "Soil_Type" tensor, then
# "Cover_Type".
columns = [
    "Elevation",
    "Aspect",
    "Slope",
    "Horizontal_Distance_To_Hydrology",
    "Vertical_Distance_To_Hydrology",
    "Horizontal_Distance_To_Roadways",
    "Hillshade_9am",
    "Hillshade_Noon",
    "Hillshade_3pm",
    "Horizontal_Distance_To_Fire_Points",
    "Soil_Type",
    "Cover_Type",
]

def _parse_csv_data(*vals):
    """Convert the column values of one CSV row into ``(features, label)``.

    Args:
        *vals: the per-column values of a single row; positions 0..54 are
            read, so at least 55 values are expected.

    Returns:
        A tuple ``(features, label)`` where ``features`` maps the names in
        ``columns`` to values (positions 0..9 individually, positions 14..53
        packed into one tensor under "Soil_Type", position 54 under
        "Cover_Type") and ``label`` is the index of the largest value among
        positions 10..13.
    """
    # Positions 14..53 become a single tensor feature.
    soil_flags = tf.convert_to_tensor(vals[14:54])

    # Values in the same order as the names in `columns`.
    row_values = list(vals[:10])
    row_values.append(soil_flags)
    row_values.append(vals[54])
    features = {name: value for name, value in zip(columns, row_values)}

    # Positions 10..13 are presumably one-hot indicator columns (wilderness
    # area in the UCI covertype layout - TODO confirm); argmax turns them
    # into a single class index in 0..3.
    label = tf.argmax(vals[10:14], axis=0)

    return features, label

In [None]:
# check
# Parse every raw row into a (features, label) pair.
# NOTE: this rebinds `dataset`, so re-running this cell on a live kernel would
# apply the parser to already-parsed elements; restart the kernel instead.
dataset = dataset.map(map_func=_parse_csv_data)

# Materialise the first parsed element and show it as a sanity check.
first_examples = list(dataset.take(count=1))
print(first_examples)

In [None]:
# define features

# Names of the scalar inputs; each one gets its own numeric feature column.
numeric_columns = [
    "Elevation",
    "Aspect",
    "Slope",
    "Horizontal_Distance_To_Hydrology",
    "Vertical_Distance_To_Hydrology",
    "Horizontal_Distance_To_Roadways",
    "Hillshade_9am",
    "Hillshade_Noon",
    "Hillshade_3pm",
    "Horizontal_Distance_To_Fire_Points",
]
numeric_features = list(map(tf.feature_column.numeric_column, numeric_columns))

# "Cover_Type" is treated as an integer id in [0, 8) and embedded into
# a 10-dimensional dense vector.
cover_type = tf.feature_column.categorical_column_with_identity(
    key="Cover_Type",
    num_buckets=8,
)
cover_embedding = tf.feature_column.embedding_column(
    categorical_column=cover_type,
    dimension=10,
)

# "Soil_Type" is the 40-element tensor packed by _parse_csv_data.
soil_type = tf.feature_column.numeric_column(key="Soil_Type", shape=(40,))

In [None]:
# define 1st layer for the model

# Every feature column the input layer consumes: the scalar numeric columns,
# the 40-wide soil-type column and the embedded cover type.
features = [*numeric_features, soil_type, cover_embedding]
feature_layer = tf.keras.layers.DenseFeatures(feature_columns=features)

In [None]:
# few consts

# presumably the number of rows in covtype.data - TODO confirm
EXAMPLES = 581012
# 15 % of the rows are held out for testing, the remainder is used to train.
TEST_EXAMPLES = int(EXAMPLES * 0.15)
TRAIN_EXAMPLES = EXAMPLES - TEST_EXAMPLES

BATCH_SIZE = 64

# Number of complete batches in the training split (a trailing partial batch
# is not counted).
BATCHES = TRAIN_EXAMPLES // BATCH_SIZE
STEPS_PER_EPOCH = BATCHES
# With STEPS_PER_EPOCH equal to BATCHES the quotient is 1, i.e. 10 epochs.
EPOCHS = (BATCHES // STEPS_PER_EPOCH) * 10
print(f"TEST_EXAMPLES: {TEST_EXAMPLES}, TRAIN_EXAMPLES: {TRAIN_EXAMPLES}, BATCHES: {BATCHES}, EPOCHS: {EPOCHS}")

In [None]:
# split to training and testing dataset

# The first TEST_EXAMPLES parsed rows form the test split and everything
# after them the training split; both are grouped into BATCH_SIZE batches.
# NOTE(review): rows are used in file order, nothing is shuffled - confirm
# that this is intended.
test_dataset = dataset.take(count=TEST_EXAMPLES).batch(batch_size=BATCH_SIZE)
train_dataset = dataset.skip(count=TEST_EXAMPLES).batch(batch_size=BATCH_SIZE)

In [None]:
# build model

# The hidden layers use ReLU. Dense defaults to a linear activation, and a
# stack of purely linear layers collapses into one linear map, so without a
# non-linearity the extra layers would add no modelling capacity.
model = tf.keras.Sequential([
    feature_layer,
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dense(16, activation="relu"),
    tf.keras.layers.Dense(8, activation="relu"),
    # 4 output probabilities, one per label index produced by _parse_csv_data
    # (argmax over four columns).
    tf.keras.layers.Dense(4, activation=tf.nn.softmax)
])

In [None]:
# compile

# Adam with its default settings; the sparse cross-entropy loss matches the
# integer class-index labels produced by _parse_csv_data.
# NOTE(review): "mae" and "mse" compare integer labels with softmax outputs -
# confirm they are wanted; plot_history reads both, so they are kept here.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy", "mae", "mse"],
)

In [None]:
# fit
# steps_per_epoch = number of batches drawn per epoch,
# epochs = number of such rounds.
#
# The training data is repeated because, when steps_per_epoch is given, Keras
# keeps drawing from a single iterator across epochs. One pass over
# train_dataset holds only about STEPS_PER_EPOCH batches, so without repeat()
# the input runs out right after the first epoch and training is interrupted
# long before EPOCHS is reached.

history = model.fit(
    train_dataset.repeat(),
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    verbose=0
)

In [None]:
# inspect model
# Print Keras' textual summary of the model. This cell sits after the fit
# cell (presumably because the model is only built once it has seen data -
# TODO confirm).

model.summary()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

def plot_history(history):
    """Plot the per-epoch training curves stored in a fit history.

    Prints the names of the recorded columns and then draws one figure each
    for "loss", "accuracy", "mae" and "mse" against the epoch number.

    Args:
        history: object with a ``history`` mapping (metric name -> per-epoch
            values) and an ``epoch`` sequence, as returned by ``model.fit``.
    """
    frame = pd.DataFrame(history.history)
    frame["epoch"] = history.epoch

    print(frame.columns)

    # (column / y-axis name, legend label, upper y-axis limit) per figure
    panels = (
        ("loss", "Loss", 10),
        ("accuracy", "Accuracy", 2),
        ("mae", "mae", 4),
        ("mse", "mse", 8),
    )
    for metric, legend_label, y_max in panels:
        plt.figure()
        plt.xlabel("epoch")
        plt.ylabel(metric)
        plt.plot(frame["epoch"], frame[metric], label=legend_label)
        plt.legend()
        plt.ylim([0, y_max])

plot_history(history)

In [None]:
# Evaluate on the whole test split. No `steps` cap is passed, so Keras runs
# until test_dataset is exhausted and the metrics cover the same examples
# that model.predict(test_dataset) processes (a fixed steps=550 would only
# look at the first 550 batches).
evaluation_result = model.evaluate(test_dataset, verbose=0)

# Pair each metric name with its value for a readable cell output.
dict(zip(model.metrics_names, evaluation_result))

In [None]:
# Model outputs for every example of the test split.
test_predictions = model.predict(test_dataset)

print(f"test_predictions: {len(test_predictions)} ({test_predictions.shape})")
# Show the first four prediction rows.
for row_index in range(4):
    print(f"test_predictions: {test_predictions[row_index]}")

# Walk the test batches, drop the features and flatten the per-batch label
# arrays into one flat list of labels.
test_labels = [
    single_label
    for _, batch_labels in test_dataset.as_numpy_iterator()
    for single_label in batch_labels
]

print(f"test_labels: {len(test_labels)} ({type(test_labels)})")
# Show the first four true labels for comparison with the rows above.
for row_index in range(4):
    print(f"test_labels: {test_labels[row_index]}")

    
#print(f"test_predictions: {len(test_predictions)} ({test_predictions.shape}), test_labels: {len(test_labels)} ({type(test_labels)})")

#plt.scatter(test_labels, test_predictions)
#plt.xlabel("true values")
#plt.ylabel("predictions")
#plt.axis("equal")
#plt.axis("square")
#plt.xlim([0, plt.xlim()[1]])
#plt.ylim([0, plt.ylim()[1]])

#_ = plt.plot([-100, 100], [-100, 100])