In [1]:
import math
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
import matplotlib.pyplot as plt

# Fix TensorFlow's global random seed so TF-side random operations repeat across runs.
tf.random.set_seed(314)

In [2]:
# Load the two Seattle tables: the label-model outputs used for training and
# the test-set file.
df_train = pd.read_csv("data/labelModel_outputs_NoCurbRamp_Seattle.csv")
df_test = pd.read_csv('data/test_set_seattle.csv')

# Distinct way types that occur in the test file.
way_types = df_test['way_type'].unique().tolist()

# Numeric code assigned to each way-type string (0 for '-1', then steps of 0.1).
way_type_mapping = {
    '-1': 0,
    'tertiary': 0.1,
    'secondary': 0.2,
    'residential': 0.3,
    'living_street': 0.4,
    'primary': 0.5,
    'unclassified': 0.6,
    'tertiary_link': 0.7,
    'crossing': 0.8,
    'primary_link': 0.9,
    'trunk': 1.0,
    'secondary_link': 1.1,
    'busway': 1.2,
    'trunk_link': 1.3,
}

# Swap the way-type strings for their numeric codes in both tables.
# Series.map with a dict yields NaN for any value that is not a key.
for frame in (df_train, df_test):
    frame['way_type'] = frame['way_type'].map(way_type_mapping)

In [4]:
# Distinct label types in the test table, listed in order of first appearance.
label_types = pd.unique(df_test['label_type']).tolist()
label_types

['CurbRamp',
 'SurfaceProblem',
 'Obstacle',
 'NoCurbRamp',
 'NoSidewalk',
 'Occlusion',
 'Other',
 'Crosswalk',
 'Signal']

In [5]:
# Display the entry at position 3 of the label-type list.
label_types[3]

'NoCurbRamp'

In [6]:
# Keep only the 'NoCurbRamp' rows of the test table.
# The label is named explicitly instead of being looked up by position in
# `label_types`: that position depends on row order in the CSV, and once
# `df_test` has been overwritten here, rebuilding `label_types` would leave a
# single entry and make an index-based lookup fail on re-run.
df_test = df_test[df_test['label_type'] == 'NoCurbRamp']
df_test.head()

Unnamed: 0,label_id,label_type,severity,gsv_panorama_id,zoom,heading,pitch,photographer_heading,photographer_pitch,user_id,...,clustered,count,false_positive,distance,tag_list,description,tag_count,way_type,intersection_distance,verified
19,96,NoCurbRamp,5.0,prMAsZ283Q-GBpPv9OA2IQ,1,143.0625,-21.0625,168.217239,-1.346527,fa0bbee4-b98d-4eb3-b6da-d9e9aa287f6c,...,1.0,8.0,0,5.150603,0.0,0.0,0.0,0.3,38.290406,1
20,97,NoCurbRamp,5.0,prMAsZ283Q-GBpPv9OA2IQ,1,212.25,-26.6875,168.217239,-1.346527,fa0bbee4-b98d-4eb3-b6da-d9e9aa287f6c,...,1.0,8.0,0,20.084151,0.0,0.0,0.0,0.3,31.722383,1
21,100,NoCurbRamp,5.0,prMAsZ283Q-GBpPv9OA2IQ,1,300.9375,-29.875,168.217239,-1.346527,fa0bbee4-b98d-4eb3-b6da-d9e9aa287f6c,...,1.0,9.0,0,19.590366,0.0,0.0,0.0,0.3,32.007204,1
22,101,NoCurbRamp,5.0,prMAsZ283Q-GBpPv9OA2IQ,1,41.0625,-33.4375,168.217239,-1.346527,fa0bbee4-b98d-4eb3-b6da-d9e9aa287f6c,...,0.0,6.0,0,11.451064,0.0,0.0,0.0,0.3,20.791866,1
23,102,NoCurbRamp,5.0,prMAsZ283Q-GBpPv9OA2IQ,1,41.0625,-33.4375,168.217239,-1.346527,fa0bbee4-b98d-4eb3-b6da-d9e9aa287f6c,...,1.0,8.0,0,11.21991,0.0,0.0,0.0,0.3,21.889698,1


In [7]:
# Select node features
feature_cols = ['count', 'severity', 'zoom', 'tag_list', 'way_type', 'intersection_distance', 'description', 'distance']
# Alternative feature subset (disabled):
# feature_cols = ['count', 'severity', 'zoom', 'tag_list', 'description', 'distance']
num_features = len(feature_cols)


def scale_by_reference_max(features, reference_max):
    """Divide the positive entries of each column by a reference maximum.

    Args:
        features: DataFrame of numeric feature columns.
        reference_max: Series indexed by column name holding the divisor for
            each column (here: the column maxima of the training features).

    Returns:
        A DataFrame with the same index and columns as `features`. Entries
        greater than 0 are divided by the column's reference maximum; zero,
        negative and NaN entries are passed through unchanged.
    """
    # A column whose reference maximum is not positive (or is NaN) gets a
    # divisor of 1 so that positive values are left as they are instead of
    # turning into inf or flipping sign.
    safe_max = reference_max.where(reference_max > 0, 1.0)
    # DataFrame.where keeps the original where the condition is True, so the
    # scaled value is used only for strictly positive entries.
    return features.where(~(features > 0), features / safe_max)


X_train = df_train[feature_cols] # Features
y_train = df_train.labelModel_output # Target variable

X = df_test[feature_cols] # Features
y = df_test.verified # Target variable

# Split the test-file rows in half: one half for fine-tuning, one half held out.
X_strong, X_test, y_strong, y_test = train_test_split(X, y, test_size=0.5, random_state=14)

# Scale all three splits with the maxima of the training features. Scaling
# each split by its own maxima would map the same raw value to different
# model inputs depending on which split it is in. Values in the other splits
# that exceed the training maximum end up above 1.
train_max = X_train.max()
X_train = scale_by_reference_max(X_train, train_max)
X_strong = scale_by_reference_max(X_strong, train_max)
X_test = scale_by_reference_max(X_test, train_max)

X_train

Unnamed: 0,count,severity,zoom,tag_list,way_type,intersection_distance,description,distance
0,0.588235,1.0,0.333333,1.0,0.230769,0.037319,0.0,0.000723
1,0.294118,1.0,0.666667,1.0,0.230769,0.013627,0.0,0.002702
2,0.352941,1.0,0.333333,1.0,0.230769,0.026810,0.0,0.002832
3,0.411765,1.0,0.333333,1.0,0.230769,0.022153,0.0,0.013274
4,0.411765,1.0,0.333333,1.0,0.230769,0.026406,0.0,0.015763
...,...,...,...,...,...,...,...,...
30584,0.647059,0.6,0.666667,1.0,0.230769,0.020753,0.0,0.004596
30585,0.588235,0.4,0.666667,1.0,0.230769,0.027260,0.0,0.012229
30586,0.705882,0.8,1.000000,1.0,0.230769,0.031364,0.0,0.013118
30587,0.823529,0.6,1.000000,1.0,0.230769,0.025560,0.0,0.035020


In [8]:
# Summary statistics of the scaled 'count' feature in the held-out split.
print(X_test.loc[:, 'count'].describe())

count    4239.000000
mean        0.380417
std         0.103945
min         0.176471
25%         0.294118
50%         0.352941
75%         0.470588
max         1.000000
Name: count, dtype: float64


Configure the hyperparameters

In [9]:
# --- initial training ---
num_epochs = 10
learning_rate = 0.0001  # step size handed to the Adam optimizer
dropout_rate = 0.2      # rate used by the Dropout layers

# --- fine-tuning ---
initial_num_epochs = 10            # epoch index at which the fine-tuning fit resumes
num_epochs_fine_tune = 20
learning_rate_fine_tune = 0.00001  # learning rate for fine-tuning

In [None]:
# https://www.kaggle.com/code/sathianpong/3-ways-to-implement-mlp-with-keras
# Specify the model's architecture: two ReLU layers as wide as the feature
# vector (each followed by dropout), a 4-unit ReLU layer, and a single
# sigmoid output unit.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(num_features, activation='relu', input_shape=[num_features]),
    tf.keras.layers.Dropout(dropout_rate),
    tf.keras.layers.Dense(num_features, activation='relu'),
    tf.keras.layers.Dropout(dropout_rate),
    tf.keras.layers.Dense(4, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Specify the loss function, optimizer, metrics
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    # `metrics` is documented as a list; pass one even for a single metric so
    # the call does not depend on Keras tolerating a bare Metric object.
    metrics=[tf.keras.metrics.Precision()],
)

# Train the model on the label-model outputs, validating on the held-out split.
# NOTE(review): the epoch count is hard-coded to 20 while the hyperparameter
# cell sets num_epochs = 10 — confirm which value is intended.
history = model.fit(
    X_train, y_train, epochs=20,
    validation_data=(X_test, y_test),
    verbose=2
)


In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Plot every quantity recorded during the initial training run, one line per
# recorded series, against the epoch index.
training_curves = pd.DataFrame(history.history)
ax = training_curves.plot()
ax.set_xlabel("epoch")
# Anchor the legend's upper-left corner just right of the axes so it does not
# cover the curves.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.show()

Fine-tuning

In [None]:
# Freeze all the layers except the last one, so that only the output layer is
# updated during fine-tuning. (The slice must be [:-1]; [:1] would freeze only
# the first layer.)
for layer in model.layers[:-1]:
    layer.trainable = False

# Re-compile so the changed `trainable` flags take effect, using the
# fine-tuning learning rate from the hyperparameter cell.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate_fine_tune),
    metrics=[tf.keras.metrics.Precision()],
)

# Resume the epoch counter where the initial run stopped. `epochs` in
# Model.fit is the index of the final epoch, not the number of additional
# epochs, so it is the sum of the epochs already run and the fine-tuning epochs.
fine_tune_start_epoch = history.epoch[-1] + 1
fine_tune_end_epoch = fine_tune_start_epoch + num_epochs_fine_tune

history_fine = model.fit(
    X_strong, y_strong,
    epochs=fine_tune_end_epoch,
    initial_epoch=fine_tune_start_epoch,
    validation_data=(X_test, y_test)
)

In [None]:
# Plot every quantity recorded during the fine-tuning run, one line per
# recorded series.
fine_tune_curves = pd.DataFrame(history_fine.history)
ax = fine_tune_curves.plot()
ax.set_xlabel("epoch")
# Anchor the legend's upper-left corner just right of the axes so it does not
# cover the curves.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.show()