In [2]:
# Import libraries

import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Model

2024-07-05 09:14:05.889068: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Add and adjust 'Justin.csv' including pre-proccessing data like time

In [3]:
# Load the dataset without explicit date parsing
file_path = 'justin.csv'
df = pd.read_csv(file_path)

# Preprocess Date & Time
df['DateTime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%m/%d %H:%M')
df.drop(['Date', 'Time'], axis=1, inplace=True)

# Verify Data
print(df.head())


    Location            DateTime
0     Stever 1900-06-30 23:09:00
1     Stever 1900-07-01 22:34:00
2     Stever 1900-07-01 16:00:00
3     Stever 1900-07-01 22:46:00
4  Forbes St 1900-07-02 18:51:00


Setup Time and Date in pandas correctly

In [34]:
# Extract features from datetime
df['Hour'] = df['DateTime'].dt.hour
df['DayOfWeek'] = df['DateTime'].dt.dayofweek

# Encoding via OneHotEncoder
label_encoder = LabelEncoder()
df['Location_Code'] = label_encoder.fit_transform(df['Location'])

# Prepare features (X) and target (y)
X = df[['Hour', 'DayOfWeek']].values
y = df['Location_Code'].values

print(df.head())

    Location            DateTime  Hour  DayOfWeek  Location_Code
0     Stever 1900-06-30 23:09:00    23          5              2
1     Stever 1900-07-01 22:34:00    22          6              2
2     Stever 1900-07-01 16:00:00    16          6              2
3     Stever 1900-07-01 22:46:00    22          6              2
4  Forbes St 1900-07-02 18:51:00    18          0              1


Setup OneHotEncoder

In [35]:
# One-hot encode categorical features (DayOfWeek)
ct = ColumnTransformer(
    [('one_hot_encoder', OneHotEncoder(categories='auto'), [1])],
    remainder='passthrough'
)
X = ct.fit_transform(X)

Splitting dataset into training and testing

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Standarization via scaling

In [7]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Defining model

In [48]:
# Set input shape in friendly way 
# to be adjustable depending on # of vars
input_shape = (6,) 

# Input layer
inputs = Input(shape=input_shape)

# Hidden layers
x = Dense(32, activation='relu')(inputs)
x = Dense(16, activation='relu')(x)

# Output layer
outputs = Dense(8, activation='relu')(x)# Example output layer 
outputs = Dense(3, activation='relu')(outputs)# for binary classification

# Create the model
model = Model(inputs=inputs, outputs=outputs)

# Compile the model
model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])


Model Summary - find how many params used

In [31]:
model.summary()

In [58]:
history = model.fit(X_train, y_train, epochs=200, validation_data=(X_test, y_test), verbose=1)

Epoch 1/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 443ms/step - accuracy: 0.7500 - loss: 0.2753 - val_accuracy: 0.7500 - val_loss: 0.6443
Epoch 2/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266ms/step - accuracy: 0.7500 - loss: 0.2753 - val_accuracy: 0.7500 - val_loss: 0.6443
Epoch 3/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step - accuracy: 0.7500 - loss: 0.2753 - val_accuracy: 0.7500 - val_loss: 0.6443
Epoch 4/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 194ms/step - accuracy: 0.7500 - loss: 0.2753 - val_accuracy: 0.7500 - val_loss: 0.6442
Epoch 5/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 309ms/step - accuracy: 0.7500 - loss: 0.2753 - val_accuracy: 0.7500 - val_loss: 0.6441
Epoch 6/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 208ms/step - accuracy: 0.7500 - loss: 0.2753 - val_accuracy: 0.7500 - val_loss: 0.6440
Epoch 7/200
[1m1/1[0m [32m━━━━━

KeyboardInterrupt: 

Accuracy of 75.00% currently, simply need more data in order to make it better but for now this isn't bad, it also may be overfitting so keep that in mind