In [1]:
from ihrl.hvi import val_true, horizon_value, horizon_qvalue, get_trajectories,greedy_policy
from ihrl.taxicab import TaxiMDP, Root,Get, Nav, Put,taxi_state, taxi_put_state
from ihrl.behavioral_cloning import preprocess_trajectories_numpy

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler

# 4x4 grid layout: the letters A-D sit in the four corner cells and '-' fills
# the remaining cells (presumably A-D are the named passenger locations --
# confirm in ihrl.taxicab). The stray trailing space after "C--D" was removed
# so that every row has the same width.
layout_str = """
A--B
----
----
C--D
"""
mdp = TaxiMDP(layout_str)

# Task objects built on the same MDP. test_get is the one used to generate
# trajectories in this notebook; test_root can be swapped in for full trajectories.
test_root = Root(mdp)
test_get = Get(mdp)
test_nav = Nav(mdp)
init_state = mdp.initial_state_sample()




Passing `test_get` generates trajectories that end once the passenger has been picked up. Pass `test_root` instead to generate full trajectories.

In [2]:
# Generate 100,000 trajectories for test_get.
# Set the deterministic flag to False to see a different result.
trajectories = get_trajectories(test_get, 100_000, deterministic=True)

In [3]:
# Inspect the first trajectory: a list of (state, action) pairs.
trajectories[0]

[(TaxiCabState(taxi=Taxi(location=Location(x=2, y=1), passenger=None), waiting_passengers=(Passenger(location=Location(x=3, y=0), destination=None),)),
  TaxiCabAction(direction=Direction(dx=1, dy=0), pickup=False, dropoff=False)),
 (TaxiCabState(taxi=Taxi(location=Location(x=3, y=1), passenger=None), waiting_passengers=(Passenger(location=Location(x=3, y=0), destination=None),)),
  TaxiCabAction(direction=Direction(dx=0, dy=-1), pickup=False, dropoff=False)),
 (TaxiCabState(taxi=Taxi(location=Location(x=3, y=0), passenger=None), waiting_passengers=(Passenger(location=Location(x=3, y=0), destination=None),)),
  TaxiCabAction(direction=Direction(dx=0, dy=0), pickup=True, dropoff=False))]

In [4]:
# Convert the (state, action) trajectories into tabular rows; the result is
# wrapped in a DataFrame with six state columns and one action column below.
trajectories_process = preprocess_trajectories_numpy(trajectories)

In [5]:
# Label the preprocessed rows: six state features followed by the action taken.
trajectory_columns = [
    'taxi_x_pos', 'taxi_y_pos',
    'pass_dest_x', 'pass_dest_y',
    'wait_loc_x', 'wait_loc_y',
    'action',
]
df_trajs = pd.DataFrame(trajectories_process, columns=trajectory_columns)

In [6]:
# Preview the table (pandas truncates the display to the first and last rows).
df_trajs

Unnamed: 0,taxi_x_pos,taxi_y_pos,pass_dest_x,pass_dest_y,wait_loc_x,wait_loc_y,action
0,2,1,-1,-1,3,0,0
1,3,1,-1,-1,3,0,3
2,3,0,-1,-1,3,0,4
3,0,3,-1,-1,0,0,3
4,0,2,-1,-1,0,0,3
...,...,...,...,...,...,...,...
399444,2,3,-1,-1,3,0,0
399445,3,3,-1,-1,3,0,3
399446,3,2,-1,-1,3,0,3
399447,3,1,-1,-1,3,0,3


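<h1>Accuracy is 100% if deterministic</h1>
<ul>
<li>For states with multiple optimal actions, the deterministic flag makes every trajectory take the same action each time, so each state is always paired with a single action.</li>
<li>Accuracy drops if the flag is off, because the same state can then appear with different optimal actions.</li>
</ul>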

In [7]:
# 60/20/20 train / validation / test split
X = df_trajs.drop(columns='action')
y = df_trajs['action']

# Hold out 40% of the rows first, then halve that hold-out into validation and test.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Keep an unscaled copy of the test features so the raw states can be inspected later.
X_test_original = X_test.copy()

# Fit the scaler on the training split only, reuse its statistics for the other
# two splits, and convert each scaled array to a float32 tensor.
scaler = StandardScaler()
X_train, X_val, X_test = (
    torch.tensor(scaled, dtype=torch.float32)
    for scaled in (
        scaler.fit_transform(X_train),
        scaler.transform(X_val),
        scaler.transform(X_test),
    )
)

# Action labels become int64 tensors (class indices for the cross-entropy loss).
y_train, y_val, y_test = (
    torch.tensor(split.values, dtype=torch.long)
    for split in (y_train, y_val, y_test)
)

# Define the neural network with Dropout regularization
class MLPClassifier(nn.Module):
    """Feed-forward classifier with three hidden ReLU layers, each followed by dropout.

    Args:
        in_features: Number of input features per sample (default 6).
        hidden_size: Width of each of the three hidden layers (default 50).
        num_classes: Number of output logits, one per class (default 6).
        dropout: Dropout probability applied after every hidden activation
            (default 0.5).

    Calling ``MLPClassifier()`` with no arguments builds the same architecture
    as before, and the layers still live under ``self.hidden`` in the same
    order, so previously saved state dicts keep matching.
    """

    def __init__(self, in_features=6, hidden_size=50, num_classes=6, dropout=0.5):
        super().__init__()
        self.hidden = nn.Sequential(
            nn.Linear(in_features, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, num_classes)  # One output logit per class
        )

    def forward(self, x):
        """Return the raw (un-normalised) class logits for a batch ``x``."""
        return self.hidden(x)

# Seed PyTorch so weight initialisation and dropout masks are repeatable,
# in line with the fixed random_state used for the data split.
torch.manual_seed(42)

# Initialize the model, loss function, and optimizer
model = MLPClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Early stopping parameters
patience = 10
best_val_loss = float('inf')
best_state = None  # in-memory copy of the weights with the lowest validation loss
epochs_no_improve = 0

# Train the model with validation and early stopping
num_epochs = 150
for epoch in range(num_epochs):
    # Training phase: one full-batch gradient step per epoch
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    # Validation phase (dropout off, no gradient tracking)
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, y_val)
        val_predictions = torch.max(val_outputs, 1)[1]
        val_accuracy = accuracy_score(y_val, val_predictions)

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}, Val Accuracy: {val_accuracy:.4f}')

    # Check early stopping condition
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        # state_dict() tensors share storage with the live parameters, so clone
        # them; otherwise later optimizer steps would overwrite the snapshot.
        best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
        #Uncomment to save model
        #torch.save(model.state_dict(), 'best_model.pth')
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= patience:
        print('Early stopping!')
        break

# Roll back to the best validation checkpoint. Without this, an early stop
# leaves the model with weights from `patience` epochs after the best one.
if best_state is not None:
    model.load_state_dict(best_state)

#Uncomment to load the best model
#model.load_state_dict(torch.load('best_model.pth'))

# Score the held-out test split in eval mode with gradient tracking disabled;
# the predicted class is the index of the largest logit in each row.
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    y_pred = torch.max(test_outputs, dim=1).indices

# Fraction of test rows whose predicted action equals the recorded action
accuracy = accuracy_score(y_test, y_pred)
print(f'Test Accuracy: {accuracy:.4f}')

#May be redundant but checking on a 2nd validation set of trajectories generated right here
new_trajectories = get_trajectories(test_get, 1000, deterministic=True)
new_trajectories_process = preprocess_trajectories_numpy(new_trajectories)
df_new_trajs = pd.DataFrame(
    new_trajectories_process,
    columns=[
        'taxi_x_pos', 'taxi_y_pos',
        'pass_dest_x', 'pass_dest_y',
        'wait_loc_x', 'wait_loc_y',
        'action',
    ],
)

# Same preparation as the earlier splits: separate the labels, scale the
# features with the already-fitted scaler, then convert both to tensors.
y_new = torch.tensor(df_new_trajs['action'].values, dtype=torch.long)
X_new = torch.tensor(
    scaler.transform(df_new_trajs.drop(columns='action')),
    dtype=torch.float32,
)

# Predict on the fresh trajectories in eval mode, without gradient tracking
model.eval()
with torch.no_grad():
    new_outputs = model(X_new)
    y_new_pred = torch.max(new_outputs, dim=1).indices

# Accuracy on the freshly generated trajectories
new_accuracy = accuracy_score(y_new, y_new_pred)
print(f'New Data Accuracy: {new_accuracy:.4f}')


Epoch [10/150], Loss: 1.7342, Val Loss: 1.7230, Val Accuracy: 0.3315
Epoch [20/150], Loss: 1.6624, Val Loss: 1.6352, Val Accuracy: 0.3075
Epoch [30/150], Loss: 1.5841, Val Loss: 1.5229, Val Accuracy: 0.3595
Epoch [40/150], Loss: 1.4904, Val Loss: 1.3893, Val Accuracy: 0.5904
Epoch [50/150], Loss: 1.3774, Val Loss: 1.2138, Val Accuracy: 0.6635
Epoch [60/150], Loss: 1.2512, Val Loss: 1.0143, Val Accuracy: 0.8180
Epoch [70/150], Loss: 1.1307, Val Loss: 0.8257, Val Accuracy: 0.9306
Epoch [80/150], Loss: 1.0211, Val Loss: 0.6663, Val Accuracy: 0.9542
Epoch [90/150], Loss: 0.9226, Val Loss: 0.5301, Val Accuracy: 0.9653
Epoch [100/150], Loss: 0.8348, Val Loss: 0.4196, Val Accuracy: 0.9772
Epoch [110/150], Loss: 0.7516, Val Loss: 0.3308, Val Accuracy: 1.0000
Epoch [120/150], Loss: 0.6821, Val Loss: 0.2592, Val Accuracy: 1.0000
Epoch [130/150], Loss: 0.6144, Val Loss: 0.2007, Val Accuracy: 1.0000
Epoch [140/150], Loss: 0.5490, Val Loss: 0.1547, Val Accuracy: 1.0000
Epoch [150/150], Loss: 0.4953

In [8]:
# Pair every unscaled test state with its recorded and its predicted action id.
result_df = X_test_original.copy()
for column_name, action_ids in (('true_output', y_test), ('predicted_output', y_pred)):
    result_df[column_name] = action_ids.numpy()
def action_num_to_str(action_num):
    """Translate an integer action id into a human-readable label.

    Ids 0 through 5 map, in order, to 'north', 'south', 'east', 'west',
    'pickup' and 'dropoff'. Any other value yields None.

    NOTE(review): the sample trajectory and the table displayed earlier in this
    notebook pair id 0 with Direction(dx=1, dy=0) and id 3 with
    Direction(dx=0, dy=-1), which does not agree with the 'north' / 'west'
    labels here. Confirm the encoding used by preprocess_trajectories_numpy
    before relying on the direction names.
    """
    labels = ('north', 'south', 'east', 'west', 'pickup', 'dropoff')
    for code, label in enumerate(labels):
        if action_num == code:
            return label
    return None
# Replace the numeric action ids with readable labels in both columns.
for column in ('true_output', 'predicted_output'):
    result_df[column] = result_df[column].apply(action_num_to_str)

# Keep only the rows where the prediction disagrees with the recorded action,
# without the passenger-destination columns.
disagrees = result_df['true_output'] != result_df['predicted_output']
mismatch_df = result_df.loc[disagrees].drop(columns=['pass_dest_x', 'pass_dest_y'])

In [9]:
# Number of test rows, then number of rows where the prediction was wrong.
print(len(result_df), len(mismatch_df), sep='\n')

79890
0


In [10]:
# This is empty when the trajectories are deterministic; when they are not, it
# shows states where multiple optimal actions are possible.
mismatch_df.head(10)

Unnamed: 0,taxi_x_pos,taxi_y_pos,wait_loc_x,wait_loc_y,true_output,predicted_output
