In [40]:
import tkinter as tk
import reinforcement_learning as rl
from taxi_environment import *
import numpy as np
import pandas as pd
import csv

In [2]:
# convert data in pkl file to table in csv format

# load trained Q table and policy from pkl file
Qt, pi = rl.read_data(filename='q_data_3grids_2019-05-17 19-47-45.pkl')
print("Q table size: " , len(Qt.table))
print("Policy States: " , len(pi.sa_dict))

# write data to csv
row_list = ['taxi_i','taxi_j','pass_i','pass_j','dest_i', 'dest_j', 
            'n_cell', 'e_cell', 's_cell', 'w_cell', 'pass_picked', 
            'action', 'Q(s,a)']           
with open('taxi_data.csv', mode='w', newline='') as csv_file:
    wr = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    wr.writerow(row_list)
    for s in pi.sa_dict.keys():
        # find item with action a, and update with (a, Q)
        for aQ_tuple in pi.sa_dict[s]:
            # add state values to input list
            row_list = s.value().split("_") 
            # add action to input list
            row_list.append(aQ_tuple[0]) 
            # add this q value in y
            row_list.append(aQ_tuple[1].value)         
            wr.writerow(row_list)  

776965
130823


In [41]:
# read csv
df = pd.read_csv('taxi_data.csv')
df.shape

(784938, 13)

In [42]:
df_filtered = df[df['Q(s,a)'] != 0]
df_filtered.shape

(776965, 13)

In [43]:
X = df_filtered[df_filtered.columns[:-1]]
y = df_filtered[df_filtered.columns[-1]]

In [44]:
X.head()

Unnamed: 0,taxi_i,taxi_j,pass_i,pass_j,dest_i,dest_j,n_cell,e_cell,s_cell,w_cell,pass_picked,action
0,14,10,1,3,19,14,0,1,1,1,0,w
1,14,10,1,3,19,14,0,1,1,1,0,e
2,14,10,1,3,19,14,0,1,1,1,0,n
3,14,10,1,3,19,14,0,1,1,1,0,s
4,14,10,1,3,19,14,0,1,1,1,0,p


In [45]:
# convert action letters to numbers
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit(X['action'])
X['action'] = le.transform(X['action']) 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [46]:
X.head()

Unnamed: 0,taxi_i,taxi_j,pass_i,pass_j,dest_i,dest_j,n_cell,e_cell,s_cell,w_cell,pass_picked,action
0,14,10,1,3,19,14,0,1,1,1,0,5
1,14,10,1,3,19,14,0,1,1,1,0,1
2,14,10,1,3,19,14,0,1,1,1,0,2
3,14,10,1,3,19,14,0,1,1,1,0,4
4,14,10,1,3,19,14,0,1,1,1,0,3


In [47]:
# scale input for better SVR model
from sklearn.preprocessing import scale
scaled_X = pd.DataFrame(scale(X))
scaled_X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.929856,0.240388,-1.310447,-0.980513,0.555757,0.220067,-0.069772,0.0,0.0,0.0,-0.066492,1.461538
1,0.929856,0.240388,-1.310447,-0.980513,0.555757,0.220067,-0.069772,0.0,0.0,0.0,-0.066492,-0.881396
2,0.929856,0.240388,-1.310447,-0.980513,0.555757,0.220067,-0.069772,0.0,0.0,0.0,-0.066492,-0.295662
3,0.929856,0.240388,-1.310447,-0.980513,0.555757,0.220067,-0.069772,0.0,0.0,0.0,-0.066492,0.875804
4,0.929856,0.240388,-1.310447,-0.980513,0.555757,0.220067,-0.069772,0.0,0.0,0.0,-0.066492,0.290071


In [48]:
# split data into training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.3, random_state=42)
print("X_train shape: ", X_train.shape)
print("X_test shape: ", X_test.shape)
print("y_train shape: ", y_train.shape)
print("y_test shape: ", y_test.shape)

X_train shape:  (543875, 12)
X_test shape:  (233090, 12)
y_train shape:  (543875,)
y_test shape:  (233090,)


In [55]:
y_test.head()

43995    -2.261531
81818    -2.063432
718875   -2.347486
690239   -2.691768
13937    -2.025411
Name: Q(s,a), dtype: float64

In [49]:
# traing svr model from first 20000 examples
from sklearn.svm import SVR
clf = SVR()
clf.fit(X_train[:20000], y_train[:20000])

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [50]:
y_pred = clf.predict(X_test[:10000])

In [51]:
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test[:10000], y_pred)

0.27383094338735081

In [104]:
df_compare = pd.DataFrame({'Q_true':y_test[:10000], 'Q_approx':y_pred})
df_compare.head()

Unnamed: 0,Q_approx,Q_true
43995,-2.297314,-2.261531
81818,-2.217488,-2.063432
718875,-2.319667,-2.347486
690239,-2.331941,-2.691768
13937,-2.127923,-2.025411


In [95]:
# method for getting svr prediction, given state and action
def SVR_Q_Approx(state, action, clf):
    i_action = {'d': 0, 'e': 1, 'n': 2, 'p': 3, 's': 4, 'w': 5}
    x_input = np.append(state, i_action[action]).astype('float64')
    input_df = pd.DataFrame(scale(x_input).reshape(1, -1))
    return clf.predict(input_df)[0]

In [97]:
state = np.array([14, 10, 1, 3, 19, 14, 0, 1, 1, 1, 0]) 
action = 'e'
SVR_Q_Approx(state=state, action=action, clf=clf)
#i_action = {'d': 0, 'e': 1, 'n': 2, 'p': 3, 's': 4, 'w': 5}
# x_input = np.append(state, i_action[action]).astype('float64')
# input_df = pd.DataFrame(scale(x_input).reshape(1, -1))
# clf.predict(input_df)[0]

-2.4674814471129709

In [102]:
# save svr model to file
import pickle
def save_model(clf, filename):
    with open(filename, 'wb') as output:  # Overwrites any existing file.
        pickle.dump(clf, output, pickle.HIGHEST_PROTOCOL)

In [103]:
save_model(clf=clf, filename="taxi-q-svr.pkl")