# 1. Import Libraries

In [41]:
from get_files_dynamic import get_super_table
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
import time

# 2. Goal of Phase 1 

There is a slight change of plans. Instead of breaking up phases 1 and 3, I will do them together.   
So the goal is to make a few models. See how long they take to process the data / train the model and make predictions.  
For all cases we will tune the model to its best accuracy possible - using as much varaibles as possible   
Then after that we will train XX models - one for each camera  
To test the speed, we will do   
1. single query one model 
2. multiple query one model 
3. single query multiple camera 
4. multiple query multiple camera 

This will be done for the single model as well as the multiple model  
That way we can see the time taken for each  

The difference between single and multiple query can be like single is one by one input into the model.  
Multiple can be pass in all at one go 

For the first attempt, i gonna focus on the decision tree since it is the fastest to train so far and is giving good accuracy. Then from there we'll see how to extend to the rest 

Technically can make the single model also have the camera id - but then will be like a bit of overlap with the multiple model idea. But nothing wrong with having it in, depends on the direction we want to take. 

# 3. Get Dataset 

There are congestion with NaN - technically can try fix but will be time consuming cos need figure out dynamically things.  
Also did the automated pull stop or something? 

In [124]:
df = get_super_table()

In [126]:
df.head()

Unnamed: 0,call_timestamp,cam_id,non_rainfall_station_id,rainfall_station_id,2hr_forecast_area,compass,direction,rainfall_realtime,wind_speed_realtime,wind_dir_realtime,...,24hr_period_1_start,24hr_period_1_end,24hr_period_1,24hr_period_2_start,24hr_period_2_end,24hr_period_2,24hr_period_3_start,24hr_period_3_end,24hr_period_3,trafficCongestion
0,2022-04-12T130408,1001,S107,S119,Kallang,south,1,0.0,4.0,141.0,...,2022-04-12T000000,2022-04-12T060000,Partly Cloudy (Night),2022-04-12T060000,2022-04-12T120000,Cloudy,2022-04-12T120000,2022-04-12T180000,Cloudy,
1,2022-04-12T130408,1001,S107,S119,Kallang,south,2,0.0,4.0,141.0,...,2022-04-12T000000,2022-04-12T060000,Partly Cloudy (Night),2022-04-12T060000,2022-04-12T120000,Cloudy,2022-04-12T120000,2022-04-12T180000,Cloudy,
2,2022-04-12T130408,1501,S107,S108,City,south,1,0.0,4.0,141.0,...,2022-04-12T000000,2022-04-12T060000,Partly Cloudy (Night),2022-04-12T060000,2022-04-12T120000,Cloudy,2022-04-12T120000,2022-04-12T180000,Cloudy,
3,2022-04-12T130408,1502,S107,S108,City,south,1,0.0,4.0,141.0,...,2022-04-12T000000,2022-04-12T060000,Partly Cloudy (Night),2022-04-12T060000,2022-04-12T120000,Cloudy,2022-04-12T120000,2022-04-12T180000,Cloudy,
4,2022-04-12T130408,1503,S107,S108,City,south,1,0.0,4.0,141.0,...,2022-04-12T000000,2022-04-12T060000,Partly Cloudy (Night),2022-04-12T060000,2022-04-12T120000,Cloudy,2022-04-12T120000,2022-04-12T180000,Cloudy,


In [122]:
df.trafficCongestion.unique()

array([nan, 'None', 'Medium', 'Mild', 'Heavy'], dtype=object)

## 2.1 Transformations

Note that i still kept the station/area identifiers.  
Should probably drop them but i think will drop later manually when needed 

In [44]:
# May just end up dropping since too much effort to convert and very little values 
df.call_timestamp = pd.to_datetime(df.call_timestamp)
df["24hr_start"] = pd.to_datetime(df["24hr_start"])
df["24hr_end"] = pd.to_datetime(df["24hr_end"])
df["24hr_period_1_start"] = pd.to_datetime(df["24hr_period_1_start"])
df["24hr_period_1_end"] = pd.to_datetime(df["24hr_period_1_end"])
df["24hr_period_2_start"] = pd.to_datetime(df["24hr_period_2_start"])
df["24hr_period_2_end"] = pd.to_datetime(df["24hr_period_2_end"])
df["24hr_period_3_start"] = pd.to_datetime(df["24hr_period_3_start"])
df["24hr_period_3_end"] = pd.to_datetime(df["24hr_period_3_end"])

# Make dummy values
dummy_df = pd.get_dummies(df.drop(["trafficCongestion", "rainfall_station_id", "non_rainfall_station_id",
                                 "2hr_forecast_area", "compass"], axis = 1))
df_keep = df[["trafficCongestion", "rainfall_station_id", "non_rainfall_station_id", "2hr_forecast_area", "compass"]]

# reform with dropped values 
dummy_df = pd.merge(dummy_df, df_keep, left_index=True, right_index=True)

In [45]:
numeric_df = df[["rainfall_realtime", "wind_speed_realtime", "wind_dir_realtime", 'humidity_realtime', 'air_temp_realtime', 
           '4day_temperature_low_1', '4day_temperature_low_2', '4day_temperature_low_3', '4day_temperature_low_4',
           '4day_temperature_high_1', '4day_temperature_high_2', '4day_temperature_high_3', '4day_temperature_high_4',
           '4day_relative_humidity_low_1', '4day_relative_humidity_low_2','4day_relative_humidity_low_3',
           '4day_relative_humidity_low_4', '4day_relative_humidity_high_1', '4day_relative_humidity_high_2',
           '4day_relative_humidity_high_3', '4day_relative_humidity_high_4','4day_wind_speed_low_1',
           '4day_wind_speed_low_2', '4day_wind_speed_low_3', '4day_wind_speed_low_4','4day_wind_speed_high_1',
           '4day_wind_speed_high_2','4day_wind_speed_high_3', '4day_wind_speed_high_4', 
           '24hr_general_relative_humidity_low','24hr_general_relative_humidity_high', '24hr_general_temperature_low',
           '24hr_general_temperature_high', '24hr_general_wind_speed_low', '24hr_general_wind_speed_high', 
           "trafficCongestion", 
          "cam_id", "direction", "rainfall_station_id", "non_rainfall_station_id", "2hr_forecast_area", "compass"]]

In [46]:
drop_identifiers = ["rainfall_station_id", "non_rainfall_station_id", "2hr_forecast_area", "compass"]
drop_camera = ["cam_id", "direction"]
drop_time = ["call_timestamp", "24hr_start", "24hr_end", "24hr_period_1_start", "24hr_period_1_end",
            "24hr_period_2_start", "24hr_period_2_end", "24hr_period_3_start", "24hr_period_3_end"]

## 2.2 Splitting the Dataset 

As with any model training, we will split the dataset into three portions. The first will be the training dataset using which the models will be trained upon. Next will the validation dataset which is used to help measure the performance and any tuning parameters if present. Finally, there is the test dataset. This dataset will be the unseen data that is not used at all until the final stage when determining a model. 

In [47]:
def get_dummy_data(remove_camera = True, validation_set = True, remove_time = True):
    df = dummy_df.drop(drop_identifiers, axis = 1)
    
    if remove_camera:
        df = df.drop(drop_camera, axis = 1)
    
    if remove_time:
        df = df.drop(drop_time, axis = 1)
    
    df = df.dropna()
    
    X = df.drop("trafficCongestion", axis = 1)
    Y = df.trafficCongestion.to_numpy()
    
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=42)
    
    if validation_set:
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.25, random_state=42)
        return X_train, X_valid, X_test, y_train, y_valid, y_test
    
    return X_train, X_test, y_train, y_test

In [48]:
def get_numeric_data(remove_camera = True, validation_set = True):
    df = numeric_df.drop(drop_identifiers, axis = 1)
    
    if remove_camera:
        df = df.drop(drop_camera, axis = 1)
    
    df = df.dropna()
    
    X = df.drop("trafficCongestion", axis = 1)
    Y = df.trafficCongestion.to_numpy()
    
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=42)
    
    if validation_set:
        X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.25, random_state=42)
        return X_train, X_valid, X_test, y_train, y_valid, y_test
    
    return X_train, X_test, y_train, y_test

# 3. Helper Functions

In [49]:
def plot_confusion(predicted, actual):
    cm = confusion_matrix(actual, predicted)
    cmd = ConfusionMatrixDisplay(cm)
    cmd.plot(colorbar= False, cmap = "binary")
    
def get_confusion(predicted, actual):
    cm = confusion_matrix(actual, predicted)
    return pd.DataFrame(cm)

def get_accuracy(predicted, actual):
    return np.mean(predicted == actual)

In [50]:
# List of all camera ids 
all_camera = df.cam_id.unique()

In [51]:
def single_model_single_query(model, x_val, y_val, to_print = True):
    acc = []
    start_time = time.perf_counter()
    for i in range(len(x_val)):
        accuracy = get_accuracy(y_val[i], model.predict(x_val.iloc[i:i+1,:]))
        acc.append(accuracy)
    end_time = time.perf_counter()
    if to_print:
        print(f"The accuracy is: {np.mean(acc)}")
        print(f"The time taken to predict is {end_time - start_time:0.4f} seconds")
    return np.mean(acc)
    
def single_model_multiple_query(model, x_val, y_val, to_print = True):
    start_time = time.perf_counter()
    accuracy = get_accuracy(y_val, model.predict(x_val))
    end_time = time.perf_counter()
    if to_print:
        print(f"The accuracy is: {accuracy}")
        print(f"The time taken to predict is {end_time - start_time:0.4f} seconds")
    return accuracy
    
def multiple_model_single_query(model_dict, x_val, y_val, to_print = True):
    start_time = time.perf_counter()
    acc = []
    for cam in all_camera:
        acc_inner = []
        index = x_val.cam_id == cam
        y_test = y_val[index]
        x_test = x_val[index]
        for i in range(sum(index)):
            accuracy = get_accuracy(y_test[i], model_dict[cam].predict(x_test.iloc[i:i+1,:]))
            acc_inner.append(accuracy)
        acc.append(np.mean(acc_inner))
    end_time = time.perf_counter()
    if to_print:
        print(f"The average accuracy is: {np.mean(acc)}")
        print(f"The total time taken to predict is {end_time - start_time:0.4f} seconds")
    return np.mean(acc)
    
def multiple_model_multiple_query(model_dict, x_val, y_val, to_print = True):
    start_time = time.perf_counter()
    acc = []
    for cam in all_camera:
        index = x_val.cam_id == cam
        accuracy = get_accuracy(y_val[index], model_dict[cam].predict(x_val[index]))
        acc.append(accuracy)
    end_time = time.perf_counter()
    if to_print:
        print(f"The average accuracy is: {np.mean(acc)}")
        print(f"The total time taken to predict is {end_time - start_time:0.4f} seconds")
    return np.mean(acc)

# 4. Models 

This section will cover building the models and then testing them as described in section 2.  
All models are built using the scikitlearn package. 

Note that if we want to focus on a few can look into nearest neighbour and random forest only cos they can control the number of jobs (ie the number of processes - parallelisation) 

## 4.1 Logistic Regression - Multinomial Case 

A simple model - but basically most of it end in failure   
So we drop 

### 4.1.1 Dummy Df 

Failed - took forever and still no result

In [52]:
## Dont bother running will take forever and not work 
# start_time = time.perf_counter()
# clf = LogisticRegression(random_state=0, max_iter = 10000).fit(X_train, y_train)
# end_time = time.perf_counter()
# print(f"Time to train logistic regression is {end_time - start_time:0.4f}")

### 4.1.2 Numeric Df 

#### 4.1.2.1 Single Model 

In [53]:
X_train, X_test, y_train, y_test = get_numeric_data(remove_camera=True, validation_set=False)

In [54]:
# start_time = time.perf_counter()
# clf = LogisticRegression(random_state=0, max_iter = 10000).fit(X_train, y_train)
# end_time = time.perf_counter()
# print(f"Time to train logistic regression is {end_time - start_time:0.4f}")

In [55]:
# single_model_single_query(clf, X_test, y_test)

In [56]:
# single_model_multiple_query(clf, X_test, y_test)

#### 4.1.2.2 Multiple Model 

Failed - does not converge

In [57]:
X_train, X_test, y_train, y_test = get_numeric_data(remove_camera = False, validation_set = False)

In [58]:
# all_models = {}

# start_time = time.perf_counter()
# for cam in all_camera:
#     index = X_train.cam_id == cam
#     train_x = X_train[index]
#     train_y = y_train[index]
    
#     clf = LogisticRegression(random_state=0, max_iter = 10000).fit(train_x, train_y)
    
#     all_models[cam] = clf

# end_time = time.perf_counter()
# print(f"Time to train all the logistic regression models is {end_time - start_time:0.4f}")

## 4.2 K Nearest Neighbour 

This method takes forever to run.  
I need to figure out why is it not running properly  

### 4.2.1 Dummy Df

#### 4.2.1.1 Single Model 

Can tune for number of neighbours   
Can set the number of cpu used 

takes forever to predict 

In [59]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_dummy_data(remove_camera=True, validation_set=True)

In [60]:
start_time = time.perf_counter()
clf = KNeighborsClassifier(n_neighbors=5, n_jobs=-1).fit(X_train, y_train) # 
end_time = time.perf_counter()
print(f"Time to train KNN is {end_time - start_time:0.4f}")

Time to train KNN is 0.1916


In [61]:
# single_model_single_query(clf, X_valid, y_valid)

In [62]:
# single_model_multiple_query(clf, X_test, y_test)

#### 4.2.1.2 Multiple Model

In [63]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_dummy_data(remove_camera=False, validation_set=True)

In [64]:
# all_models = {}
# start_time = time.perf_counter()
# for cam in all_camera:
#     index = X_train.cam_id == cam
#     train_x = X_train[index]
#     train_y = y_train[index]
    
#     clf = KNeighborsClassifier(n_neighbors=5, n_jobs=-1).fit(X_train, y_train)
    
#     all_models[cam] = clf

# end_time = time.perf_counter()
# print(f"Time to train all the KNN is {end_time - start_time:0.4f}")

In [65]:
# multiple_model_single_query(all_models, X_valid, y_valid)

In [66]:
# multiple_model_multiple_query(all_models, X_valid, y_valid)

### 4.2.2 Numeric Df

#### 4.2.2.1 Single Model 

In [67]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_numeric_data(remove_camera=True, validation_set=True)

In [68]:
# start_time = time.perf_counter()
# clf = KNeighborsClassifier(n_neighbors=1, n_jobs=-1).fit(X_train, y_train) # 
# end_time = time.perf_counter()
# print(f"Time to train KNN is {end_time - start_time:0.4f}")

In [69]:
# single_model_single_query(clf, X_valid, y_valid)

In [70]:
# single_model_multiple_query(clf, X_valid, y_valid)

#### 4.2.2.2 Multiple Model 

In [71]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_numeric_data(remove_camera=False, validation_set=True)

In [72]:
# all_models = {}
# start_time = time.perf_counter()
# for cam in all_camera:
#     index = X_train.cam_id == cam
#     train_x = X_train[index]
#     train_y = y_train[index]
    
#     clf = KNeighborsClassifier(n_neighbors=5, n_jobs=-1).fit(X_train, y_train)
    
#     all_models[cam] = clf

# end_time = time.perf_counter()
# print(f"Time to train all the KNN is {end_time - start_time:0.4f}")

In [73]:
# multiple_model_single_query(all_models, X_valid, y_valid)

In [74]:
# multiple_model_multiple_query(all_models, X_valid, y_valid)

## 4.3 Decision Tree

The only properly working model so far 

### 4.3.1 Dummy Df 

#### 4.3.1.1 Single Model 

In [75]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_dummy_data(remove_camera=True, validation_set=True)

In [76]:
start_time = time.perf_counter()
clf = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
end_time = time.perf_counter()
print(f"Time to train decision tree is {end_time - start_time:0.4f}")

Time to train decision tree is 1.6141


In [77]:
_ = single_model_single_query(clf, X_valid, y_valid)

The accuracy is: 0.5801425875445586
The time taken to predict is 57.6524 seconds


In [78]:
_ = single_model_multiple_query(clf, X_valid, y_valid)

The accuracy is: 0.5801425875445586
The time taken to predict is 0.0429 seconds


#### 4.3.1.2 Multiple Model 

In [79]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_dummy_data(remove_camera=False, validation_set=True)

In [80]:
start_time = time.perf_counter()
all_models = {}
for cam in all_camera:
    index = X_train.cam_id == cam
    train_x = X_train[index]
    train_y = y_train[index]
    
    clf = DecisionTreeClassifier(random_state=0).fit(train_x, train_y)
    
    all_models[cam] = clf

end_time = time.perf_counter()
print(f"Time to train all the decision trees are {end_time - start_time:0.4f}")

Time to train all the decision trees are 1.4666


In [81]:
_ = multiple_model_single_query(all_models, X_valid, y_valid)

The average accuracy is: 0.8522838335958195
The total time taken to predict is 60.8998 seconds


In [82]:
_ = multiple_model_multiple_query(all_models, X_valid, y_valid)

The average accuracy is: 0.8522838335958195
The total time taken to predict is 0.2039 seconds


### 4.3.2 Numeric Df 

#### 4.3.2.1 Single Model 

In [83]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_numeric_data(remove_camera=True, validation_set=True)

In [84]:
start_time = time.perf_counter()
clf = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
end_time = time.perf_counter()
print(f"Time to train decision tree is {end_time - start_time:0.4f}")

Time to train decision tree is 0.6631


In [85]:
_ = single_model_single_query(clf, X_valid, y_valid)

The accuracy is: 0.5854057454392955
The time taken to predict is 43.2139 seconds


In [86]:
_ = single_model_multiple_query(clf, X_valid, y_valid)

The accuracy is: 0.5854057454392955
The time taken to predict is 0.0232 seconds


#### 4.3.2.2 Multiple Model 

In [87]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_dummy_data(remove_camera=False, validation_set=True)

In [88]:
start_time = time.perf_counter()
all_models = {}
for cam in all_camera:
    index = X_train.cam_id == cam
    train_x = X_train[index]
    train_y = y_train[index]
    
    clf = DecisionTreeClassifier(random_state=0).fit(train_x, train_y)
    
    all_models[cam] = clf

end_time = time.perf_counter()
print(f"Time to train all the decision trees are {end_time - start_time:0.4f}")

Time to train all the decision trees are 1.5602


In [89]:
_ = multiple_model_single_query(all_models, X_valid, y_valid)

The average accuracy is: 0.8522838335958195
The total time taken to predict is 64.6352 seconds


In [90]:
_ = multiple_model_multiple_query(all_models, X_valid, y_valid)

The average accuracy is: 0.8522838335958195
The total time taken to predict is 0.2106 seconds


## 4.4 Random Forest 

Technically can use the OOB but need read up more of the documentation 

### 4.4.1 Dummy Df 

#### 4.4.1.1 Single Model 

In [91]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_dummy_data(remove_camera=True, validation_set=True)

In [92]:
start_time = time.perf_counter()
clf = RandomForestClassifier(random_state=0).fit(X_train, y_train)
end_time = time.perf_counter()
print(f"Time to train decision tree is {end_time - start_time:0.4f}")

Time to train decision tree is 20.9607


In [93]:
# _ = single_model_single_query(clf, X_valid, y_valid)  # Forever to run 

In [94]:
_ = single_model_multiple_query(clf, X_valid, y_valid)

The accuracy is: 0.5816942755294611
The time taken to predict is 1.1842 seconds


#### 4.4.1.2 Multiple Model 

In [95]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_dummy_data(remove_camera=False, validation_set=True)

In [96]:
start_time = time.perf_counter()
all_models = {}
for cam in all_camera:
    index = X_train.cam_id == cam
    train_x = X_train[index]
    train_y = y_train[index]
    
    clf = RandomForestClassifier(random_state=0).fit(train_x, train_y)
    
    all_models[cam] = clf

end_time = time.perf_counter()
print(f"Time to train all the decision trees are {end_time - start_time:0.4f}")

Time to train all the decision trees are 21.2379


In [97]:
# _ = multiple_model_single_query(all_models, X_valid, y_valid)

In [98]:
_ = multiple_model_multiple_query(all_models, X_valid, y_valid)

The average accuracy is: 0.8778174572353196
The total time taken to predict is 1.7076 seconds


### 4.4.2 Numeric Df 

#### 4.4.2.1 Single Model 

In [99]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_numeric_data(remove_camera=True, validation_set=True)

In [100]:
start_time = time.perf_counter()
clf = RandomForestClassifier(random_state=0).fit(X_train, y_train)
end_time = time.perf_counter()
print(f"Time to train decision tree is {end_time - start_time:0.4f}")

Time to train decision tree is 10.7204


In [101]:
# _ = single_model_single_query(clf, X_valid, y_valid)  # Forever to run 

In [102]:
_ = single_model_multiple_query(clf, X_valid, y_valid)

The accuracy is: 0.5857831830572447
The time taken to predict is 1.0601 seconds


**Repeating single model but with the camera**

In [103]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_numeric_data(remove_camera=False, validation_set=True)

In [104]:
start_time = time.perf_counter()
clf = RandomForestClassifier(random_state=0).fit(X_train, y_train)
end_time = time.perf_counter()
print(f"Time to train decision tree is {end_time - start_time:0.4f}")

Time to train decision tree is 19.7334


In [105]:
_ = single_model_multiple_query(clf, X_valid, y_valid)

The accuracy is: 0.6067100020968756
The time taken to predict is 1.5987 seconds


#### 4.4.2.2 Mutliple Model 

In [106]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_numeric_data(remove_camera=False, validation_set=True)

In [107]:
start_time = time.perf_counter()
all_models = {}
for cam in all_camera:
    index = X_train.cam_id == cam
    train_x = X_train[index]
    train_y = y_train[index]
    
    clf = RandomForestClassifier(random_state=0).fit(train_x, train_y)
    
    all_models[cam] = clf

end_time = time.perf_counter()
print(f"Time to train all the decision trees are {end_time - start_time:0.4f}")

Time to train all the decision trees are 18.6973


In [108]:
# _ = multiple_model_single_query(all_models, X_valid, y_valid)

In [109]:
_ = multiple_model_multiple_query(all_models, X_valid, y_valid)

The average accuracy is: 0.8809993683651611
The total time taken to predict is 1.6336 seconds


## 4.5 Neural Netowork 

### 4.5.1 Dummy Df 

In [114]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_dummy_data(remove_camera=False, validation_set=True)

In [115]:
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(2,4,16,32,16,4,2), random_state=1).fit(X_train, y_train)

In [116]:
# _ = single_model_single_query(clf, X_valid, y_valid)  # Forever to run 

In [117]:
_ = single_model_multiple_query(clf, X_valid, y_valid)

The accuracy is: 0.5863493394841686
The time taken to predict is 0.0940 seconds


### 4.5.2 Numeric Df 

In [118]:
X_train, X_valid, X_test, y_train, y_valid, y_test = get_numeric_data(remove_camera=False, validation_set=True)

In [119]:
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(2,4,16,32,16,4,2), random_state=1).fit(X_train, y_train)

In [121]:
# _ = single_model_single_query(clf, X_valid, y_valid)  # Forever to run 

In [120]:
_ = single_model_multiple_query(clf, X_valid, y_valid)

The accuracy is: 0.5863493394841686
The time taken to predict is 0.0637 seconds
