*Original code: Beating the Benchmark from West Nile Virus Prediction @ Kaggle by Abhihsek. Modified by Brendan Lane*

In [119]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution1D, MaxPooling1D
from keras import regularizers
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import *
from keras.optimizers import Adam
from keras.initializers import *

%matplotlib inline

In [120]:
%%javascript
$('.nbp-app-bar').toggle()

<IPython.core.display.Javascript object>

In [197]:
# Load dataset 
train = pd.read_csv('../assets/train.csv')
test = pd.read_csv('../assets/test.csv')
sample = pd.read_csv('../assets/sampleSubmission.csv')
weather = pd.read_csv('../assets/weather.csv')

In [198]:
# Not using codesum for this benchmark
weather.drop('CodeSum', axis='columns', inplace=True)

In [199]:
# Split station 1 and 2 and join horizontally
weather_stn1 = weather[weather['Station']==1]
weather_stn2 = weather[weather['Station']==2]
weather_stn1 = weather_stn1.drop('Station', axis=1)
weather_stn2 = weather_stn2.drop('Station', axis=1)
weather = weather_stn1.merge(weather_stn2, on='Date')

In [200]:
# replace some missing values and T with -1
weather = weather.replace('M', -1)
weather = weather.replace('-', -1)
weather = weather.replace('T', -1)
weather = weather.replace(' T', -1)
weather = weather.replace('  T', -1)

In [201]:
# Functions to extract month and day from dataset
# You can also use parse_dates of Pandas.
def create_year(x):
    return x.split('-')[0]
def create_month(x):
    return x.split('-')[1]
def create_day(x):
    return x.split('-')[2]

train['month'] = train.Date.apply(create_month)
train['day'] = train.Date.apply(create_day)
train['year'] = train.Date.apply(create_year)

test['month'] = test.Date.apply(create_month)
test['day'] = test.Date.apply(create_day)
test['year'] = test.Date.apply(create_year)

In [202]:
# drop address columns
train = train.drop(['Address', 'AddressNumberAndStreet'], axis = 1)
test = test.drop(['Id', 'Address', 'AddressNumberAndStreet'], axis = 1)

In [203]:
# Merge with weather data
train = train.merge(weather, on='Date')
test = test.merge(weather, on='Date')
train = train.drop(['Date'], axis = 1)
test = test.drop(['Date'], axis = 1)

In [204]:
# Convert categorical data to numbers
lbl = LabelEncoder()
lbl.fit(list(train['Species'].values) + list(test['Species'].values))
train['Species'] = lbl.transform(train['Species'].values)
test['Species'] = lbl.transform(test['Species'].values)

lbl.fit(list(train['Street'].values) + list(test['Street'].values))
train['Street'] = lbl.transform(train['Street'].values)
test['Street'] = lbl.transform(test['Street'].values)

lbl.fit(list(train['Trap'].values) + list(test['Trap'].values))
train['Trap'] = lbl.transform(train['Trap'].values)
test['Trap'] = lbl.transform(test['Trap'].values)

In [205]:
# drop columns with -1s
train = train.loc[:,(train != -1).any(axis=0)]
test = test.loc[:,(test != -1).any(axis=0)]

In [206]:
# covert all data types to floats
train = train.astype(float)
test = test.astype(float)

In [207]:
## feature engineering
# Temperature difference
train['temp_delta_x'] = train.Tmax_x - train.Tmin_x
train['temp_delta_y'] = train.Tmax_y - train.Tmin_y
train['temp_delta_x'] = train.Tmax_x - train.Tmin_x
train['temp_delta_y'] = train.Tmax_y - train.Tmin_y
test['temp_delta_x'] = test.Tmax_x - test.Tmin_x
test['temp_delta_y'] = test.Tmax_y - test.Tmin_y
test['temp_delta_x'] = test.Tmax_x - test.Tmin_x
test['temp_delta_y'] = test.Tmax_y - test.Tmin_y

In [208]:
# Is raining?
train['israining_x'] = (train.PrecipTotal_x > 0).astype(int)
train['israining_y'] = (train.PrecipTotal_y > 0).astype(int)
test['israining_x'] = (test.PrecipTotal_x > 0).astype(int)
test['israining_y'] = (test.PrecipTotal_y > 0).astype(int)

In [209]:
# Length of sunlight
train['sunlight'] = train.Sunset_x - train.Sunrise_x
test['sunlight'] = test.Sunset_x - test.Sunrise_x

In [210]:
# Replace outliers with median value
train.WetBulb_x = train.WetBulb_x.apply(lambda x: train.WetBulb_x.median() if x < 0 else x)
test.WetBulb_x = test.WetBulb_x.apply(lambda x: test.WetBulb_x.median() if x < 0 else x)

train.StnPressure_x = train.StnPressure_x.apply(lambda x: train.StnPressure_x.median() if x < 0 else x)
test.StnPressure_x = test.StnPressure_x.apply(lambda x: test.StnPressure_x.median() if x < 0 else x)

In [211]:
# Relative humidity approximation
train['RH'] = 100 - (25 / 9) * (train.Tavg_x - train.DewPoint_x)
test['RH'] = 100 - (25 / 9) * (test.Tavg_x - test.DewPoint_x)

In [212]:
# Is heat wave?
train['isheat'] = (train.Heat_x > 0).astype(float)
test['isheat'] = (test.Heat_x > 0).astype(float)

In [213]:
# Vectorize resultant wind into north and east components
wind_dir_north_x = train.ResultDir_x.apply(lambda x: np.sin(np.deg2rad(x)))
wind_dir_east_x = train.ResultDir_x.apply(lambda x: np.cos(np.deg2rad(x)))
train['wind_north_x'] = wind_dir_north_x * train.ResultSpeed_x
train['wind_east_x'] = wind_dir_east_x * train.ResultSpeed_x

wind_dir_north_y = train.ResultDir_y.apply(lambda x: np.sin(np.deg2rad(x)))
wind_dir_east_y = train.ResultDir_y.apply(lambda x: np.cos(np.deg2rad(x)))
train['wind_north_y'] = wind_dir_north_y * train.ResultSpeed_y
train['wind_east_y'] = wind_dir_east_y * train.ResultSpeed_y

wind_dir_north_x = test.ResultDir_x.apply(lambda x: np.sin(np.deg2rad(x)))
wind_dir_east_x = test.ResultDir_x.apply(lambda x: np.cos(np.deg2rad(x)))
test['wind_north_x'] = wind_dir_north_x * test.ResultSpeed_x
test['wind_east_x'] = wind_dir_east_x * test.ResultSpeed_x

wind_dir_north_y = test.ResultDir_y.apply(lambda x: np.sin(np.deg2rad(x)))
wind_dir_east_y = test.ResultDir_y.apply(lambda x: np.cos(np.deg2rad(x)))
test['wind_north_y'] = wind_dir_north_y * test.ResultSpeed_y
test['wind_east_y'] = wind_dir_east_y * test.ResultSpeed_y

In [214]:
# Vectorize average wind into north and east components
train['avg_wind_north_x'] = wind_dir_north_x * train.AvgSpeed_x
train['avg_wind_east_x'] = wind_dir_east_x * train.AvgSpeed_x

train['avg_wind_north_y'] = wind_dir_north_y * train.AvgSpeed_y
train['avg_wind_east_y'] = wind_dir_east_y * train.AvgSpeed_y

test['avg_wind_north_x'] = wind_dir_north_x * test.AvgSpeed_x
test['avg_wind_east_x'] = wind_dir_east_x * test.AvgSpeed_x

test['avg_wind_north_y'] = wind_dir_north_y * test.AvgSpeed_y
test['avg_wind_east_y'] = wind_dir_east_y * test.AvgSpeed_y

In [215]:
# train = pd.concat([train.reset_index(), (pd.get_dummies(train.year)).reset_index(), (pd.get_dummies(train.month)).reset_index(), (pd.get_dummies(train.Species)).reset_index()], axis='columns')
# test = pd.concat([test.reset_index(), (pd.get_dummies(test.year)).reset_index(), (pd.get_dummies(test.month)).reset_index(), (pd.get_dummies(test.Species)).reset_index()], axis='columns')

In [216]:
train = pd.get_dummies(train, columns=['Species'], drop_first=True)
test = pd.get_dummies(test, columns=['Species'], drop_first=True)

In [217]:
drop_list = ['PrecipTotal_x', 'PrecipTotal_y', 'Sunrise_x', 'Sunset_x',
             'Heat_x', 'Depth_x', 'SnowFall_x', 'ResultDir_x', 'ResultSpeed_x',
             'ResultDir_y', 'ResultSpeed_y', 'AvgSpeed_x', 'AvgSpeed_y', 'year',
             'month', 'day']

train.drop(drop_list + ['NumMosquitos'], axis='columns', inplace=True)
test.drop(drop_list + ['Species_7.0'], axis='columns', inplace=True)

In [218]:
# cols_to_use = [
#  'Block',
#  'Street',
#  'Trap',
#  'Latitude',
#  'Longitude',
#  'AddressAccuracy',
#  'day',
#  'Tmax_x',
#  'Tmin_x',
#  'Tavg_x',
#  'Depart_x',
#  'DewPoint_x',
#  'WetBulb_x',
#  'Cool_x',
#  'StnPressure_x',
#  'SeaLevel_x',
#  'Tmax_y',
#  'Tmin_y',
#  'Tavg_y',
#  'DewPoint_y',
#  'WetBulb_y',
#  'Heat_y',
#  'Cool_y',
#  'StnPressure_y',
#  'SeaLevel_y',
#  'temp_delta_x',
#  'temp_delta_y',
#  'israining_x',
#  'israining_y',
#  'sunlight',
#  'RH',
#  'isheat',
#  'wind_north_x',
#  'wind_east_x',
#  'wind_north_y',
#  'wind_east_y',
#  'avg_wind_north_x',
#  'avg_wind_east_x',
#  'avg_wind_north_y',
#  'avg_wind_east_y',
#  6.0,
#  7.0,
#  8.0,
#  9.0,
#  10.0,
#  0.0,
#  1.0,
#  2.0,
#  3.0,
#  4.0,
#  5.0,
#  6.0,
#  7.0]

# train = train[cols_to_use + ['WnvPresent']]
# test = test[cols_to_use]

In [219]:
# Set dependent (y) and indepedent (X) variables
y = train.WnvPresent
X = train.drop('WnvPresent', axis='columns')

In [220]:
# Baseline accuracy
1 - y.mean()

0.9475537787930707

In [221]:
# Scale X for modeling
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

In [222]:
# Train/test split for validation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [223]:
# Random forest
cross_val_score(RandomForestClassifier(max_depth=4, max_features=5), X_train, y_train,
                cv=5, n_jobs=-1, verbose=0).mean()

0.9475250626167533

*Doesn't break baseline. What is happening?*

In [224]:
# Confustion matrix
clf = RandomForestClassifier(max_depth=4, max_features=5)
clf.fit(X_train, y_train)
confusion_matrix(y_test, clf.predict(X_test))

array([[1992,    0],
       [ 110,    0]])

*The model is maximizing accuracy by predicting "No" everytime. This is the result of have unbalanced classes.*

In [225]:
# Dataframe of positive West Nile results
pos_train = train[train.WnvPresent == 1]

In [226]:
# Oversampled unrepresented class
traino = pd.concat([train, pos_train], axis=0)
for _ in range(10):
    traino = pd.concat([traino, pos_train], axis=0)

In [227]:
# Oversampled train/test split
yo = traino.WnvPresent
Xo = traino.drop('WnvPresent', axis='columns')
Xo = pd.DataFrame(scaler.fit_transform(Xo), columns=Xo.columns)
X_traino, X_testo, y_traino, y_testo = train_test_split(Xo, yo, test_size=0.2, stratify=yo)

### We need to maximize recall

In [228]:
clf = RandomForestClassifier(max_depth=4, max_features=7)
clf.fit(X_traino, y_traino)
print(confusion_matrix(y_test, clf.predict(X_test)))
print(classification_report(y_test, clf.predict(X_test), target_names=['No', 'Yes']))

[[1554  438]
 [  38   72]]
             precision    recall  f1-score   support

         No       0.98      0.78      0.87      1992
        Yes       0.14      0.65      0.23       110

avg / total       0.93      0.77      0.83      2102



In [229]:
np.random.seed()

In [230]:
model = Sequential()
input_units = X_train.shape[1]
hidden_units = input_units

model.add(Dense(hidden_units * 2,
                input_dim=input_units,
                activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(hidden_units * 2,
                input_dim=input_units,
                activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(10,
                input_dim=input_units,
                activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
train_loss = []
test_loss = []
train_acc = []
test_acc = []

In [None]:
%%time
hist = model.fit(X_traino, y_traino,
                 validation_data=(X_test, y_test),
                 epochs=750,
                 batch_size=64,
                 shuffle=True)

Train on 13253 samples, validate on 2102 samples
Epoch 1/750
Epoch 2/750
Epoch 3/750
Epoch 4/750
Epoch 5/750
Epoch 6/750
Epoch 7/750
Epoch 8/750
Epoch 9/750
Epoch 10/750
Epoch 11/750
Epoch 12/750
Epoch 13/750
Epoch 14/750
Epoch 15/750
Epoch 16/750
Epoch 17/750
Epoch 18/750
Epoch 19/750
Epoch 20/750
Epoch 21/750
Epoch 22/750
Epoch 23/750
Epoch 24/750
Epoch 25/750
Epoch 26/750
Epoch 27/750
Epoch 28/750
Epoch 29/750
Epoch 30/750
Epoch 31/750
Epoch 32/750
Epoch 33/750
Epoch 34/750
Epoch 35/750
Epoch 36/750
Epoch 37/750
Epoch 38/750
Epoch 39/750
Epoch 40/750
Epoch 41/750
Epoch 42/750
Epoch 43/750
Epoch 44/750
Epoch 45/750
Epoch 46/750
Epoch 47/750
Epoch 48/750
Epoch 49/750
Epoch 50/750
Epoch 51/750
Epoch 52/750
Epoch 53/750
Epoch 54/750
Epoch 55/750
Epoch 56/750
Epoch 57/750
Epoch 58/750
Epoch 59/750


Epoch 60/750
Epoch 61/750
Epoch 62/750
Epoch 63/750
Epoch 64/750
Epoch 65/750
Epoch 66/750
Epoch 67/750
Epoch 68/750
Epoch 69/750
Epoch 70/750
Epoch 71/750
Epoch 72/750
Epoch 73/750
Epoch 74/750
Epoch 75/750
Epoch 76/750
Epoch 77/750
Epoch 78/750
Epoch 79/750
Epoch 80/750
Epoch 81/750
Epoch 82/750
Epoch 83/750
Epoch 84/750
Epoch 85/750
Epoch 86/750
Epoch 87/750
Epoch 88/750
Epoch 89/750
Epoch 90/750
Epoch 91/750
Epoch 92/750
Epoch 93/750
Epoch 94/750
Epoch 95/750
Epoch 96/750
Epoch 97/750
Epoch 98/750
Epoch 99/750
Epoch 100/750
Epoch 101/750
Epoch 102/750
Epoch 103/750
Epoch 104/750
Epoch 105/750
Epoch 106/750
Epoch 107/750
Epoch 108/750
Epoch 109/750
Epoch 110/750
Epoch 111/750
Epoch 112/750
Epoch 113/750
Epoch 114/750
Epoch 115/750
Epoch 116/750
Epoch 117/750


Epoch 118/750
Epoch 119/750
Epoch 120/750
Epoch 121/750
Epoch 122/750
Epoch 123/750
Epoch 124/750
Epoch 125/750
Epoch 126/750
Epoch 127/750
Epoch 128/750
Epoch 129/750
Epoch 130/750
Epoch 131/750
Epoch 132/750
Epoch 133/750
Epoch 134/750
Epoch 135/750
Epoch 136/750
Epoch 137/750
Epoch 138/750
Epoch 139/750
Epoch 140/750
Epoch 141/750
Epoch 142/750
Epoch 143/750
Epoch 144/750
Epoch 145/750
Epoch 146/750
Epoch 147/750
Epoch 148/750
Epoch 149/750
Epoch 150/750
Epoch 151/750
Epoch 152/750
Epoch 153/750
Epoch 154/750
Epoch 155/750
Epoch 156/750
Epoch 157/750
Epoch 158/750
Epoch 159/750
Epoch 160/750
Epoch 161/750
Epoch 162/750
Epoch 163/750
Epoch 164/750
Epoch 165/750
Epoch 166/750
Epoch 167/750
Epoch 168/750
Epoch 169/750
Epoch 170/750
Epoch 171/750
Epoch 172/750
Epoch 173/750
Epoch 174/750
Epoch 175/750
Epoch 176/750


Epoch 177/750
Epoch 178/750
Epoch 179/750
Epoch 180/750
Epoch 181/750
Epoch 182/750
Epoch 183/750
Epoch 184/750
Epoch 185/750
Epoch 186/750
Epoch 187/750
Epoch 188/750
Epoch 189/750
Epoch 190/750
Epoch 191/750
Epoch 192/750
Epoch 193/750
Epoch 194/750
Epoch 195/750
Epoch 196/750
Epoch 197/750
Epoch 198/750
Epoch 199/750
Epoch 200/750
Epoch 201/750
Epoch 202/750
Epoch 203/750
Epoch 204/750
Epoch 205/750
Epoch 206/750
Epoch 207/750
Epoch 208/750
Epoch 209/750
Epoch 210/750
Epoch 211/750
Epoch 212/750
Epoch 213/750
Epoch 214/750
Epoch 215/750
Epoch 216/750
Epoch 217/750
Epoch 218/750
Epoch 219/750
Epoch 220/750
Epoch 221/750
Epoch 222/750
Epoch 223/750
Epoch 224/750
Epoch 225/750
Epoch 226/750
Epoch 227/750
Epoch 228/750
Epoch 229/750
Epoch 230/750
Epoch 231/750
Epoch 232/750
Epoch 233/750
Epoch 234/750


Epoch 235/750
Epoch 236/750
Epoch 237/750
Epoch 238/750
Epoch 239/750
Epoch 240/750
Epoch 241/750
Epoch 242/750
Epoch 243/750
Epoch 244/750
Epoch 245/750
Epoch 246/750
Epoch 247/750
Epoch 248/750
Epoch 249/750
Epoch 250/750
Epoch 251/750
Epoch 252/750
Epoch 253/750
Epoch 254/750
Epoch 255/750
Epoch 256/750
Epoch 257/750
Epoch 258/750
Epoch 259/750
Epoch 260/750
Epoch 261/750
Epoch 262/750
Epoch 263/750
Epoch 264/750
Epoch 265/750
Epoch 266/750
Epoch 267/750
Epoch 268/750
Epoch 269/750
Epoch 270/750
Epoch 271/750
Epoch 272/750
Epoch 273/750
Epoch 274/750
Epoch 275/750
Epoch 276/750
Epoch 277/750
Epoch 278/750
Epoch 279/750
Epoch 280/750
Epoch 281/750
Epoch 282/750
Epoch 283/750
Epoch 284/750
Epoch 285/750
Epoch 286/750
Epoch 287/750
Epoch 288/750
Epoch 289/750
Epoch 290/750
Epoch 291/750
Epoch 292/750


Epoch 293/750
Epoch 294/750
Epoch 295/750
Epoch 296/750
Epoch 297/750
Epoch 298/750
Epoch 299/750
Epoch 300/750
Epoch 301/750
Epoch 302/750
Epoch 303/750
Epoch 304/750
Epoch 305/750
Epoch 306/750
Epoch 307/750
Epoch 308/750
Epoch 309/750
Epoch 310/750
Epoch 311/750
Epoch 312/750
Epoch 313/750
Epoch 314/750
Epoch 315/750
Epoch 316/750
Epoch 317/750
Epoch 318/750
Epoch 319/750
Epoch 320/750
Epoch 321/750
Epoch 322/750
Epoch 323/750
Epoch 324/750
Epoch 325/750
Epoch 326/750
Epoch 327/750
Epoch 328/750
Epoch 329/750
Epoch 330/750
Epoch 331/750
Epoch 332/750
Epoch 333/750
Epoch 334/750
Epoch 335/750
Epoch 336/750
Epoch 337/750
Epoch 338/750
Epoch 339/750
Epoch 340/750
Epoch 341/750
Epoch 342/750
Epoch 343/750
Epoch 344/750
Epoch 345/750
Epoch 346/750
Epoch 347/750
Epoch 348/750
Epoch 349/750
Epoch 350/750


Epoch 351/750
Epoch 352/750
Epoch 353/750
Epoch 354/750
Epoch 355/750
Epoch 356/750
Epoch 357/750
Epoch 358/750
Epoch 359/750
Epoch 360/750
Epoch 361/750
Epoch 362/750
Epoch 363/750
Epoch 364/750
Epoch 365/750
Epoch 366/750
Epoch 367/750
Epoch 368/750
Epoch 369/750
Epoch 370/750
Epoch 371/750
Epoch 372/750
Epoch 373/750
Epoch 374/750
Epoch 375/750
Epoch 376/750
Epoch 377/750
Epoch 378/750
Epoch 379/750
Epoch 380/750
Epoch 381/750
Epoch 382/750
Epoch 383/750
Epoch 384/750
Epoch 385/750
Epoch 386/750
Epoch 387/750
Epoch 388/750
Epoch 389/750
Epoch 390/750
Epoch 391/750
Epoch 392/750
Epoch 393/750
Epoch 394/750
Epoch 395/750
Epoch 396/750
Epoch 397/750
Epoch 398/750
Epoch 399/750
Epoch 400/750
Epoch 401/750
Epoch 402/750
Epoch 403/750
Epoch 404/750
Epoch 405/750
Epoch 406/750
Epoch 407/750
Epoch 408/750


Epoch 409/750
Epoch 410/750
Epoch 411/750
Epoch 412/750
Epoch 413/750
Epoch 414/750
Epoch 415/750
Epoch 416/750
Epoch 417/750
Epoch 418/750
Epoch 419/750
Epoch 420/750
Epoch 421/750
Epoch 422/750
Epoch 423/750
Epoch 424/750
Epoch 425/750
Epoch 426/750
Epoch 427/750
Epoch 428/750
Epoch 429/750
Epoch 430/750
Epoch 431/750
Epoch 432/750
Epoch 433/750
Epoch 434/750
Epoch 435/750
Epoch 436/750
Epoch 437/750
Epoch 438/750
Epoch 439/750
Epoch 440/750
Epoch 441/750
Epoch 442/750
Epoch 443/750
Epoch 444/750
Epoch 445/750
Epoch 446/750
Epoch 447/750
Epoch 448/750
Epoch 449/750
Epoch 450/750
Epoch 451/750
Epoch 452/750
Epoch 453/750
Epoch 454/750
Epoch 455/750
Epoch 456/750
Epoch 457/750
Epoch 458/750
Epoch 459/750
Epoch 460/750
Epoch 461/750
Epoch 462/750
Epoch 463/750
Epoch 464/750
Epoch 465/750
Epoch 466/750


Epoch 467/750
Epoch 468/750
Epoch 469/750
Epoch 470/750
Epoch 471/750
Epoch 472/750
Epoch 473/750
Epoch 474/750
Epoch 475/750
Epoch 476/750
Epoch 477/750
Epoch 478/750
Epoch 479/750
Epoch 480/750
Epoch 481/750
Epoch 482/750
Epoch 483/750
Epoch 484/750
Epoch 485/750
Epoch 486/750
Epoch 487/750
Epoch 488/750
Epoch 489/750
Epoch 490/750
Epoch 491/750
Epoch 492/750
Epoch 493/750
Epoch 494/750
Epoch 495/750
Epoch 496/750
Epoch 497/750
Epoch 498/750
Epoch 499/750
Epoch 500/750
Epoch 501/750
Epoch 502/750
Epoch 503/750
Epoch 504/750
Epoch 505/750
Epoch 506/750
Epoch 507/750
Epoch 508/750
Epoch 509/750
Epoch 510/750
Epoch 511/750
Epoch 512/750
Epoch 513/750
Epoch 514/750
Epoch 515/750
Epoch 516/750
Epoch 517/750
Epoch 518/750
Epoch 519/750
Epoch 520/750
Epoch 521/750
Epoch 522/750
Epoch 523/750
Epoch 524/750


Epoch 525/750
Epoch 526/750
Epoch 527/750
Epoch 528/750
Epoch 529/750
Epoch 530/750
Epoch 531/750
Epoch 532/750
Epoch 533/750
Epoch 534/750
Epoch 535/750
Epoch 536/750
Epoch 537/750
Epoch 538/750
Epoch 539/750
Epoch 540/750
Epoch 541/750
Epoch 542/750
Epoch 543/750
Epoch 544/750
Epoch 545/750
Epoch 546/750
Epoch 547/750
Epoch 548/750
Epoch 549/750
Epoch 550/750
Epoch 551/750
Epoch 552/750
Epoch 553/750
Epoch 554/750
Epoch 555/750
Epoch 556/750
Epoch 557/750
Epoch 558/750
Epoch 559/750
Epoch 560/750
Epoch 561/750
Epoch 562/750
Epoch 563/750
Epoch 564/750
Epoch 565/750
Epoch 566/750
Epoch 567/750
Epoch 568/750
Epoch 569/750
Epoch 570/750
Epoch 571/750
Epoch 572/750
Epoch 573/750
Epoch 574/750
Epoch 575/750
Epoch 576/750
Epoch 577/750
Epoch 578/750
Epoch 579/750
Epoch 580/750
Epoch 581/750
Epoch 582/750


Epoch 583/750
Epoch 584/750
Epoch 585/750
Epoch 586/750
Epoch 587/750
Epoch 588/750
Epoch 589/750
Epoch 590/750
Epoch 591/750
Epoch 592/750
Epoch 593/750
Epoch 594/750
Epoch 595/750
Epoch 596/750
Epoch 597/750
Epoch 598/750
Epoch 599/750
Epoch 600/750
Epoch 601/750
Epoch 602/750
Epoch 603/750
Epoch 604/750
Epoch 605/750
Epoch 606/750
Epoch 607/750
Epoch 608/750
Epoch 609/750
Epoch 610/750
Epoch 611/750
Epoch 612/750
Epoch 613/750
Epoch 614/750
Epoch 615/750
Epoch 616/750
Epoch 617/750
Epoch 618/750
Epoch 619/750
Epoch 620/750
Epoch 621/750
Epoch 622/750
Epoch 623/750
Epoch 624/750
Epoch 625/750
Epoch 626/750
Epoch 627/750
Epoch 628/750
Epoch 629/750
Epoch 630/750
Epoch 631/750
Epoch 632/750
Epoch 633/750
Epoch 634/750
Epoch 635/750
Epoch 636/750
Epoch 637/750
Epoch 638/750
Epoch 639/750
Epoch 640/750


Epoch 641/750
Epoch 642/750
Epoch 643/750
Epoch 644/750
Epoch 645/750
Epoch 646/750
Epoch 647/750
Epoch 648/750
Epoch 649/750
Epoch 650/750
Epoch 651/750
Epoch 652/750
Epoch 653/750
Epoch 654/750
Epoch 655/750
Epoch 656/750
Epoch 657/750
Epoch 658/750
Epoch 659/750
Epoch 660/750
Epoch 661/750
Epoch 662/750
Epoch 663/750
Epoch 664/750
Epoch 665/750
Epoch 666/750
Epoch 667/750
Epoch 668/750
Epoch 669/750
Epoch 670/750
Epoch 671/750
Epoch 672/750
Epoch 673/750
Epoch 674/750
Epoch 675/750
Epoch 676/750
Epoch 677/750
Epoch 678/750
Epoch 679/750
Epoch 680/750
Epoch 681/750
Epoch 682/750
Epoch 683/750
Epoch 684/750
Epoch 685/750
Epoch 686/750
Epoch 687/750
Epoch 688/750
Epoch 689/750
Epoch 690/750
Epoch 691/750
Epoch 692/750
Epoch 693/750
Epoch 694/750
Epoch 695/750
Epoch 696/750
Epoch 697/750
Epoch 698/750


Epoch 699/750
Epoch 700/750
Epoch 701/750
Epoch 702/750
Epoch 703/750
Epoch 704/750
Epoch 705/750
Epoch 706/750
Epoch 707/750
Epoch 708/750
Epoch 709/750
Epoch 710/750
Epoch 711/750
Epoch 712/750
Epoch 713/750
Epoch 714/750
Epoch 715/750
Epoch 716/750
Epoch 717/750
Epoch 718/750

In [None]:
train_loss += hist.history['loss']
test_loss += hist.history['val_loss']
train_acc += hist.history['acc']
test_acc += hist.history['val_acc']

plt.figure()
plt.plot(train_loss)
plt.plot(test_loss)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best') ## I love this loc = 'best' command.
plt.show()
plt.figure()
plt.plot(train_acc)
plt.plot(test_acc)
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.show()
test_acc[-1]

In [None]:
roc_auc_score(y_test, model.predict(X_test))

In [None]:
roc_curve(y_test, model.predict(X_test))

In [None]:
for threshold in np.linspace(0, 1, 101):
    print('Threshold: {}'.format(threshold))
    print('Confusion Matrix')
    print(confusion_matrix(y_test, (model.predict(X_test) > threshold).astype(float)))
    print(classification_report(y_test, (model.predict(X_test) > threshold).astype(float), target_names=['No', 'Yes']))
    print('*******************************\n')

In [None]:
y_testo.mean()

In [None]:
sens = []
spec = []
most_goodest = []
pred = model.predict(X_test)
for n in np.linspace(0, 1, 10001):
    temp = 1 * (pred > n)
    tn, fp, fn, tp = confusion_matrix(y_test, temp).ravel()
    sens += [tp / (tp + fn)]
    spec += [1 - (tn / (tn + fp))]
    most_goodest += [tp + tn]
# sens += [0]
# spec += [0]
plt.plot(spec, sens);
plt.show()
plt.plot(np.linspace(0, 1, 10001), most_goodest)
plt.show()

In [86]:
pred

array([[9.32551871e-21],
       [9.96978164e-01],
       [1.55285333e-11],
       ...,
       [1.26641605e-11],
       [1.39407691e-13],
       [0.00000000e+00]], dtype=float32)

In [62]:
sens, spec

([1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  0.9994472084024323,
  0.9994496422674739,
  0.9994496422674739,
  0.999450247388675,
  0.999450247388675,
  0.999450247388675,
  0.9994505494505495,
  0.9994520547945206,
  0.9994523548740416,
  0.9994523548740416,
  0.9994532531437944,
  0.9994532531437944,
  0.9994535519125683,
  0.9994541484716157,
  0.9994541484716157,
  0.9994547437295529,
  0.9978354978354979,
  0.9978354978354979,
  0.997836668469443,
  0.9978413383702105,
  0.997843665768194,
  0.9967793880837359,
  0.9967931587386424,
  0.9957356076759062,
  0.9957401490947817,
  0.9952127659574468,
  0.9952153110047847,
  0.9952178533475027,
  0.9952229299363057,
  0.9952229299363057,
  0.9946977730646872,
  0.9946977730646872,
  0.9947201689545935,
  0.9942013705851345,
  0.9936775553213909,
  0.9936808846761453,
  0.9936808846761453,
  0.9936842105263158,
 

In [106]:
max([x for x in zip(most_goodest, np.linspace(0, 1, 10001))])

(2037, 0.9792000000000001)

In [46]:
temp = 1 * (pred > .9792)
tn, fp, fn, tp = confusion_matrix(y_test, temp).ravel()
tn, fp, fn, tp

(1984, 8, 86, 24)

In [75]:
mat[0][1]

120

In [91]:
np.linspace(0, 1, 1001)

array([0.   , 0.001, 0.002, ..., 0.998, 0.999, 1.   ])

In [112]:
sample.WnvPresent

0         0
1         0
2         0
3         0
4         0
5         0
6         0
7         0
8         0
9         0
10        0
11        0
12        0
13        0
14        0
15        0
16        0
17        0
18        0
19        0
20        0
21        0
22        0
23        0
24        0
25        0
26        0
27        0
28        0
29        0
         ..
116263    0
116264    0
116265    0
116266    0
116267    0
116268    0
116269    0
116270    0
116271    0
116272    0
116273    0
116274    0
116275    0
116276    0
116277    0
116278    0
116279    0
116280    0
116281    0
116282    0
116283    0
116284    0
116285    0
116286    0
116287    0
116288    0
116289    0
116290    0
116291    0
116292    0
Name: WnvPresent, Length: 116293, dtype: int64

In [None]:
preds = model.predict(test)
submission = pd.DataFrame([[i,x[0]] for i, x in enumerate(preds.tolist())], columns=sample.columns)

In [131]:
submission.to_csv('submission.csv')

In [133]:
submission.shape

(2102, 2)

In [86]:
exclusions = [x for x in train.columns if x not in test.columns] + [x for x in test.columns if x not in train.columns]

In [87]:
exclusions

['NumMosquitos', 'WnvPresent', 'Id']

In [160]:
beep = train.drop(columns=exclusions)
boop = test.drop(columns=exclusions)

In [178]:
boop.index

RangeIndex(start=0, stop=116293, step=1)

In [193]:
trainer = train.drop(columns='WnvPresent')

In [194]:
trainer.columns

Index(['Block', 'Street', 'Trap', 'Latitude', 'Longitude', 'AddressAccuracy',
       'Tmax_x', 'Tmin_x', 'Tavg_x', 'Depart_x', 'DewPoint_x', 'WetBulb_x',
       'Cool_x', 'StnPressure_x', 'SeaLevel_x', 'Tmax_y', 'Tmin_y', 'Tavg_y',
       'DewPoint_y', 'WetBulb_y', 'Heat_y', 'Cool_y', 'StnPressure_y',
       'SeaLevel_y', 'temp_delta_x', 'temp_delta_y', 'israining_x',
       'israining_y', 'sunlight', 'RH', 'isheat', 'wind_north_x',
       'wind_east_x', 'wind_north_y', 'wind_east_y', 'avg_wind_north_x',
       'avg_wind_east_x', 'avg_wind_north_y', 'avg_wind_east_y', 'Species_1.0',
       'Species_2.0', 'Species_3.0', 'Species_4.0', 'Species_5.0',
       'Species_6.0'],
      dtype='object')

In [189]:
test.columns

Index(['Block', 'Street', 'Trap', 'Latitude', 'Longitude', 'AddressAccuracy',
       'Tmax_x', 'Tmin_x', 'Tavg_x', 'Depart_x', 'DewPoint_x', 'WetBulb_x',
       'Cool_x', 'StnPressure_x', 'SeaLevel_x', 'Tmax_y', 'Tmin_y', 'Tavg_y',
       'DewPoint_y', 'WetBulb_y', 'Heat_y', 'Cool_y', 'StnPressure_y',
       'SeaLevel_y', 'temp_delta_x', 'temp_delta_y', 'israining_x',
       'israining_y', 'sunlight', 'RH', 'isheat', 'wind_north_x',
       'wind_east_x', 'wind_north_y', 'wind_east_y', 'avg_wind_north_x',
       'avg_wind_east_x', 'avg_wind_north_y', 'avg_wind_east_y', 'Species_1.0',
       'Species_2.0', 'Species_3.0', 'Species_4.0', 'Species_5.0',
       'Species_6.0', 'Species_7.0'],
      dtype='object')

In [196]:
[x for x,y in zip(trainer.columns, test.columns) if x==y]

['Block',
 'Street',
 'Trap',
 'Latitude',
 'Longitude',
 'AddressAccuracy',
 'Tmax_x',
 'Tmin_x',
 'Tavg_x',
 'Depart_x',
 'DewPoint_x',
 'WetBulb_x',
 'Cool_x',
 'StnPressure_x',
 'SeaLevel_x',
 'Tmax_y',
 'Tmin_y',
 'Tavg_y',
 'DewPoint_y',
 'WetBulb_y',
 'Heat_y',
 'Cool_y',
 'StnPressure_y',
 'SeaLevel_y',
 'temp_delta_x',
 'temp_delta_y',
 'israining_x',
 'israining_y',
 'sunlight',
 'RH',
 'isheat',
 'wind_north_x',
 'wind_east_x',
 'wind_north_y',
 'wind_east_y',
 'avg_wind_north_x',
 'avg_wind_east_x',
 'avg_wind_north_y',
 'avg_wind_east_y',
 'Species_1.0',
 'Species_2.0',
 'Species_3.0',
 'Species_4.0',
 'Species_5.0',
 'Species_6.0']