# 1.0 Import Libraries

In [106]:
import warnings
warnings.filterwarnings('ignore')

import keras
from keras.models import Sequential
from keras.layers import Activation
from keras.layers.core import Dense
from keras.optimizers import Adam
from keras.metrics import categorical_crossentropy
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np

# 2.0 Preprocessing Data

In [107]:
#train_data = pd.read_csv("F:/train.csv",index_col="0")
#test_data = pd.read_csv("F:/test.csv",index_col="0")
#submissions = pd.read_csv("F:/submission.csv")
#print(submissions)

In [108]:
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/communities/communities.data'
train_data = pd.read_csv(url, header=None, na_values=['?'])
train_data.head()
train_data[127].describe()
train_data.drop([0, 1, 2, 3, 4], axis=1, inplace=True)
train_data.dropna(inplace=True)
train_data.shape
train_x = train_data.drop(127, axis=1)
#print(train_x)
train_y = train_data[127]
#print(train_y)

# 3.0 Splitting dataset

In [109]:
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(train_x, train_y, random_state=1)

# 4.0 Building Training Models

## 4.1 Linear Model

In [110]:
from sklearn.linear_model import LinearRegression
linreg = LinearRegression()
linreg.fit(X_train, y_train)

# examine the coefficients
print(linreg.coef_)

# make predictions
y_pred = linreg.predict(X_test)

[-3.66188167e+00  6.98124465e-01 -2.61955467e-01 -2.85270027e-01
 -1.64740837e-01  2.46972333e-01 -1.09290051e+00 -5.96857796e-01
  1.11200239e+00 -7.21968931e-01  4.27346598e+00 -2.28040268e-01
  8.04875769e-01 -2.57934732e-01 -2.63458023e-01 -1.04616958e+00
  6.07784197e-01  7.73552561e-01  5.96468029e-02  6.90215922e-01
  2.16759430e-02 -4.87802949e-01 -5.18858404e-01  1.39478815e-01
 -1.24417942e-01  3.15003821e-01 -1.52633736e-01 -9.65003927e-01
  1.17142163e+00 -3.08546690e-02 -9.29085548e-01  1.24654586e-01
  1.98104506e-01  7.30804821e-01 -1.77337294e-01  8.32927588e-02
  3.46045601e-01  5.01837338e-01  1.57062958e+00 -4.13478807e-01
  1.39350802e+00 -3.49428114e+00  7.09577818e-01 -8.32141352e-01
 -1.39984927e+00  1.02482840e+00  2.13855006e-01 -6.18937325e-01
  5.28954490e-01  7.98294890e-02  5.93688560e-02 -1.68582667e-01
  7.31264051e-01 -1.39635208e+00  2.38507704e-01  5.50621439e-01
 -5.61447867e-01  6.18989764e-01  2.55517024e+00 -3.71769599e+00
  7.09191935e-01  3.82041

### Calculate RMSE

In [111]:
# calculate RMSE
from sklearn import metrics
import numpy as np
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.2338136764948683


## 4.2 Lasso regression

In [112]:
from sklearn.linear_model import Lasso
lassoreg = Lasso(alpha=0.001, normalize=True)
lassoreg.fit(X_train, y_train)
print(lassoreg.coef_)

# try alpha=0.01 and examine coefficients
lassoreg = Lasso(alpha=0.01, normalize=True)
lassoreg.fit(X_train, y_train)
print(lassoreg.coef_)

[ 0.          0.          0.00891952 -0.27423369  0.          0.
  0.         -0.         -0.          0.          0.          0.
 -0.         -0.         -0.         -0.19414627  0.          0.
 -0.         -0.         -0.         -0.         -0.         -0.
 -0.          0.          0.          0.          0.04335664 -0.
  0.         -0.          0.03491474 -0.         -0.06685424  0.
  0.         -0.          0.10575313  0.          0.          0.00890807
  0.         -0.1378172  -0.30954312 -0.         -0.         -0.
 -0.          0.          0.          0.          0.         -0.
  0.          0.          0.          0.          0.          0.
 -0.          0.          0.          0.         -0.          0.
 -0.         -0.          0.          0.05257892 -0.          0.
 -0.         -0.          0.          0.          0.          0.
  0.         -0.         -0.         -0.         -0.         -0.
 -0.         -0.          0.         -0.         -0.          0.
  0.13861081  0. 

### Calculate RSME

In [113]:
# calculate RMSE (for alpha=0.01)
y_pred = lassoreg.predict(X_test)
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.19816522542866322


## 4.3 LassoCV

In [114]:
from sklearn.linear_model import LassoCV
lassoregcv = LassoCV(n_alphas=100, normalize=True, random_state=1)
lassoregcv.fit(X_train, y_train)
lassoregcv.alpha_

# examine the coefficients
print(lassoregcv.coef_)

[ 0.          0.          0.         -0.28113506  0.          0.
  0.          0.          0.          0.          0.          0.
 -0.         -0.         -0.         -0.15481092  0.          0.
 -0.         -0.         -0.         -0.         -0.         -0.
 -0.          0.         -0.          0.          0.06451487  0.
  0.         -0.          0.         -0.         -0.01920421  0.
  0.         -0.          0.03386202  0.          0.          0.08901243
  0.         -0.08759757 -0.36986917 -0.         -0.         -0.
 -0.          0.          0.          0.          0.         -0.
  0.          0.          0.          0.          0.          0.
 -0.          0.          0.          0.         -0.          0.
  0.         -0.          0.          0.01740599 -0.          0.
 -0.         -0.          0.          0.          0.          0.
  0.         -0.         -0.         -0.         -0.         -0.
 -0.         -0.          0.         -0.         -0.          0.
  0.13471036  0. 

### Calculate RSME

In [115]:
# predict method uses the best alpha value
y_pred = lassoregcv.predict(X_test)
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.1602095580138513


## 4.4  Neural Networks - 1 hidden layer

In [116]:
X_train = np.array(train_x)
#print(train_x)
y_train = np.array(train_y)
#print(train_y)

In [117]:
scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
scaled_trained_samples = scaler.fit_transform(X_train).reshape(-1,1)
#print(scaled_trained_samples)

In [118]:
model = Sequential([
    Dense(16, input_shape=(122,), activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

In [119]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 16)                1968      
_________________________________________________________________
dense_13 (Dense)             (None, 32)                544       
_________________________________________________________________
dense_14 (Dense)             (None, 1)                 33        
Total params: 2,545
Trainable params: 2,545
Non-trainable params: 0
_________________________________________________________________


In [120]:
model.compile(optimizer='Adam', loss='mse', metrics=['mse'])

In [121]:
model.fit(X_train, y_train, batch_size=10, epochs=10, shuffle=True, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x24424f71ef0>

### Calculate RMSE

In [122]:
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.1602095580138513


## 4.5  Neural Networks - 2 hidden layers

In [123]:
X_train = np.array(train_x)
#print(train_x)
y_train = np.array(train_y)
#print(train_y)

In [124]:
scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
scaled_trained_samples = scaler.fit_transform(X_train).reshape(-1,1)
#print(scaled_trained_samples)

In [125]:
model = Sequential([
    Dense(16, input_shape=(122,), activation='relu'),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

In [126]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 16)                1968      
_________________________________________________________________
dense_16 (Dense)             (None, 32)                544       
_________________________________________________________________
dense_17 (Dense)             (None, 32)                1056      
_________________________________________________________________
dense_18 (Dense)             (None, 1)                 33        
Total params: 3,601
Trainable params: 3,601
Non-trainable params: 0
_________________________________________________________________


In [127]:
model.compile(optimizer='Adam', loss='mse', metrics=['mse'])

In [128]:
model.fit(X_train, y_train, batch_size=10, epochs=10, shuffle=True, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x24424dee3c8>

### Calculate RMSE

In [129]:
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.1602095580138513


## 4.6  Neural Networks - 2 hidden layers, x2 hidden nodes

In [130]:
X_train = np.array(train_x)
#print(train_x)
y_train = np.array(train_y)
#print(train_y)

In [131]:
scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
scaled_trained_samples = scaler.fit_transform(X_train).reshape(-1,1)
#print(scaled_trained_samples)

In [132]:
model = Sequential([
    Dense(16, input_shape=(122,), activation='relu'),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

In [133]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_19 (Dense)             (None, 16)                1968      
_________________________________________________________________
dense_20 (Dense)             (None, 64)                1088      
_________________________________________________________________
dense_21 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_22 (Dense)             (None, 1)                 65        
Total params: 7,281
Trainable params: 7,281
Non-trainable params: 0
_________________________________________________________________


In [134]:
model.compile(optimizer='Adam', loss='mse', metrics=['mse'])

In [135]:
Z = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
model.fit(X_train, y_train, batch_size=len(Z), epochs=10, shuffle=True, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x24426260dd8>

### Calculate RMSE

In [136]:
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

0.1602095580138513


# 5.0 ENSEMBLE MODEL

## 5.1 GradientBoostingRegressor 

In [137]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=100, random_state=0, noise=1.0)
 
X_train, X_test = X[:200], X[:200]
y_train, y_test = y[:200], y[:200]
est = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,
    max_depth=1, random_state=0, loss='ls').fit(X_train, y_train)

### Calculate RMSE

In [138]:
#mean_squared_error(y_train, est.predict(X_train))   
np.sqrt(mean_squared_error(y_test, est.predict(X_test)))  

0.4037869618567291