In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
data_1 = pd.read_csv("datatraining.csv")
data_2 = pd.read_csv("datatest.csv")
data_3 = pd.read_csv("datatest2.csv")

uci_data = pd.concat([data_1, data_2, data_3])
uci_data.date = pd.to_datetime(uci_data.date)
uci_data.set_index('date', inplace=True)

# Resampling

In [3]:
uci_sampling = uci_data.resample('5min').mean()

# Fill Missing Value

In [4]:
uci_sampling = uci_sampling.interpolate(method ='linear', limit_direction ='forward')

In [5]:
uci_sampling['Occupancy'] = uci_sampling['Occupancy'].apply(np.ceil)

In [6]:
true_class = uci_sampling.values[3628: , 5]

# Normalize

In [7]:
def normalize(dataset):
    dataNorm=((dataset-dataset.min())/(dataset.max() - dataset.min()))
    dataNorm['Occupancy'] = dataset['Occupancy']
    return dataNorm

In [8]:
datanorm = normalize(uci_sampling)

In [9]:
X = datanorm.iloc[:,0:5]
y = datanorm.iloc[:,5].apply(np.ceil)
y = y.astype(int)

# Stratified K-fold

In [10]:
import numpy as np
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=10,random_state=None, shuffle=False)
skf.get_n_splits(X,y)

Xtrain = []
ytrain = []
Xtest = []
ytest = []

for train_index, test_index in skf.split(X,y):    
    X_train, X_test = X.values[train_index], X.values[test_index]
    y_train, y_test = y.values[train_index], y.values[test_index]
    Xtrain.append(X_train)
    ytrain.append(y_train)
    Xtest.append(X_test)
    ytest.append(y_test)

for i in range (10):
    Xtrain[i] = pd.DataFrame(columns=['temp', 'humid', 'light', 'co2', 'humid_ratio'], data = Xtrain[i])
    ytrain[i] = pd.DataFrame(columns=['Occupancy'], data = ytrain[i])
    Xtest[i] = pd.DataFrame(columns=['temp', 'humid', 'light', 'co2', 'humid_ratio'], data = Xtest[i])
    ytest[i] = pd.DataFrame(columns=['Occupancy'], data = ytest[i])

In [11]:
predict_new = pd.read_excel('prediction_new_rev.xlsx')

# SVC

In [11]:
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score

accsvm_4 = []
for i in range(10):
    mysvm = SVC(C=25, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
    mysvm = mysvm.fit(Xtrain[i], ytrain[i])
    my_svmpredict = mysvm.predict(Xtest[i])
    acc_svmtest = balanced_accuracy_score(ytest[i], my_svmpredict)
    accsvm_4.append(acc_svmtest)
print(mysvm)
print("balanced accuracy =", np.mean(accsvm_4))

SVC(C=25, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
balanced accuracy = 0.9889520396506402


In [12]:
from sklearn.svm import SVC
svmlearn = SVC(C=25, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
svm = svmlearn.fit(X,y)

In [14]:
new_prediction = svm.predict(predict_new)

In [15]:
from sklearn.metrics import balanced_accuracy_score
acc_svm = balanced_accuracy_score(true_class, new_prediction)
acc_svm

0.5

# Random Forest

In [27]:
from sklearn.ensemble import RandomForestClassifier

accrf = []
for i in range(10):
    clfrf = RandomForestClassifier(bootstrap = True,max_depth= 80,max_features= 2,min_samples_leaf= 3,min_samples_split= 12,
  n_estimators= 100,random_state = 6)
    RFfit = clfrf.fit(Xtrain[i], ytrain[i])
    RFpred = clfrf.predict(Xtest[i])
    acc_rftest = balanced_accuracy_score(ytest[i], RFpred)
    accrf.append(acc_rftest)

print("balanced accuracy =", np.mean(accrf))

balanced accuracy = 0.9799336439745587


In [16]:
from sklearn.ensemble import RandomForestClassifier
rf_classifier = RandomForestClassifier(bootstrap = True,max_depth= 80,max_features= 2,min_samples_leaf= 3,min_samples_split= 12,
  n_estimators= 100,random_state = 6)
rf_learn = rf_classifier.fit(X,y)

In [17]:
new_predict_RF = rf_learn.predict(predict_new)

In [18]:
from sklearn.metrics import balanced_accuracy_score
acc_RF = balanced_accuracy_score(true_class, new_predict_RF)
acc_RF

0.1908177905308465

# Decision Tree 

In [19]:
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

balanced = []
y_preddtc = []
for i in range (10):
    model = DecisionTreeClassifier()
    modelfitness = model.fit(Xtrain[i], ytrain[i])
    DTCpredicted = modelfitness.predict(Xtest[i])
    y_preddtc.append(DTCpredicted)
    dtcbalanced = balanced_accuracy_score(ytest[i],DTCpredicted)
    balanced.append(dtcbalanced)
    averagedtc = np.mean(balanced)
print("MAE=" , averagedtc) 

MAE= 0.9474619505013129


In [20]:
from sklearn.tree import DecisionTreeClassifier
dt_classifier = DecisionTreeClassifier()
dt_learn = dt_classifier.fit(X,y)

In [21]:
new_predict_dt = dt_learn.predict(predict_new)

In [22]:
from sklearn.metrics import balanced_accuracy_score
acc_dt = balanced_accuracy_score(true_class, new_predict_dt)
acc_dt

0.1908177905308465

# MLP

In [13]:
from sklearn.neural_network import MLPClassifier

accmlp = []
for i in range(10):
    clfMLP = MLPClassifier(solver='lbfgs', alpha=1e-5, n_iter_no_change=1000,
                hidden_layer_sizes=(10, 3), random_state=6,learning_rate_init=0.3)
    MLPfitness = clfMLP.fit(Xtrain[i],ytrain[i])
    ypredmlp = MLPfitness.predict(Xtest[i])
    acc_mlptest = balanced_accuracy_score(ytest[i], ypredmlp)
    accmlp.append(acc_mlptest)
print(clfMLP)
print("balanced accuracy =", np.mean(accmlp))

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(10, 3), learning_rate='constant',
       learning_rate_init=0.3, max_iter=200, momentum=0.9,
       n_iter_no_change=1000, nesterovs_momentum=True, power_t=0.5,
       random_state=6, shuffle=True, solver='lbfgs', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)
balanced accuracy = 0.9774610500083545


In [24]:
from sklearn.neural_network import MLPClassifier

mlp_classifier = MLPClassifier(solver='lbfgs', alpha=1e-5, n_iter_no_change=1000,
                hidden_layer_sizes=(10, 3), random_state=6,learning_rate_init=0.3)
mlp_learn = mlp_classifier.fit(X,y)

In [25]:
new_predict_mlp = mlp_learn.predict(predict_new)

In [26]:
from sklearn.metrics import balanced_accuracy_score
acc_mlp = balanced_accuracy_score(true_class, new_predict_mlp)
acc_mlp

0.3127690100430416

# KNN 

In [14]:
from sklearn.neighbors import KNeighborsClassifier

accknn = []
for i in range(10):
    neigh = KNeighborsClassifier(metric ='manhattan', n_neighbors= 3, weights = 'distance')
    knnfitness = neigh.fit(Xtrain[i], ytrain[i])
    knnpredict = neigh.predict(Xtest[i])
    acc_knntest = balanced_accuracy_score(ytest[i], knnpredict)
    accknn.append(acc_knntest)
print(neigh)
print("balanced accuracy =", np.mean(accknn))

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='manhattan',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='distance')
balanced accuracy = 0.955752258024116


In [27]:
from sklearn.neighbors import KNeighborsClassifier

KNN_classifier = KNeighborsClassifier(metric ='manhattan', n_neighbors= 3, weights = 'distance')
knn_learn = KNN_classifier.fit(X,y)

In [31]:
new_predict_knn = knn_learn.predict(predict_new)

In [32]:
from sklearn.metrics import balanced_accuracy_score
acc_knn = balanced_accuracy_score(true_class, new_predict_knn)
acc_knn

0.1908177905308465

# ANN

In [12]:
ground_truth = datanorm.iloc[12:y_test.shape[0]+12, 5].apply(np.ceil).values

In [13]:
features = X.values
target = y.values

In [14]:
len(Xtrain)

10

In [15]:
features_ =[]
target_ = []
testX = []
testy = []
for i in range(len(Xtrain)):
    values = Xtrain[i].values
    features_.append(values)
    yvalues = ytrain[i].values
    target_.append(yvalues)
    xtestval = Xtest[i].values
    testX.append(xtestval)
    ytestval = ytest[i].values
    testy.append(ytestval)

In [16]:
import numpy as np
from keras import models
from keras import layers
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.datasets import make_classification

np.random.seed(0)

Using TensorFlow backend.


In [17]:
def create_network():
    
    network = models.Sequential()

    network.add(layers.Dense(units=30, activation='relu', input_shape=(features.shape[1],)))
    network.add(layers.Dense(units=1, activation='sigmoid'))

    network.compile(loss='binary_crossentropy', # Cross-entropy
                    optimizer='rmsprop', # Root Mean Square Propagation
                    metrics=['accuracy']) # Accuracy performance metric
    
    return network

In [18]:
neural_network = KerasClassifier(build_fn=create_network, 
                                 epochs=100, 
                                 batch_size=100, 
                                 verbose=0)

In [34]:
model_ann = create_network()                                                                                                                                                 
model_ann.fit(features_, target_, epochs=100, batch_size=100, verbose=0)
model_ann.evaluate(features, target)



[0.04613453939014128, 0.9909870301165091]

In [43]:
features_[0]

array([[0.70335913, 0.62578684, 0.38967536, 0.58757935, 0.69942474],
       [0.7014955 , 0.63097222, 0.38919892, 0.58989282, 0.70396648],
       [0.69664819, 0.63538092, 0.38686689, 0.59780977, 0.70632062],
       ...,
       [0.3298996 , 0.44068965, 0.01009294, 0.64158857, 0.36680829],
       [0.32848776, 0.45504951, 0.28532311, 0.66279708, 0.37949745],
       [0.34674754, 0.48786156, 0.36940171, 0.73993683, 0.41642802]])

In [20]:
from sklearn.metrics import balanced_accuracy_score

In [25]:
train_acc = []
test_acc = []
# for i in range(10):
for i in range(10):
    model_ann = KerasClassifier(build_fn=create_network, 
                                 epochs=100, 
                                 batch_size=100, 
                                 verbose=0)                                                                                                                                             
    model_ann.fit(features_[i], target_[i], epochs=100, batch_size=100, verbose=0)
    predicted_a = model_ann.predict(features_[i])
    train_accuracy = balanced_accuracy_score(target_[i],predicted_a)
    train_acc.append(train_accuracy)
    test_predict = model_ann.predict(testX[i])
    test_accuracy = balanced_accuracy_score(test_predict,testy[i])
    test_acc.append(test_accuracy)

In [26]:
np.mean(train_acc)

0.9897410563407053

In [27]:
np.mean(test_acc)

0.9876371649552453

In [17]:
result = cross_val_score(neural_network, features, target, cv=10)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


In [18]:
np.mean(result)

0.9868117370768553

## Build ANN Model 

In [23]:
model = create_network()                                                                                                                                                 
model.fit(features, target, epochs=100, batch_size=100, verbose=0, validation_split=0.2)

<keras.callbacks.History at 0x247b9694d68>

In [24]:
data_pred = pd.read_excel('prediction_new_rev.xlsx')
result_pred = model.predict(data_pred.values)

In [25]:
data_ori = uci_sampling.values[3628: , 0:5]
result_clas = model.predict(data_ori)

In [26]:
from sklearn.metrics import balanced_accuracy_score
acc = balanced_accuracy_score(true_class, result_pred)
acc

0.5

# Plot

In [None]:
p_rf = RFpred
p_rf

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

p_occupancy = y_test
p_rf = RFpred


plt.plot(y_test[:, 0], color='green')
plt.plot(predicted_value[:, 0], color= 'red')

plt.title("Opening price of stocks sold")
plt.xlabel("Time (latest-> oldest)")
plt.ylabel("Stock Opening Price")
plt.show()