In [53]:
import numpy as np
import pandas as pd
import datetime as dt
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler

In [2]:
def read_data(file_path):
    data = pd.read_csv(file_path,header = 0)
    data["date"] = pd.to_datetime(data["date"])
    data["year"] = data["date"].dt.year
    data["month"] = data["date"].dt.month
    data["day"] = data["date"].dt.day
    data["hour"] = data["date"].dt.hour
    data["minute"] = data["date"].dt.minute
    data["second"] = data["date"].dt.second
    data['weekday'] = data[['date']].apply(lambda x: dt.datetime.strftime(x['date'], '%A'), axis=1)
    return data

In [3]:
training_data = read_data('datatraining.txt')

In [4]:
training_data[:4]

Unnamed: 0,date,Temperature,Humidity,Light,CO2,HumidityRatio,Occupancy,year,month,day,hour,minute,second,weekday
1,2015-02-04 17:51:00,23.18,27.272,426.0,721.25,0.004793,1,2015,2,4,17,51,0,Wednesday
2,2015-02-04 17:51:59,23.15,27.2675,429.5,714.0,0.004783,1,2015,2,4,17,51,59,Wednesday
3,2015-02-04 17:53:00,23.15,27.245,426.0,713.5,0.004779,1,2015,2,4,17,53,0,Wednesday
4,2015-02-04 17:54:00,23.15,27.2,426.0,708.25,0.004772,1,2015,2,4,17,54,0,Wednesday


In [5]:
subset_features = training_data[["Occupancy","Temperature","Humidity","Light","CO2","HumidityRatio"]]

In [6]:
scaled_data =  training_data[["Temperature","Humidity","Light","CO2","HumidityRatio","Occupancy"]]

scaled_data[:4]

Unnamed: 0,Temperature,Humidity,Light,CO2,HumidityRatio,Occupancy
1,23.18,27.272,426.0,721.25,0.004793,1
2,23.15,27.2675,429.5,714.0,0.004783,1
3,23.15,27.245,426.0,713.5,0.004779,1
4,23.15,27.2,426.0,708.25,0.004772,1


In [7]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [8]:
le = LabelEncoder()


In [9]:
weekdays = training_data['weekday']
weekdays.shape

(8143,)

In [10]:
week_enc = le.fit_transform(weekdays)
week_enc.shape

(8143,)

In [11]:
training_data['weekday'] = [int(str(a)) for a in week_enc]

In [12]:
training_data[:5]

Unnamed: 0,date,Temperature,Humidity,Light,CO2,HumidityRatio,Occupancy,year,month,day,hour,minute,second,weekday
1,2015-02-04 17:51:00,23.18,27.272,426.0,721.25,0.004793,1,2015,2,4,17,51,0,6
2,2015-02-04 17:51:59,23.15,27.2675,429.5,714.0,0.004783,1,2015,2,4,17,51,59,6
3,2015-02-04 17:53:00,23.15,27.245,426.0,713.5,0.004779,1,2015,2,4,17,53,0,6
4,2015-02-04 17:54:00,23.15,27.2,426.0,708.25,0.004772,1,2015,2,4,17,54,0,6
5,2015-02-04 17:55:00,23.1,27.2,426.0,704.5,0.004757,1,2015,2,4,17,55,0,6


In [75]:
x = training_data.iloc[:,1:6]
y = training_data.iloc[:,6]

In [76]:
scaler = StandardScaler()
scaler.fit(x)
scaler.transform(x)

array([[2.51847007, 0.27852622, 1.57376283, 0.36494808, 1.09175697],
       [2.48896731, 0.27771261, 1.59173515, 0.34188106, 1.08055497],
       [2.48896731, 0.27364453, 1.57376283, 0.34029023, 1.07588816],
       ...,
       [0.47294534, 1.87375407, 1.60970748, 0.61073113, 2.03352707],
       [0.47294534, 1.90358662, 1.60970748, 0.68019732, 2.063811  ],
       [0.47294534, 1.89273842, 1.68159679, 0.68231843, 2.05279837]])

In [77]:
x[:4]


Unnamed: 0,Temperature,Humidity,Light,CO2,HumidityRatio
1,23.18,27.272,426.0,721.25,0.004793
2,23.15,27.2675,429.5,714.0,0.004783
3,23.15,27.245,426.0,713.5,0.004779
4,23.15,27.2,426.0,708.25,0.004772


In [78]:
y[:4]

1    1
2    1
3    1
4    1
Name: Occupancy, dtype: int64

In [79]:
from sklearn.svm import SVC

In [80]:
svc = SVC(gamma='scale')

In [81]:
svc.fit(x,y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [82]:
test_data1 = pd.read_csv('datatest.txt')
test_data1[:4]

Unnamed: 0,date,Temperature,Humidity,Light,CO2,HumidityRatio,Occupancy
140,2015-02-02 14:19:00,23.7,26.272,585.2,749.2,0.004764,1
141,2015-02-02 14:19:59,23.718,26.29,578.4,760.4,0.004773,1
142,2015-02-02 14:21:00,23.73,26.23,572.666667,769.666667,0.004765,1
143,2015-02-02 14:22:00,23.7225,26.125,493.75,774.75,0.004744,1


In [83]:
x_test = test_data1.iloc[:,1:6]

In [84]:
y_test = test_data1.iloc[:,6]

In [85]:
y_pred = svc.predict(x_test)

In [86]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.9459662288930581