In [2]:
import MySQLdb as db
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.tools.plotting import scatter_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.cross_validation import cross_val_score
%matplotlib inline

# Read the ABT averages CSV into a DataFrame and preview its first five rows.
abt_csv_path = 'ABT-Avgs.csv'
df = pd.read_csv(abt_csv_path)
df.head(5)



Unnamed: 0,DateTime,Room,Capacity,Module,NumReg,AvgNumWifiConn,PercentageEstimate,BinaryEstimate
0,2015-11-03 16:00:00,B-002,90,COMP40370P1,27,39.0,0.25,1
1,2015-11-04 16:00:00,B-002,90,COMP30250P1,22,20.0,0.25,1
2,2015-11-05 16:00:00,B-002,90,COMP30520P1,60,32.6667,0.0,0
3,2015-11-06 16:00:00,B-002,90,,0,20.3333,0.5,1
4,2015-11-09 16:00:00,B-002,90,COMP40660P1,53,72.5,0.25,1


### For logistic regression, we need to make the target feature categorical. For this, we can bin the value ranges. 

In [3]:
# http://chrisalbon.com/python/pandas_create_column_using_conditional.html
# Add estimate column: the fractional occupancy estimate scaled by the room capacity.
df['estimate'] = df['PercentageEstimate'] * df['Capacity']
# Bin results into categories for logistic regression.
# The edges are -1, 25, 50, ..., 250. pd.cut closes intervals on the right by
# default, so the -1 lower edge is what lets an estimate of exactly 0 land in
# the first ('0-25') bin.
bin_width = 25
upper_edges = list(range(bin_width, 251, bin_width))
bins = [-1] + upper_edges
groups = ['{}-{}'.format(upper - bin_width, upper) for upper in upper_edges]
df['occupantEstimate'] = pd.cut(df['estimate'], bins, labels=groups)

In [4]:
# Show the dtype pandas assigned to each column of df.
df.dtypes

DateTime                object
Room                    object
Capacity                 int64
Module                  object
NumReg                   int64
AvgNumWifiConn         float64
PercentageEstimate     float64
BinaryEstimate           int64
estimate               float64
occupantEstimate      category
dtype: object

In [5]:
# Parse the timestamp strings into datetime64[ns].
# pd.to_datetime replaces .astype('datetime64'): newer pandas releases refuse
# to cast to a datetime64 dtype that carries no unit, while to_datetime works
# on old and new versions alike.
df['DateTime'] = pd.to_datetime(df['DateTime'])
# Room and Module hold labels rather than quantities, so store them as categoricals.
df['Room'] = df['Room'].astype('category')
df['Module'] = df['Module'].astype('category')
df.dtypes

DateTime              datetime64[ns]
Room                        category
Capacity                       int64
Module                      category
NumReg                         int64
AvgNumWifiConn               float64
PercentageEstimate           float64
BinaryEstimate                 int64
estimate                     float64
occupantEstimate            category
dtype: object

In [6]:
# Report the (rows, columns) size of df at this point.
df.shape

(216, 10)

### For logistic regression, the training features need to be numeric. To do this, we create dummy (indicator) variables for the categorical features.


In [7]:
# Indicator-column names ordered by pandas' weekday number (Monday == 0 ... Sunday == 6).
days = ['mon', 'tues', 'wed', 'thurs', 'fri', 'sat', 'sun']
# http://stackoverflow.com/questions/13740672/in-pandas-how-can-i-groupby-weekday-for-a-datetime-column
# http://chrisalbon.com/python/pandas_apply_operations_to_dataframes.html
# Use the vectorised .dt accessor instead of calling a Python lambda per row.
# This needs DateTime to already be a datetime64 column.
df['weekday'] = df['DateTime'].dt.weekday
df.head()


Unnamed: 0,DateTime,Room,Capacity,Module,NumReg,AvgNumWifiConn,PercentageEstimate,BinaryEstimate,estimate,occupantEstimate,weekday
0,2015-11-03 16:00:00,B-002,90,COMP40370P1,27,39.0,0.25,1,22.5,0-25,1
1,2015-11-04 16:00:00,B-002,90,COMP30250P1,22,20.0,0.25,1,22.5,0-25,2
2,2015-11-05 16:00:00,B-002,90,COMP30520P1,60,32.6667,0.0,0,0.0,0-25,3
3,2015-11-06 16:00:00,B-002,90,,0,20.3333,0.5,1,45.0,25-50,4
4,2015-11-09 16:00:00,B-002,90,COMP40660P1,53,72.5,0.25,1,22.5,0-25,0


In [8]:
# One 0/1 indicator column per entry of `days`; an entry's position in the
# list is the weekday number it is matched against.
for day_number, day_name in enumerate(days):
    df[day_name] = (df.weekday == day_number) * 1
# Remove the helper column together with the Saturday/Sunday indicators.
df.drop(['weekday', 'sat', 'sun'], axis=1, inplace=True)

df.head(10)

Unnamed: 0,DateTime,Room,Capacity,Module,NumReg,AvgNumWifiConn,PercentageEstimate,BinaryEstimate,estimate,occupantEstimate,mon,tues,wed,thurs,fri
0,2015-11-03 16:00:00,B-002,90,COMP40370P1,27,39.0,0.25,1,22.5,0-25,0,1,0,0,0
1,2015-11-04 16:00:00,B-002,90,COMP30250P1,22,20.0,0.25,1,22.5,0-25,0,0,1,0,0
2,2015-11-05 16:00:00,B-002,90,COMP30520P1,60,32.6667,0.0,0,0.0,0-25,0,0,0,1,0
3,2015-11-06 16:00:00,B-002,90,,0,20.3333,0.5,1,45.0,25-50,0,0,0,0,1
4,2015-11-09 16:00:00,B-002,90,COMP40660P1,53,72.5,0.25,1,22.5,0-25,1,0,0,0,0
5,2015-11-10 16:00:00,B-002,90,COMP40370P1,27,35.1667,0.25,1,22.5,0-25,0,1,0,0,0
6,2015-11-11 16:00:00,B-002,90,COMP30250P1,22,23.5,0.25,1,22.5,0-25,0,0,1,0,0
7,2015-11-12 16:00:00,B-002,90,COMP30520P1,60,49.8333,0.25,1,22.5,0-25,0,0,0,1,0
8,2015-11-13 16:00:00,B-002,90,,0,8.6667,0.25,1,22.5,0-25,0,0,0,0,1
9,2015-11-03 09:00:00,B-002,90,,0,2.0,0.0,0,0.0,0-25,0,1,0,0,0


In [9]:
# Indicator-column names for the hourly slots; the first label is matched
# against hour 9 and each following label against the next hour.
times = ['9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM']
first_hour = 9

# Take the hour with the vectorised .dt accessor and keep it in a local Series,
# so no scratch 'time' column is added to df and then dropped again.
hour_of_day = df['DateTime'].dt.hour

for offset, slot_label in enumerate(times):
    df[slot_label] = (hour_of_day == first_hour + offset) * 1
df.head(10)

Unnamed: 0,DateTime,Room,Capacity,Module,NumReg,AvgNumWifiConn,PercentageEstimate,BinaryEstimate,estimate,occupantEstimate,...,thurs,fri,9AM,10AM,11AM,12PM,1PM,2PM,3PM,4PM
0,2015-11-03 16:00:00,B-002,90,COMP40370P1,27,39.0,0.25,1,22.5,0-25,...,0,0,0,0,0,0,0,0,0,1
1,2015-11-04 16:00:00,B-002,90,COMP30250P1,22,20.0,0.25,1,22.5,0-25,...,0,0,0,0,0,0,0,0,0,1
2,2015-11-05 16:00:00,B-002,90,COMP30520P1,60,32.6667,0.0,0,0.0,0-25,...,1,0,0,0,0,0,0,0,0,1
3,2015-11-06 16:00:00,B-002,90,,0,20.3333,0.5,1,45.0,25-50,...,0,1,0,0,0,0,0,0,0,1
4,2015-11-09 16:00:00,B-002,90,COMP40660P1,53,72.5,0.25,1,22.5,0-25,...,0,0,0,0,0,0,0,0,0,1
5,2015-11-10 16:00:00,B-002,90,COMP40370P1,27,35.1667,0.25,1,22.5,0-25,...,0,0,0,0,0,0,0,0,0,1
6,2015-11-11 16:00:00,B-002,90,COMP30250P1,22,23.5,0.25,1,22.5,0-25,...,0,0,0,0,0,0,0,0,0,1
7,2015-11-12 16:00:00,B-002,90,COMP30520P1,60,49.8333,0.25,1,22.5,0-25,...,1,0,0,0,0,0,0,0,0,1
8,2015-11-13 16:00:00,B-002,90,,0,8.6667,0.25,1,22.5,0-25,...,0,1,0,0,0,0,0,0,0,1
9,2015-11-03 09:00:00,B-002,90,,0,2.0,0.0,0,0.0,0-25,...,0,0,1,0,0,0,0,0,0,0


In [10]:
# Encode Room as 0/1 indicator columns B002, B003 and B004, each set where
# Room equals 'B-002', 'B-003' or 'B-004' respectively.
for room_number in (2, 3, 4):
    room_suffix = '00' + str(room_number)
    df['B' + room_suffix] = (df.Room == 'B-' + room_suffix) * 1
df

Unnamed: 0,DateTime,Room,Capacity,Module,NumReg,AvgNumWifiConn,PercentageEstimate,BinaryEstimate,estimate,occupantEstimate,...,10AM,11AM,12PM,1PM,2PM,3PM,4PM,B002,B003,B004
0,2015-11-03 16:00:00,B-002,90,COMP40370P1,27,39.0000,0.25,1,22.5,0-25,...,0,0,0,0,0,0,1,1,0,0
1,2015-11-04 16:00:00,B-002,90,COMP30250P1,22,20.0000,0.25,1,22.5,0-25,...,0,0,0,0,0,0,1,1,0,0
2,2015-11-05 16:00:00,B-002,90,COMP30520P1,60,32.6667,0.00,0,0.0,0-25,...,0,0,0,0,0,0,1,1,0,0
3,2015-11-06 16:00:00,B-002,90,,0,20.3333,0.50,1,45.0,25-50,...,0,0,0,0,0,0,1,1,0,0
4,2015-11-09 16:00:00,B-002,90,COMP40660P1,53,72.5000,0.25,1,22.5,0-25,...,0,0,0,0,0,0,1,1,0,0
5,2015-11-10 16:00:00,B-002,90,COMP40370P1,27,35.1667,0.25,1,22.5,0-25,...,0,0,0,0,0,0,1,1,0,0
6,2015-11-11 16:00:00,B-002,90,COMP30250P1,22,23.5000,0.25,1,22.5,0-25,...,0,0,0,0,0,0,1,1,0,0
7,2015-11-12 16:00:00,B-002,90,COMP30520P1,60,49.8333,0.25,1,22.5,0-25,...,0,0,0,0,0,0,1,1,0,0
8,2015-11-13 16:00:00,B-002,90,,0,8.6667,0.25,1,22.5,0-25,...,0,0,0,0,0,0,1,1,0,0
9,2015-11-03 09:00:00,B-002,90,,0,2.0000,0.00,0,0.0,0-25,...,0,0,0,0,0,0,0,1,0,0


In [11]:
# Pairwise correlations of the numeric columns only.
# df also holds datetime and categorical columns. A bare df.corr() relied on
# pandas silently dropping those, which newer pandas releases no longer do
# (they raise instead), so the numeric columns are selected explicitly.
df.select_dtypes(include=[np.number]).corr()

Unnamed: 0,Capacity,NumReg,AvgNumWifiConn,PercentageEstimate,BinaryEstimate,estimate,mon,tues,wed,thurs,...,10AM,11AM,12PM,1PM,2PM,3PM,4PM,B002,B003,B004
Capacity,1.0,0.468271,0.355578,-0.045083,-0.056617,0.39296,2.6474550000000002e-17,3.7443450000000005e-17,2.194961e-17,2.2595180000000002e-17,...,-9.332719e-17,-1.5419270000000002e-17,6.248864e-17,-2.434622e-18,-2.8403930000000004e-17,3.6519330000000003e-17,3.002701e-17,-0.5,-0.5,1.0
NumReg,0.4682708,1.0,0.736217,0.344868,0.290085,0.562036,-0.04625192,0.02518073,-0.04275289,0.09583169,...,-0.009564565,0.0570458,0.1000863,0.009564565,-0.024253,-0.01161411,-0.1762613,-0.1253355,-0.3429352,0.4682708
AvgNumWifiConn,0.3555784,0.736217,1.0,0.598574,0.454588,0.765715,-0.008559815,0.127076,-0.1088693,0.08762523,...,-0.01575205,0.02496158,0.1411035,-0.005947925,0.1159951,-0.05334014,-0.1693601,-0.2275643,-0.1280142,0.3555784
PercentageEstimate,-0.04508348,0.344868,0.598574,1.0,0.769572,0.816815,0.05071892,0.1150197,-0.1022397,-0.02555993,...,0.0,0.06426169,0.1445888,-0.04819627,0.1285234,0.0,-0.1767197,-0.07889609,0.1239796,-0.04508348
BinaryEstimate,-0.05661669,0.290085,0.454588,0.769572,1.0,0.634861,0.2052355,0.06954706,-0.1711928,0.0213991,...,-0.0491772,0.1324002,0.1929259,-0.07944009,0.1324002,0.04161148,-0.1702288,0.0495396,0.007077086,-0.05661669
estimate,0.39296,0.562036,0.765715,0.816815,0.634861,1.0,0.04876492,0.1268725,-0.1533565,0.04715219,...,0.006579753,0.04302146,0.1017331,-0.007592023,0.09262268,-0.01973926,-0.198911,-0.2540037,-0.1389563,0.39296
mon,2.6474550000000002e-17,-0.046252,-0.00856,0.050719,0.205235,0.048765,1.0,-0.1889822,-0.1889822,-0.1889822,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.301043e-17,1.301043e-17,1.301043e-17
tues,3.7443450000000005e-17,0.025181,0.127076,0.11502,0.069547,0.126872,-0.1889822,1.0,-0.2857143,-0.2857143,...,1.5420550000000002e-17,1.5887840000000002e-17,1.635513e-17,1.3084100000000001e-17,1.355139e-17,1.401868e-17,1.448597e-17,1.3113280000000001e-18,1.3113280000000001e-18,1.3113280000000001e-18
wed,2.194961e-17,-0.042753,-0.108869,-0.10224,-0.171193,-0.153356,-0.1889822,-0.2857143,1.0,-0.2857143,...,1.5420550000000002e-17,1.2149520000000001e-17,1.2616810000000002e-17,1.3084100000000001e-17,1.355139e-17,1.401868e-17,1.448597e-17,1.3113280000000001e-18,1.3113280000000001e-18,1.3113280000000001e-18
thurs,2.2595180000000002e-17,0.095832,0.087625,-0.02556,0.021399,0.047152,-0.1889822,-0.2857143,-0.2857143,1.0,...,1.5420550000000002e-17,1.2149520000000001e-17,1.2616810000000002e-17,1.3084100000000001e-17,1.355139e-17,1.401868e-17,1.448597e-17,1.966992e-18,1.966992e-18,1.966992e-18


In [12]:
# Report the (rows, columns) size of df after the indicator columns were added.
df.shape

(216, 26)

In [13]:
# Constant column of ones, concatenated column-wise with the features in a later cell.
# The length and index come from df rather than a hard-coded 216, so the
# column-wise concat still lines up row for row if the data set changes size
# or df no longer has a default 0..n-1 index.
intercept = pd.DataFrame({'Intercept': np.ones(len(df))}, index=df.index)
intercept.head(10)

Unnamed: 0,Intercept
0,1.0
1,1.0
2,1.0
3,1.0
4,1.0
5,1.0
6,1.0
7,1.0
8,1.0
9,1.0


In [14]:
# Feature matrix: the intercept column followed by the numeric and indicator
# columns taken from df.
predictor_columns = [
    'Capacity', 'NumReg', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
    'B002', 'B003', 'B004',
]
x = pd.concat([intercept, df[predictor_columns]], axis=1)
# Target: the binned occupancy estimate.
y = df['occupantEstimate']
x

Unnamed: 0,Intercept,Capacity,NumReg,AvgNumWifiConn,mon,tues,wed,thurs,fri,9AM,10AM,11AM,12PM,1PM,2PM,3PM,4PM,B002,B003,B004
0,1.0,90,27,39.0000,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0
1,1.0,90,22,20.0000,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0
2,1.0,90,60,32.6667,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0
3,1.0,90,0,20.3333,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0
4,1.0,90,53,72.5000,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0
5,1.0,90,27,35.1667,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0
6,1.0,90,22,23.5000,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0
7,1.0,90,60,49.8333,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0
8,1.0,90,0,8.6667,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0
9,1.0,90,0,2.0000,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0


# Test 1: All features but modules. 

In [15]:
# Test 1 model: fitted on the full data set with the intercept, capacity,
# registration, wifi, weekday, hour and room columns.
test1_columns = [
    'Intercept', 'Capacity', 'NumReg', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
    'B002', 'B003', 'B004',
]
log = LogisticRegression().fit(x[test1_columns], y)

In [16]:
# Predict with the Test 1 model on the data it was fitted on and report the
# mean accuracy, i.e. a training-set score.
test1_columns = [
    'Intercept', 'Capacity', 'NumReg', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
    'B002', 'B003', 'B004',
]
test1_inputs = x[test1_columns]
predictions = log.predict(test1_inputs)
log.score(test1_inputs, y)

0.75

In [17]:
# 80/20 hold-out split for Test 1 (all features except modules).
test1_columns = [
    'Intercept', 'Capacity', 'NumReg', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
    'B002', 'B003', 'B004',
]
# random_state pins the shuffle so the reported accuracies can be reproduced;
# without it every run of this cell draws a different split.
x_train, x_test, y_train, y_test = train_test_split(
    x[test1_columns], y, test_size=0.2, random_state=42)

In [18]:
# Fit on the training partition only, then print the accuracy on the training
# rows followed by the accuracy on the held-out rows.
log_train = LogisticRegression().fit(x_train, y_train)

for partition_x, partition_y in ((x_train, y_train), (x_test, y_test)):
    pred = log_train.predict(partition_x)
    print(metrics.accuracy_score(partition_y, pred))

0.75
0.659090909091


# Test 2: Now without rooms

In [19]:
# Test 2 model: same as Test 1 but without the room indicator columns.
test2_columns = [
    'Intercept', 'Capacity', 'NumReg', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
]
log2 = LogisticRegression().fit(x[test2_columns], y)

In [20]:
# Predict with the Test 2 model on its own training data and report the mean accuracy.
test2_columns = [
    'Intercept', 'Capacity', 'NumReg', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
]
test2_inputs = x[test2_columns]
predictions2 = log2.predict(test2_inputs)
log2.score(test2_inputs, y)

0.75462962962962965

In [36]:
# Hold-out evaluation for Test 2 (no room indicator columns), 80/20 split.
test2_columns = [
    'Intercept', 'Capacity', 'NumReg', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
]
# random_state pins the shuffle so the printed accuracies can be reproduced;
# without it every run of this cell draws a different split.
x_train2, x_test2, y_train2, y_test2 = train_test_split(
    x[test2_columns], y, test_size=0.2, random_state=42)
log_train2 = LogisticRegression().fit(x_train2, y_train2)

# Accuracy on the rows the model was fitted on.
pred2 = log_train2.predict(x_train2)
print(metrics.accuracy_score(y_train2, pred2))

# Accuracy on the held-out rows.
pred2 = log_train2.predict(x_test2)
print(metrics.accuracy_score(y_test2, pred2))

0.755813953488
0.659090909091


# Test 3: Now without rooms and Capacity

In [22]:
# Test 3 model: no room indicator columns and no Capacity.
test3_columns = [
    'Intercept', 'NumReg', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
]
log3 = LogisticRegression().fit(x[test3_columns], y)

In [23]:
# Predict with the Test 3 model on its own training data and report the mean accuracy.
test3_columns = [
    'Intercept', 'NumReg', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
]
test3_inputs = x[test3_columns]
predictions3 = log3.predict(test3_inputs)
log3.score(test3_inputs, y)

0.72685185185185186

In [37]:
# Hold-out evaluation for Test 3 (no room indicator columns, no Capacity), 70/30 split.
# NOTE(review): Tests 1 and 2 hold out 20% while this test holds out 30% --
# confirm that is intentional before comparing accuracies across tests.
test3_columns = [
    'Intercept', 'NumReg', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
]
# random_state pins the shuffle so the printed accuracies can be reproduced;
# without it every run of this cell draws a different split.
x_train3, x_test3, y_train3, y_test3 = train_test_split(
    x[test3_columns], y, test_size=0.3, random_state=42)
log_train3 = LogisticRegression().fit(x_train3, y_train3)

# Accuracy on the rows the model was fitted on.
pred3 = log_train3.predict(x_train3)
print(metrics.accuracy_score(y_train3, pred3))

# Accuracy on the held-out rows.
pred3 = log_train3.predict(x_test3)
print(metrics.accuracy_score(y_test3, pred3))

0.741721854305
0.707692307692


# Test 4: Now without rooms and Number registered

In [25]:
# Test 4 model: no room indicator columns and no NumReg.
test4_columns = [
    'Intercept', 'Capacity', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
]
log4 = LogisticRegression().fit(x[test4_columns], y)

In [26]:
# Predict with the Test 4 model on its own training data and report the mean accuracy.
test4_columns = [
    'Intercept', 'Capacity', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
]
test4_inputs = x[test4_columns]
predictions4 = log4.predict(test4_inputs)
log4.score(test4_inputs, y)

0.75

In [38]:
# Hold-out evaluation for Test 4 (no room indicator columns, no NumReg), 70/30 split.
# NOTE(review): Tests 1 and 2 hold out 20% while this test holds out 30% --
# confirm that is intentional before comparing accuracies across tests.
test4_columns = [
    'Intercept', 'Capacity', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
]
# random_state pins the shuffle so the printed accuracies can be reproduced;
# without it every run of this cell draws a different split.
x_train4, x_test4, y_train4, y_test4 = train_test_split(
    x[test4_columns], y, test_size=0.3, random_state=42)
log_train4 = LogisticRegression().fit(x_train4, y_train4)

# Accuracy on the rows the model was fitted on.
pred4 = log_train4.predict(x_train4)
print(metrics.accuracy_score(y_train4, pred4))

# Accuracy on the held-out rows.
pred4 = log_train4.predict(x_test4)
print(metrics.accuracy_score(y_test4, pred4))

0.735099337748
0.707692307692


# Test 5: Now without Number Registered or capacity

In [28]:
# Test 5 model: no NumReg and no Capacity; the room indicator columns are kept.
test5_columns = [
    'Intercept', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
    'B002', 'B003', 'B004',
]
log5 = LogisticRegression().fit(x[test5_columns], y)

In [29]:
# Predict with the Test 5 model on its own training data and report the mean accuracy.
test5_columns = [
    'Intercept', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
    'B002', 'B003', 'B004',
]
test5_inputs = x[test5_columns]
predictions5 = log5.predict(test5_inputs)
log5.score(test5_inputs, y)

0.75462962962962965

In [39]:
# Hold-out evaluation for Test 5 (no NumReg, no Capacity), 70/30 split.
# NOTE(review): Tests 1 and 2 hold out 20% while this test holds out 30% --
# confirm that is intentional before comparing accuracies across tests.
test5_columns = [
    'Intercept', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
    'B002', 'B003', 'B004',
]
# random_state pins the shuffle so the printed accuracies can be reproduced;
# without it every run of this cell draws a different split.
x_train5, x_test5, y_train5, y_test5 = train_test_split(
    x[test5_columns], y, test_size=0.3, random_state=42)
log_train5 = LogisticRegression().fit(x_train5, y_train5)

# Accuracy on the rows the model was fitted on.
pred5 = log_train5.predict(x_train5)
print(metrics.accuracy_score(y_train5, pred5))

# Accuracy on the held-out rows.
pred5 = log_train5.predict(x_test5)
print(metrics.accuracy_score(y_test5, pred5))

0.788079470199
0.615384615385


# Test 6: With just Wifi Log data

In [31]:
# Test 6 model: the intercept column plus the average wifi connection count only.
test6_columns = ['Intercept', 'AvgNumWifiConn']
log6 = LogisticRegression().fit(x[test6_columns], y)

In [32]:
# Predict with the Test 6 model on its own training data and report the mean accuracy.
test6_columns = ['Intercept', 'AvgNumWifiConn']
test6_inputs = x[test6_columns]
predictions6 = log6.predict(test6_inputs)
log6.score(test6_inputs, y)

0.69907407407407407

In [40]:
# Hold-out evaluation for Test 6 (intercept and wifi connection count only), 70/30 split.
# NOTE(review): Tests 1 and 2 hold out 20% while this test holds out 30% --
# confirm that is intentional before comparing accuracies across tests.
test6_columns = ['Intercept', 'AvgNumWifiConn']
# random_state pins the shuffle so the printed accuracies can be reproduced;
# without it every run of this cell draws a different split.
x_train6, x_test6, y_train6, y_test6 = train_test_split(
    x[test6_columns], y, test_size=0.3, random_state=42)
log_train6 = LogisticRegression().fit(x_train6, y_train6)

# Accuracy on the rows the model was fitted on.
pred6 = log_train6.predict(x_train6)
print(metrics.accuracy_score(y_train6, pred6))

# Accuracy on the held-out rows.
pred6 = log_train6.predict(x_test6)
print(metrics.accuracy_score(y_test6, pred6))

0.695364238411
0.723076923077


### Other stuff: 10-fold cross-validation using all features except modules

In [34]:
# Feature matrix for cross-validation: the intercept column followed by the
# capacity, registration, wifi, weekday, hour and room columns from df.
best_feature_columns = [
    'Capacity', 'NumReg', 'AvgNumWifiConn',
    'mon', 'tues', 'wed', 'thurs', 'fri',
    '9AM', '10AM', '11AM', '12PM', '1PM', '2PM', '3PM', '4PM',
    'B002', 'B003', 'B004',
]
x_best = pd.concat([intercept, df[best_feature_columns]], axis=1)


In [35]:
# Score a fresh logistic regression by accuracy over 10 cross-validation folds,
# then print the per-fold scores followed by their mean.
cv_folds = 10
model_scores = cross_val_score(
    LogisticRegression(), x_best, y, scoring='accuracy', cv=cv_folds)
mean_score = model_scores.mean()
print('Logistic regression, Target feature: Estimate of occupancy using average connections\n')
print(model_scores)
print('Mean Score: ', mean_score)

Logistic regression, Target feature: Estimate of occupancy using average connections

[ 0.52        0.60869565  0.56521739  0.69565217  0.71428571  0.71428571
  0.8         0.55        0.7         0.6       ]
Mean Score:  0.646813664596


