# Time Series Linear Regression w/ Sklearn

### Load Necessary Libraries

In [18]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import precision_recall_curve, average_precision_score, auc, roc_auc_score
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm
print("Done")

Done


In [19]:
df = pd.read_csv("esp8266_readings - Sheet1.csv")
df.columns

Index(['Date', 'Event Name', 'Value1', 'Value2', 'Value3'], dtype='object')

In [20]:
df['Date']

0       November 18, 2019 at 08:44PM
1       November 18, 2019 at 08:45PM
2       November 18, 2019 at 08:46PM
3       November 18, 2019 at 08:47PM
4       November 18, 2019 at 08:48PM
                    ...             
5386    November 22, 2019 at 03:16PM
5387    November 22, 2019 at 03:17PM
5388    November 22, 2019 at 03:18PM
5389    November 22, 2019 at 03:19PM
5390    November 22, 2019 at 03:20PM
Name: Date, Length: 5391, dtype: object

In [21]:
df = df.rename(columns={'Value1': 'Digital Button', 'Value2':'Photoresistor', 'Value3':'Temp; Humidity'})
df.head(15)

Unnamed: 0,Date,Event Name,Digital Button,Photoresistor,Temp; Humidity
0,"November 18, 2019 at 08:44PM",esp8266_readings,1,204,22.5
1,"November 18, 2019 at 08:45PM",esp8266_readings,1,208,22.4
2,"November 18, 2019 at 08:46PM",esp8266_readings,1,200,22.4
3,"November 18, 2019 at 08:47PM",esp8266_readings,1,204,22.4
4,"November 18, 2019 at 08:48PM",esp8266_readings,1,330,22.4
5,"November 18, 2019 at 08:49PM",esp8266_readings,1,207,22.4
6,"November 18, 2019 at 08:50PM",esp8266_readings,1,200,22.4
7,"November 18, 2019 at 08:51PM",esp8266_readings,1,207,22.4
8,"November 18, 2019 at 08:52PM",esp8266_readings,1,201,22.4
9,"November 18, 2019 at 08:53PM",esp8266_readings,1,195,22.40;37.00


In [22]:
df.shape

(5391, 5)

In [23]:
df = df.drop([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [24]:
df[['Temp', 'Humidity']] = df['Temp; Humidity'].str.split(';', expand=True)
df

Unnamed: 0,Date,Event Name,Digital Button,Photoresistor,Temp; Humidity,Temp,Humidity
9,"November 18, 2019 at 08:53PM",esp8266_readings,1,195,22.40;37.00,22.40,37.00
10,"November 18, 2019 at 08:54PM",esp8266_readings,0,310,22.40;37.00,22.40,37.00
11,"November 18, 2019 at 08:55PM",esp8266_readings,0,209,22.40;37.00,22.40,37.00
12,"November 18, 2019 at 08:56PM",esp8266_readings,0,210,22.40;38.00,22.40,38.00
13,"November 18, 2019 at 08:57PM",esp8266_readings,0,208,22.80;36.00,22.80,36.00
...,...,...,...,...,...,...,...
5386,"November 22, 2019 at 03:16PM",esp8266_readings,1,702,18.60;49.00,18.60,49.00
5387,"November 22, 2019 at 03:17PM",esp8266_readings,1,681,18.60;49.00,18.60,49.00
5388,"November 22, 2019 at 03:18PM",esp8266_readings,1,668,18.60;49.00,18.60,49.00
5389,"November 22, 2019 at 03:19PM",esp8266_readings,1,678,18.60;49.00,18.60,49.00


In [25]:
df = df.drop(columns="Temp; Humidity")
df

Unnamed: 0,Date,Event Name,Digital Button,Photoresistor,Temp,Humidity
9,"November 18, 2019 at 08:53PM",esp8266_readings,1,195,22.40,37.00
10,"November 18, 2019 at 08:54PM",esp8266_readings,0,310,22.40,37.00
11,"November 18, 2019 at 08:55PM",esp8266_readings,0,209,22.40,37.00
12,"November 18, 2019 at 08:56PM",esp8266_readings,0,210,22.40,38.00
13,"November 18, 2019 at 08:57PM",esp8266_readings,0,208,22.80,36.00
...,...,...,...,...,...,...
5386,"November 22, 2019 at 03:16PM",esp8266_readings,1,702,18.60,49.00
5387,"November 22, 2019 at 03:17PM",esp8266_readings,1,681,18.60,49.00
5388,"November 22, 2019 at 03:18PM",esp8266_readings,1,668,18.60,49.00
5389,"November 22, 2019 at 03:19PM",esp8266_readings,1,678,18.60,49.00


### Now, let's clean up the 'Date' Column

1. Separate the Date column and split the date and time values
2. Then, convert 12 hour time -> 24 hour time and add those values to a list (**'times'**)
3. After, let's convert that list to a dataframe, split the hours and minutes, and add the hour column back to our core dataframe, **df**. 

In [26]:
date_df = pd.DataFrame(df['Date'])
date_df = date_df['Date'].str.split('at', expand = True)

In [27]:
def hourConverter():
    times = []
    for val in date_df[1].iteritems():
        if str(val[1][-2:]) == 'AM':
            times.append(int(val[1][0:3]))
        elif str(val[1][-2:]) == 'PM':
            times.append(int(val[1][0:3]) + 12)
    return times
hourConverter()
%%capture

UsageError: Line magic function `%%capture` not found.


In [28]:
r, c = df.shape
len(hourConverter()) == r

True

In [29]:
len(df) == len(hourConverter())

True

In [30]:
time_df = pd.DataFrame(hourConverter())
time_df = time_df.rename(columns={0: 'Hour'})
time_df.dtypes

Hour    int64
dtype: object

In [31]:
time_df = pd.get_dummies(time_df.astype(str))
time_df

Unnamed: 0,Hour_1,Hour_10,Hour_11,Hour_12,Hour_13,Hour_14,Hour_15,Hour_16,Hour_17,Hour_18,...,Hour_22,Hour_23,Hour_24,Hour_3,Hour_4,Hour_5,Hour_6,Hour_7,Hour_8,Hour_9
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5377,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5378,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5379,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5380,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Need to Append the time_df dataframe to main dataframe

In [32]:
df.index = np.arange(0, len(df))

In [33]:
df = pd.concat([df, pd.DataFrame(hourConverter())], axis =1)

In [34]:
df = df.rename(columns={0:'Time'}).drop(columns=['Date'])

In [35]:
df

Unnamed: 0,Event Name,Digital Button,Photoresistor,Temp,Humidity,Time
0,esp8266_readings,1,195,22.40,37.00,20
1,esp8266_readings,0,310,22.40,37.00,20
2,esp8266_readings,0,209,22.40,37.00,20
3,esp8266_readings,0,210,22.40,38.00,20
4,esp8266_readings,0,208,22.80,36.00,20
...,...,...,...,...,...,...
5377,esp8266_readings,1,702,18.60,49.00,15
5378,esp8266_readings,1,681,18.60,49.00,15
5379,esp8266_readings,1,668,18.60,49.00,15
5380,esp8266_readings,1,678,18.60,49.00,15


# To Do: create time series regression model