In [29]:
from datetime import datetime, timedelta, timezone
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import time

In [30]:
import csv

def head(file_path, n=5):
    with open(file_path) as f:
        reader = csv.reader(f)
        headers = next(reader)
        print('\t'.join(headers))
        for i, row in enumerate(reader):
            if i >= n:
                break
            print('\t'.join(row))

In [31]:
head('HomeC-meter1_2015.csv', n=10)

Date & Time	use [kW]	gen [kW]	House overall [kW]	Dishwasher [kW]	Furnace 1 [kW]	Furnace 2 [kW]	Home office [kW]	Fridge [kW]	Wine cellar [kW]	Garage door [kW]	Kitchen 12 [kW]	Kitchen 14 [kW]	Kitchen 38 [kW]	Barn [kW]	Well [kW]	Microwave [kW]	Living room [kW]	Solar [kW]
01-01-2015 00:00	1.167223333	0	1.167223333	0.000235556	0.229378889	0.331326111	0.01859	0.067467222	0.00442	0.009903889	0.000437222	6.00E-05	7.22E-06	0	0	0	0	0
01-01-2015 00:30	1.171444444	0	1.171444444	0.000225	0.228757778	0.300047778	0.018603889	0.108881111	0.004456111	0.010001111	0.000347222	8.72E-05	7.22E-06	0	0	0	0	0
01-01-2015 01:00	1.151473889	0	1.151473889	0.000229444	0.229445556	0.323098889	0.01862	0.005851111	0.004449444	0.009916111	0.000611667	3.06E-05	6.67E-06	0	0	0	0	0
01-01-2015 01:30	1.398982222	0	1.398982222	0.000208889	0.277065556	0.314398889	0.018593333	0.005925	0.004457222	0.009871667	0.000693333	3.89E-06	7.22E-06	0	0	0	0	0
01-01-2015 02:00	1.080775	0	1.080775	0.000238889	0.228736111	0.30856	0.018637778	

In [32]:
dataset = pd.read_csv('HomeC-meter1_2015.csv')

In [33]:
# Drop the line with missing values
dataset.dropna(inplace=True)

In [34]:
dataset['Date & Time'].head()

0    01-01-2015 00:00
1    01-01-2015 00:30
2    01-01-2015 01:00
3    01-01-2015 01:30
4    01-01-2015 02:00
Name: Date & Time, dtype: object

In [35]:
time_index = pd.date_range('2016-01-01 05:00', periods=len(dataset),  freq='min')  
time_index = pd.DatetimeIndex(time_index)
dataset = dataset.set_index(time_index)
dataset = dataset.drop(['Date & Time'], axis=1)
dataset.iloc[np.r_[0:5,-5:0]].iloc[:,0]

2016-01-01 05:00:00    1.167223
2016-01-01 05:01:00    1.171444
2016-01-01 05:02:00    1.151474
2016-01-01 05:03:00    1.398982
2016-01-01 05:04:00    1.080775
2016-01-08 12:01:00    0.595662
2016-01-08 12:02:00    0.654633
2016-01-08 12:03:00    1.020466
2016-01-08 12:04:00    0.711790
2016-01-08 12:05:00    0.526727
Name: use [kW], dtype: float64

In [None]:
# dataset = dataset.drop(columns=['Date & Time'])
# dataset.shape

In [37]:
dataset.isna().sum()

use [kW]              0
gen [kW]              0
House overall [kW]    0
Dishwasher [kW]       0
Furnace 1 [kW]        0
Furnace 2 [kW]        0
Home office [kW]      0
Fridge [kW]           0
Wine cellar [kW]      0
Garage door [kW]      0
Kitchen 12 [kW]       0
Kitchen 14 [kW]       0
Kitchen 38 [kW]       0
Barn [kW]             0
Well [kW]             0
Microwave [kW]        0
Living room [kW]      0
Solar [kW]            0
dtype: int64

In [38]:
dataset.columns = [col.replace(' [kW]', '') for col in dataset.columns]
dataset.columns

Index(['use', 'gen', 'House overall', 'Dishwasher', 'Furnace 1', 'Furnace 2',
       'Home office', 'Fridge', 'Wine cellar', 'Garage door', 'Kitchen 12',
       'Kitchen 14', 'Kitchen 38', 'Barn', 'Well', 'Microwave', 'Living room',
       'Solar'],
      dtype='object')

In [39]:
data_daily = dataset['House overall'].resample('d').mean()

In [54]:
import numpy as np

class ARIMA:
    def __init__(self, p, d, q):
        self.p = p
        self.d = d
        self.q = q
    
    def fit(self, data):
      self.data = np.array(data).reshape(-1, 1)
      self.n = len(self.data)
      self.mu = np.mean(self.data)
      self.phi = np.zeros((self.p,))
      self.theta = np.zeros((self.q,))
    
    # Initialize residuals
      self.residuals = np.zeros((self.n,))
      self.residuals[:self.p] = self.data[:self.p] - self.mu
    
    # Fit AR(p) model
      if self.p > 0:
          for i in range(self.p, self.n):
              y = self.data[i-self.p:i]
              self.phi = np.linalg.inv(y.T @ y) @ y.T @ self.residuals[i-1]
              self.residuals[i] = self.data[i] - self.mu - self.phi @ self.residuals[i-self.p:i][::-1]
            
    # Fit MA(q) model
      if self.q > 0:
          for i in range(self.q, self.n):
              y = self.residuals[i-self.q:i]
              self.theta = np.linalg.inv(y.T @ y) @ y.T @ self.residuals[i]
              self.residuals[i] -= self.theta @ self.residuals[i-self.q:i][::-1]

                
    def predict(self, n):
        forecast = np.zeros((n,))
        for i in range(n):
            if self.p > 0 and i >= self.p:
                ar = self.residuals[i-self.p:i][::-1] @ self.phi
            else:
                ar = 0
                
            if self.q > 0 and i >= self.q:
                ma = self.residuals[i-self.q:i][::-1] @ self.theta
            else:
                ma = 0
                
            forecast[i] = self.mu + ar + ma
        
        return forecast


In [55]:
size = int(len(data_daily)*0.7)
train = data_daily[:size]
test = data_daily[size:]
print('Number of points in series:', len(data_daily))
print('Number of points in train:', len(train))
print('Number of points in test:', len(test))

Number of points in series: 8
Number of points in train: 5
Number of points in test: 3


In [56]:
arima = ARIMA(p=1, d=0, q=1)

In [57]:
arima.fit(data_daily)

ValueError: ignored

In [51]:
forecasts = arima.predict(n=3)
print(forecasts)

[0.95816125 0.95816125 0.95816125]
