# Naive approaches to forcasting as baseline

### Importing packages that'll be used

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import statistics

### Reading the data as series objects from the generated csv files

In [2]:
basedir = '../../../tlm-data/train/generated/'
data_files = []
with os.scandir(basedir) as dir:
    for item in dir:
        if item.is_file() and item.name[-4:] == '.csv':
            data_files.append(item.name)
            
print(*data_files, sep='\n')
print(f'\nThere are {len(data_files)} csv files with data')

Battery1_telemetryOutputData_2020_02_05_15_48_33.csv
Battery2_telemetryOutputData_2020_02_05_15_48_33.csv
CmdRcv_telemetryOutputData_2020_02_05_15_48_33.csv
RSSI1_telemetryOutputData_2020_02_05_15_48_33.csv
RSSI2_telemetryOutputData_2020_02_05_15_48_33.csv
TlmTx_telemetryOutputData_2020_02_05_15_48_33.csv

There are 6 csv files with data


In [3]:
def read_to_series():
    series = []
    for csv in data_files:
        s = pd.Series.from_csv(f'{basedir}{csv}')
        series.append(s)
        print(csv)
        print(s.head())
        print()
    return series   

In [4]:
series = read_to_series()

Battery1_telemetryOutputData_2020_02_05_15_48_33.csv
2020-02-05 07:48:33.520994    12.5
2020-02-05 07:48:33.622164    11.2
2020-02-05 07:48:33.722767    11.3
2020-02-05 07:48:33.823252    11.7
2020-02-05 07:48:33.923965    11.1
dtype: float64

Battery2_telemetryOutputData_2020_02_05_15_48_33.csv
2020-02-05 07:48:33.540301    14.9
2020-02-05 07:48:33.641321    15.8
2020-02-05 07:48:33.742324    16.1
2020-02-05 07:48:33.843091    10.6
2020-02-05 07:48:33.943734    10.3
dtype: float64

CmdRcv_telemetryOutputData_2020_02_05_15_48_33.csv
2020-02-05 07:48:33.563606    10181.8
2020-02-05 07:48:33.665165    10402.4
2020-02-05 07:48:33.766077     9623.3
2020-02-05 07:48:33.867295     9602.8
2020-02-05 07:48:33.968509    10170.6
dtype: float64

RSSI1_telemetryOutputData_2020_02_05_15_48_33.csv
2020-02-05 07:48:33.506520   -101.7
2020-02-05 07:48:33.607553   -109.9
2020-02-05 07:48:33.708445   -106.7
2020-02-05 07:48:33.809109   -100.7
2020-02-05 07:48:33.909766   -102.2
dtype: float64

RSSI2_tel

  infer_datetime_format=infer_datetime_format)


### Very Naive forcast: Using previous known value to predict the next

In [5]:
def prev_predict(series):
    errors = []
    for i in range(1, len(series)):
        pred = series[i]
        actual = series[i-1]
        error = ( pred - actual ) / actual * 100
        errors.append(error)
        
    print('Max error:', max(errors), '%')
    print('Min error:', min(errors), '%')
    print('Mean error:', statistics.mean(errors), '%')

In [6]:
for ser in series:
    prev_predict(ser)
    print()

Max error: 18.181818181818183 %
Min error: -15.384615384615385 %
Mean error: 0.23262516886965956 %

Max error: 140.0 %
Min error: -58.235294117647065 %
Mean error: 6.46578301787709 %

Max error: 10.330578512396695 %
Min error: -9.26406348358546 %
Mean error: 0.08358644745474526 %

Max error: 9.799999999999997 %
Min error: -8.91719745222931 %
Mean error: 0.07694231549336326 %

Max error: 32.3756906077348 %
Min error: -24.91666666666667 %
Mean error: 0.6794908060264219 %

Max error: 14.953773382068361 %
Min error: -12.840430510060838 %
Mean error: 0.16138592891792855 %



### Moving Average Forecast

In [7]:
def moving_avg(ser, window):
    errors = []
    for i in range(window, len(ser)):
        vals = ser[i-window:i]
        avg = statistics.mean(vals)
        pred, actual = avg, ser[i]
        error = ( pred - actual ) / actual * 100
        errors.append(error)
        
    print('Max error:', max(errors), '%')
    print('Min error:', min(errors), '%')
    print('Mean error:', statistics.mean(errors), '%')

In [8]:
wins = 10, 100, 1000
for ser in series:
    print()
    for window in wins:
        print(f'Window size {window}')
        moving_avg(ser, window)
        print()
    print('- - - - - ')


Window size 10
Max error: 13.181818181818175 %
Min error: -11.085271317829456 %
Mean error: 0.2305390281975043 %

Window size 100
Max error: 10.590909090909099 %
Min error: -9.11538461538462 %
Mean error: 0.2332280996994721 %

Window size 1000
Max error: 9.259090909090904 %
Min error: -8.07846153846154 %
Mean error: 0.2080329012593448 %

- - - - - 

Window size 10
Max error: 100.56338028169016 %
Min error: -45.50295857988165 %
Mean error: 6.501086163090823 %

Window size 100
Max error: 79.98591549295776 %
Min error: -33.78823529411765 %
Mean error: 6.505611226546082 %

Window size 1000
Max error: 73.44428571428571 %
Min error: -30.619999999999997 %
Mean error: 6.538203071176077 %

- - - - - 

Window size 10
Max error: 7.900661763932296 %
Min error: -6.889266774267928 %
Mean error: 0.08363925980954247 %

Window size 100
Max error: 5.791385535193632 %
Min error: -5.267486299737914 %
Mean error: 0.07540240122956408 %

Window size 1000
Max error: 5.38192057092934 %
Min error: -4.900682060