# Naive approaches to forecasting as a baseline

### Importing packages that'll be used

In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import statistics

### Reading the data as series objects from the generated csv files

In [3]:
# Directory holding the generated telemetry CSV files (relative to this notebook).
basedir = '../../../tlm-data/train/generated/'

# Collect the names of the regular files ending in '.csv' that sit directly in basedir.
# os.scandir yields entries in arbitrary order, so the names are sorted to make the
# listing (and every index-aligned list derived from it) reproducible across runs.
# The context-manager name deliberately avoids shadowing the built-in `dir`.
with os.scandir(basedir) as entries:
    data_files = sorted(
        entry.name
        for entry in entries
        if entry.is_file() and entry.name.endswith('.csv')
    )

print(*data_files, sep='\n')
print(f'\nThere are {len(data_files)} csv files with data')

Battery1_telemetryOutputData_2020_02_05_15_48_33.csv
Battery2_telemetryOutputData_2020_02_05_15_48_33.csv
CmdRcv_telemetryOutputData_2020_02_05_15_48_33.csv
RSSI1_telemetryOutputData_2020_02_05_15_48_33.csv
RSSI2_telemetryOutputData_2020_02_05_15_48_33.csv
TlmTx_telemetryOutputData_2020_02_05_15_48_33.csv

There are 6 csv files with data


In [4]:
def read_to_series(files=None, directory=None):
    """Read header-less two-column CSV files into a list of pandas Series.

    The first column of each file is parsed as dates and becomes the index;
    the second column becomes the values. Each Series is named 'Values'.
    The file name, the first rows and a blank line are printed per file.

    Args:
        files: iterable of CSV file names. Defaults to the module-level
            ``data_files``.
        directory: directory containing the files. Defaults to the
            module-level ``basedir``.

    Returns:
        list: one Series per file, in the same order as ``files``.
    """
    files = data_files if files is None else files
    directory = basedir if directory is None else directory

    loaded = []
    for csv in files:
        # pd.Series.from_csv was removed in pandas 1.0; this is the equivalent
        # read_csv call (no header row, first column as a parsed date index).
        frame = pd.read_csv(
            os.path.join(directory, csv),
            header=None,
            index_col=0,
            parse_dates=True,
        )
        s = frame.iloc[:, 0]
        # read_csv labels the index with the column position (0); from_csv left it unnamed
        s.index.name = None
        s.name = 'Values'
        loaded.append(s)
        print(csv)
        print(s.head())
        print()
    return loaded

In [5]:
# Load every CSV named in data_files into a list of Series (one Series per file).
series = read_to_series()

  infer_datetime_format=infer_datetime_format)


Battery1_telemetryOutputData_2020_02_05_15_48_33.csv
2020-02-05 07:48:33.520994    12.5
2020-02-05 07:48:33.622164    11.2
2020-02-05 07:48:33.722767    11.3
2020-02-05 07:48:33.823252    11.7
2020-02-05 07:48:33.923965    11.1
Name: Values, dtype: float64

Battery2_telemetryOutputData_2020_02_05_15_48_33.csv
2020-02-05 07:48:33.540301    14.9
2020-02-05 07:48:33.641321    15.8
2020-02-05 07:48:33.742324    16.1
2020-02-05 07:48:33.843091    10.6
2020-02-05 07:48:33.943734    10.3
Name: Values, dtype: float64

CmdRcv_telemetryOutputData_2020_02_05_15_48_33.csv
2020-02-05 07:48:33.563606    10181.8
2020-02-05 07:48:33.665165    10402.4
2020-02-05 07:48:33.766077     9623.3
2020-02-05 07:48:33.867295     9602.8
2020-02-05 07:48:33.968509    10170.6
Name: Values, dtype: float64

RSSI1_telemetryOutputData_2020_02_05_15_48_33.csv
2020-02-05 07:48:33.506520   -101.7
2020-02-05 07:48:33.607553   -109.9
2020-02-05 07:48:33.708445   -106.7
2020-02-05 07:48:33.809109   -100.7
2020-02-05 07:48:33

In [6]:
def to_data_frames():
    """Turn every Series in the module-level ``series`` list into a DataFrame.

    Returns:
        list: one single-column DataFrame per Series, in the same order.
    """
    return [one_series.to_frame() for one_series in series]

In [7]:
# dfs is shorthand for data frames: one single-column DataFrame per entry in `series`,
# in the same order, so dfs[i] corresponds to series[i]
dfs = to_data_frames()

### Very naive forecast: using the previous known value to predict the next

In [8]:
def prev_predict(series):
    """Forecast every value as the value observed immediately before it.

    Prints the maximum, minimum and mean of the signed percentage errors
    ``(prediction - actual) / actual * 100``. Because the errors are signed,
    over- and under-predictions cancel each other in the mean.

    Args:
        series: a pandas Series or any other iterable of numbers, in time order.

    Returns:
        list: predictions aligned with the input. The first entry is the
        placeholder '-' because nothing precedes the first value. An empty
        input gives an empty list.
    """
    # Iterating yields the values in positional order for a Series as well as
    # a list, so the lookups never depend on the index labels (integer keys on
    # a datetime-indexed Series rely on a deprecated positional fallback).
    values = list(series)
    if not values:
        return []

    # first value will be blank; every later prediction is the preceding value
    predictions = ['-'] + values[:-1]

    errors = []
    for pred, actual in zip(values, values[1:]):
        if actual == 0:
            # percentage error is undefined for a zero actual value; leave it out
            continue
        errors.append((pred - actual) / actual * 100)

    if errors:
        print('Max error:', max(errors), '%')
        print('Min error:', min(errors), '%')
        print('Mean error:', statistics.mean(errors), '%')
    else:
        # fewer than two samples, or every actual value was zero
        print('No errors to report: not enough usable samples')

    return predictions

In [9]:
# Print the previous-value baseline's error statistics for each loaded series.
for i, ser in enumerate(series):
    preds = prev_predict(ser)
    # The predictions are not stored; to keep them next to the data, use:
    # dfs[i]['Prev'] = preds
    print()

Max error: 18.181818181818183 %
Min error: -15.384615384615385 %
Mean error: 0.23228731208412653 %

Max error: 139.43661971830988 %
Min error: -58.333333333333336 %
Mean error: 6.494594292851643 %

Max error: 10.20991664301558 %
Min error: -9.363295880149812 %
Mean error: 0.08344469570165197 %

Max error: 9.7902097902098 %
Min error: -8.925318761384332 %
Mean error: 0.07561300386264153 %

Max error: 33.18534961154273 %
Min error: -24.457429048414024 %
Mean error: 0.6793229804497425 %

Max error: 14.732094921078067 %
Min error: -13.008510240344142 %
Mean error: 0.16338570175254358 %



### Moving Average Forecast

In [10]:
def moving_avg(ser, window):
    """Forecast each value as the mean of the ``window`` values just before it.

    Prints the maximum, minimum and mean of the signed percentage errors
    ``(prediction - actual) / actual * 100``. Because the errors are signed,
    over- and under-predictions cancel each other in the mean.

    Args:
        ser: a pandas Series or any other iterable of numbers, in time order.
        window: number of preceding values to average; must be at least 1.

    Returns:
        list: predictions aligned with the input. The leading entries, which
        have fewer than ``window`` predecessors, hold the placeholder '-'.

    Raises:
        ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError('window must be a positive integer')

    # Iterating yields the values in positional order for a Series as well as
    # a list, so the lookups never depend on the index labels (integer keys on
    # a datetime-indexed Series rely on a deprecated positional fallback).
    values = list(ser)

    errors = []
    predictions = []
    for i in range(window, len(values)):
        pred = statistics.mean(values[i - window:i])
        actual = values[i]
        predictions.append(pred)
        if actual == 0:
            # percentage error is undefined for a zero actual value; leave it out
            continue
        errors.append((pred - actual) / actual * 100)

    if errors:
        print('Max error:', max(errors), '%')
        print('Min error:', min(errors), '%')
        print('Mean error:', statistics.mean(errors), '%')
    else:
        # input no longer than the window, or every actual value was zero
        print('No errors to report: not enough usable samples')

    # adding missing values; capped so the result never outgrows the input
    non_values = ['-'] * min(window, len(values))
    return non_values + predictions

In [11]:
# Window sizes (number of preceding samples averaged) to evaluate.
wins = (10, 100, 1000)

# For every series, run each window size and store the predictions as a new
# column in the frame at the same position in dfs.
for i, ser in enumerate(series):
    print()
    for window in wins:
        print(f'Window size {window}')
        preds = moving_avg(ser, window)
        column = f'{window}th Moving Avg'
        dfs[i][column] = preds
        print()
    print('- - - - - ')


Window size 10
Max error: 13.181818181818175 %
Min error: -11.085271317829456 %
Mean error: 0.2305390281975043 %

Window size 100
Max error: 10.590909090909099 %
Min error: -9.11538461538462 %
Mean error: 0.2332280996994721 %

Window size 1000
Max error: 9.259090909090904 %
Min error: -8.07846153846154 %
Mean error: 0.2080329012593448 %

- - - - - 

Window size 10
Max error: 100.56338028169016 %
Min error: -45.50295857988165 %
Mean error: 6.501086163090823 %

Window size 100
Max error: 79.98591549295776 %
Min error: -33.78823529411765 %
Mean error: 6.505611226546082 %

Window size 1000
Max error: 73.44428571428571 %
Min error: -30.619999999999997 %
Mean error: 6.538203071176077 %

- - - - - 

Window size 10
Max error: 7.900661763932296 %
Min error: -6.889266774267928 %
Mean error: 0.08363925980954247 %

Window size 100
Max error: 5.791385535193632 %
Min error: -5.267486299737914 %
Mean error: 0.07540240122956408 %

Window size 1000
Max error: 5.38192057092934 %
Min error: -4.900682060

In [12]:
# Show rows 0 through 1999 of the first frame, including its moving-average columns
dfs[0][0:2000]

Unnamed: 0,Values,10th Moving Avg,100th Moving Avg,1000th Moving Avg
2020-02-05 07:48:33.520994,12.5,-,-,-
2020-02-05 07:48:33.622164,11.2,-,-,-
2020-02-05 07:48:33.722767,11.3,-,-,-
2020-02-05 07:48:33.823252,11.7,-,-,-
2020-02-05 07:48:33.923965,11.1,-,-,-
2020-02-05 07:48:34.024591,11.1,-,-,-
2020-02-05 07:48:34.125257,11.5,-,-,-
2020-02-05 07:48:34.226037,12.0,-,-,-
2020-02-05 07:48:34.326727,11.5,-,-,-
2020-02-05 07:48:34.427272,11.4,-,-,-


In [16]:
def save_engineered_csvs(frames=None, files=None, directory=None):
    """Write each data frame to ``<directory>/engineered/<file name>`` as CSV.

    The ``engineered`` sub-directory is created when it does not exist yet, so
    the function also works on a fresh checkout. One confirmation line is
    printed per file written.

    Args:
        frames: sequence of DataFrames, index-aligned with ``files``.
            Defaults to the module-level ``dfs``.
        files: sequence of output file names. Defaults to the module-level
            ``data_files``.
        directory: parent directory of the ``engineered`` folder. Defaults to
            the module-level ``basedir``.
    """
    frames = dfs if frames is None else frames
    files = data_files if files is None else files
    directory = basedir if directory is None else directory

    out_dir = os.path.join(directory, 'engineered')
    # to_csv does not create missing directories
    os.makedirs(out_dir, exist_ok=True)

    for i, name in enumerate(files):
        frame = frames[i]
        frame.to_csv(os.path.join(out_dir, name))
        print(f'Wrote {len(frame)} lines to generated/engineered/{name}')

In [17]:
# Write every frame in dfs to the engineered/ folder under basedir, one CSV per input file
save_engineered_csvs()

Wrote 6893 lines to generated/engineered/Battery1_telemetryOutputData_2020_02_05_15_48_33.csv
Wrote 6894 lines to generated/engineered/Battery2_telemetryOutputData_2020_02_05_15_48_33.csv
Wrote 6893 lines to generated/engineered/CmdRcv_telemetryOutputData_2020_02_05_15_48_33.csv
Wrote 6894 lines to generated/engineered/RSSI1_telemetryOutputData_2020_02_05_15_48_33.csv
Wrote 6894 lines to generated/engineered/RSSI2_telemetryOutputData_2020_02_05_15_48_33.csv
Wrote 6893 lines to generated/engineered/TlmTx_telemetryOutputData_2020_02_05_15_48_33.csv
