In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

import utilities

# Lift pandas' display truncation for both axes so frames are shown in full.
for option in ("display.max_columns", "display.max_rows"):
    pd.set_option(option, None)

In [None]:
# Identifier of the bus line to process; it is interpolated into the input path
# and into the names of the exported CSV files.
LINE = 'MM15'
# Known identifiers, each followed by a count
# (presumably the number of rows in that line's trace -- TODO confirm):
# BB16      22282
# BB61      21179
# BB62      22005
# MM15      23108
# TT7       15122

In [None]:
""" BUS_LINES = ['BB16', 'BB61', 'BB62', 'MM15']

df_list = []
for bus in BUS_LINES:
    file = f'/work/bde/Data/NYU-METS/Multivariate_Dataset/dataset/{LINE}/{LINE}.txt'

    df_list.append(pd.read_csv(file, names=['bandwidth', 'LTE-neighbors', 'RSSI', 'RSRQ', 'ENodeB-change', 'time-advance', 'speed', 'band']))

df = pd.concat(df_list)
df.info() """

In [None]:
# Names are supplied explicitly through `names=`, in the order the columns
# appear in the raw text file.
column_names = [
    'bandwidth', 'LTE-neighbors', 'RSSI', 'RSRQ',
    'ENodeB-change', 'time-advance', 'speed', 'band',
]

# Raw trace of the selected bus line.
file = f'/work/bde/Data/NYU-METS/Multivariate_Dataset/dataset/{LINE}/{LINE}.txt'
df = pd.read_csv(file, names=column_names)

# Print dtypes, non-null counts and memory usage of the loaded frame.
df.info()

In [None]:
# Coerce every measurement column to a numeric dtype. pd.to_numeric raises on
# values it cannot parse, so malformed rows surface here rather than later.
numeric_columns = [
    'bandwidth', 'LTE-neighbors', 'RSSI', 'RSRQ',
    'ENodeB-change', 'time-advance', 'speed', 'band',
]
for column in numeric_columns:
    df[column] = pd.to_numeric(df[column])

# Project helper; presumably downcasts numeric columns to 32-bit types to
# save memory -- TODO confirm against utilities.reduce_mem_usage_32.
df = utilities.reduce_mem_usage_32(df)

# Attach a synthetic timestamp, one row per second starting at 2023-01-01.
# The lowercase 's' alias is used because the uppercase 'S' spelling is
# deprecated since pandas 2.2; 's' is accepted by older versions as well.
df['date'] = pd.date_range(start='1/1/2023', periods=len(df), freq='s')

# Print the resulting dtypes and memory usage.
df.info()

In [None]:
# Preview the first rows of the converted frame, including the generated `date` column.
df.head()

In [None]:
# Write the preprocessed frame to both locations: the shared data directory
# and the project-relative data folder.
output_paths = (
    f'/work/bde/Data/NYU-preprocessed/{LINE}/{LINE}.csv',
    f'data/NYU-METS/{LINE}.csv',
)
for csv_path in output_paths:
    df.to_csv(csv_path, encoding='utf-8', index=False)

# Alternative targets, kept disabled, for a frame that combines several bus lines:
#df.to_csv(f'/work/bde/Data/NYU-preprocessed/BUS_LINES.csv', encoding='utf-8', index=False)
#df.to_csv(f'data/NYU-METS/BUS_LINES.csv', encoding='utf-8', index=False)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
def multi_corr(feature, data=None):
    """Plot and return the correlation of every column with ``feature``.

    Pearson, Spearman and Kendall correlation matrices are computed, reduced
    to the ``feature`` column, sorted in descending order and drawn side by
    side as annotated heatmaps.

    When the Pearson and Spearman values are not much different, the data
    tends to not have extreme values (outliers). Spearman and Kendall are
    rank-based (ordinal) correlations.

    Parameters
    ----------
    feature : str
        Name of the column to correlate all other columns against.
    data : pandas.DataFrame, optional
        Frame to analyse. Defaults to the notebook-level ``df`` so existing
        calls ``multi_corr(feature)`` keep working.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(pearson, spearman, kendall)``; each is a single-column frame named
        ``feature``, indexed by column name and sorted descending.
    """
    frame = df if data is None else data
    methods = ('pearson', 'spearman', 'kendall')

    fig, axes = plt.subplots(1, len(methods), figsize=(20, 6))

    # NOTE(review): `numeric_only` is not passed, so how non-numeric columns
    # (e.g. a datetime column) are treated depends on the installed pandas
    # version -- confirm this is the intended behaviour.
    rankings = []
    for ax, method in zip(axes, methods):
        ranked = frame.corr(method=method)[[feature]].sort_values(by=feature, ascending=False)
        sns.heatmap(ranked, ax=ax, annot=True)
        # Label each panel; otherwise the three heatmaps cannot be told apart.
        ax.set_title(f'{method} correlation with {feature}')
        rankings.append(ranked)

    return tuple(rankings)

In [None]:
# Rank every column by its correlation with the target column `bandwidth`.
# The results come back in the order Pearson, Spearman, Kendall.
corr1, corr2, corr3 = multi_corr('bandwidth')

In [None]:
# Collect the three rankings in one table. A fresh frame is built instead of
# adding columns to `corr1`: mutating `corr1` made this cell fail on a second
# run and made the later column assignment write into a slice of it.
# `.iloc[:, 0]` picks the correlation column regardless of the feature's name,
# and `index=corr1.index` keeps the rows in the Pearson-sorted order.
corr_df = pd.DataFrame(
    {
        'corr1': corr1.iloc[:, 0],  # Pearson
        'corr2': corr2.iloc[:, 0],  # Spearman
        'corr3': corr3.iloc[:, 0],  # Kendall
    },
    index=corr1.index,
)

# Row-wise mean of the three coefficients.
corr_df['average'] = corr_df.mean(axis=1)
corr_df

In [None]:
# Number of trailing rows used for the baseline evaluation: the last 20% of
# the trace, rounded down. Deriving it from the data keeps it correct when
# LINE changes; the previous literal 4621 only fits one trace length.
TEST_FRACTION = 0.2
scope = int(len(df) * TEST_FRACTION)
scope

In [None]:
# Show the first bandwidth values of the last `scope` rows.
df.tail(scope)['bandwidth'].head()

In [None]:
#bandwidth_tail = df.tail(scope*2).head(scope)['bandwidth'].to_numpy()
# Keep only the last `scope` rows of the trace for the baseline evaluation.
tail = df.tail(scope)
# Display the object's type as the cell output.
type(tail)

In [None]:
# Reduce the evaluation frame to the target column. The explicit copy detaches
# `tail` from `df`, so the column added to it later is written to an
# independent frame rather than to a slice (avoids SettingWithCopyWarning).
tail = tail[['bandwidth']].copy()
type(tail)

In [None]:
# Preview the first rows of the single-column evaluation frame.
tail.head()

In [None]:
""" test_data = np.arange(50)
tail = pd.DataFrame({'bandwidth': test_data})
tail.head() """

In [None]:
span = 8       # EWMA span; also the number of samples that precede each forecast window
pred_len = 48  # number of future samples each forecast is scored against

# Containers for the per-window ground truth and the two baseline forecasts.
truth = []
ewma8 = []
shifted = []

# Exponentially weighted moving average of the bandwidth. min_periods is tied
# to `span` (previously a separate literal 8) so the first non-NaN value
# always sits at position span - 1, which is the first position read later.
tail['EWMA8'] = tail['bandwidth'].ewm(span=span, min_periods=span, adjust=True).mean()

# Plain arrays for fast positional indexing when the windows are built.
b_truth = tail['bandwidth'].to_numpy()
b_ewma8 = tail['EWMA8'].to_numpy()

tail.head(30)

In [None]:
#b_truth[(span):(span+pred_len)]

In [None]:
#b_ewma8[0+span-1]

In [None]:
#b_truth[0+span-1]

In [None]:
# Start from empty lists so this cell can be re-run on its own without
# appending duplicate windows to lists created in an earlier cell.
truth, ewma8, shifted = [], [], []

# A window starting at i treats samples [i, i + span) as history and the next
# pred_len samples as the target. The last valid start is therefore
# len(b_truth) - span - pred_len inclusive, hence the + 1 (the previous bound
# dropped the final window).
n_windows = len(b_truth) - span - pred_len + 1
if n_windows <= 0:
    raise ValueError(
        f'need at least {span + pred_len} samples to build one window, got {len(b_truth)}'
    )

for i in range(n_windows):
    last_seen = i + span - 1  # most recent sample known when the forecast is made
    truth.append(b_truth[i + span:i + span + pred_len])
    # Both baselines hold a single value constant over the whole horizon:
    # the EWMA at the last seen sample, and the last seen sample itself.
    ewma8.append(np.repeat(b_ewma8[last_seen], pred_len))
    # Index i + span would already include the first target sample:
    #ewma8.append(np.repeat(b_ewma8[i+span-0], pred_len)) #informer
    shifted.append(np.repeat(b_truth[last_seen], pred_len))

for label, forecast in (('EWMA8', ewma8), ('Shifted', shifted)):
    # RMSE is computed per forecast step and then averaged over the horizon,
    # which is what recent scikit-learn releases return for
    # mean_squared_error(..., squared=False) on multi-output input. That
    # keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so the
    # computation is spelled out here and works on old and new versions.
    rmse = np.sqrt(mean_squared_error(truth, forecast, multioutput='raw_values')).mean()
    mae = mean_absolute_error(truth, forecast)
    print(f'{label} rmse: {rmse}, mae: {mae}')