In [1]:
# Install the forecast package only when it is not already available, so that
# re-running this cell does not download and rebuild the package and its
# dependencies every time.
if (!requireNamespace("forecast", quietly = TRUE)) {
  install.packages("forecast")
}

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘xts’, ‘TTR’, ‘quadprog’, ‘quantmod’, ‘colorspace’, ‘fracdiff’, ‘lmtest’, ‘timeDate’, ‘tseries’, ‘urca’, ‘zoo’, ‘RcppArmadillo’




In [2]:
library(xml2)
library(dplyr)
library(forecast)
# ---- Load glucose events from the XML export ----
# Reads every <event> node, takes its `ts` and `value` attributes, and builds
# `df` with columns:
#   time     - POSIXct timestamp parsed from `ts` (day-month-year H:M:S)
#   glucose  - numeric `value`
#   time_num - seconds elapsed since the earliest retained timestamp
xml_file <- "563-ws-training (1).xml"
doc <- read_xml(xml_file)

# NOTE(review): ".//event" matches every <event> node anywhere in the document,
# whatever its parent element is. Confirm the file holds only glucose events,
# or narrow the XPath to the glucose section.
events <- xml_find_all(doc, ".//event")
ts_vec <- xml_attr(events, "ts")
val_vec <- as.numeric(xml_attr(events, "value"))

# The timestamps carry no zone information, so parse them with an explicit
# time zone. This keeps the result independent of the session's time zone and
# avoids NA values for wall-clock times that fall into a local DST gap.
time_vec <- as.POSIXct(ts_vec, format = "%d-%m-%Y %H:%M:%S", tz = "UTC")

# An event with a missing/unparseable timestamp or value would turn
# min(df$time), time_num and every downstream error metric into NA, so such
# events are dropped here (with a warning rather than silently).
is_complete <- !is.na(time_vec) & !is.na(val_vec)
if (!any(is_complete)) {
  stop("No events with a parseable 'ts' and numeric 'value' found in ",
       xml_file, call. = FALSE)
}
if (!all(is_complete)) {
  warning(sum(!is_complete),
          " event(s) dropped: missing or unparseable 'ts' or 'value'.",
          call. = FALSE)
}

df <- data.frame(
  time = time_vec[is_complete],
  glucose = val_vec[is_complete]
) %>%
  arrange(time)
df$time_num <- as.numeric(difftime(df$time, min(df$time), units = "secs"))
# ---- Chronological 70/30 train/test split ----
# `df` is sorted by time, so the first 70% of rows form the training set and
# the remaining rows form the test set.
n_total <- nrow(df)
train_size <- floor(0.7 * n_total)

# With fewer than two rows one partition would be empty; the colon operator
# would then count backwards (e.g. 1:0) and select the same rows, or NA rows,
# for both partitions. Fail early instead.
if (train_size < 1 || train_size >= n_total) {
  stop("Need at least 2 observations for a 70/30 split, got ", n_total,
       call. = FALSE)
}

train_df <- df[seq_len(train_size), ]
test_df <- df[train_size + seq_len(n_total - train_size), ]
# ---- Baseline: linear trend of glucose over elapsed seconds ----
# Fit on the training rows only; the call is kept in this exact form because
# summary() echoes it in its "Call:" line.
lm_model <- lm(glucose ~ time_num, data = train_df)

# Predict the held-out rows and keep the predictions next to the actual values.
test_df$predicted_glucose <- predict(lm_model, newdata = test_df)

# Test-set errors (actual minus predicted), reused for both metrics.
lm_errors <- test_df$glucose - test_df$predicted_glucose
rmse_lm <- sqrt(mean(lm_errors^2))
mae_lm <- mean(abs(lm_errors))

cat("Linear Regression RMSE:", rmse_lm, "\n")
cat("Linear Regression MAE:", mae_lm, "\n")

# Coefficient table and in-sample fit statistics for the training data.
summary(lm_model)
# ---- ARIMA on the training glucose series ----
# Only the glucose values are passed to ts(); the timestamps in df$time are
# not used, so observations are treated as consecutive steps of a
# frequency-1 series.
train_ts <- ts(train_df$glucose, frequency = 1)
arima_model <- auto.arima(train_ts)

# Forecast one value per test row, starting from the end of the training
# series. The test observations are not fed back into the model.
# NOTE(review): this is a single forecast over the whole test length rather
# than a rolling one-step-ahead evaluation - confirm that is the intended
# comparison against the regression baseline.
h <- nrow(test_df)
forecast_results <- forecast(arima_model, h = h)
predicted_glucose_arima <- as.numeric(forecast_results$mean)

# Test-set errors (actual minus forecast), reused for both metrics.
test_glucose <- test_df$glucose
arima_errors <- test_glucose - predicted_glucose_arima
rmse_arima <- sqrt(mean(arima_errors^2))
mae_arima <- mean(abs(arima_errors))

cat("ARIMA RMSE:", rmse_arima, "\n")
cat("ARIMA MAE:", mae_arima, "\n")

# Selected order, coefficients and training-set error measures.
summary(arima_model)
# ---- MAPE and side-by-side comparison of the two models ----
# Absolute percentage error per test row: |actual - predicted| / actual.
ape_lm <- abs((test_df$glucose - test_df$predicted_glucose) / test_df$glucose)
mape_lm <- mean(ape_lm) * 100

ape_arima <- abs((test_glucose - predicted_glucose_arima) / test_glucose)
mape_arima <- mean(ape_arima) * 100

cat("\nPerformance Metrics Comparison\n")
cat(
  "Linear Regression - RMSE:", round(rmse_lm, 2),
  "MAE:", round(mae_lm, 2),
  "MAPE:", round(mape_lm, 2), "\n"
)
cat(
  "ARIMA - RMSE:", round(rmse_arima, 2),
  "MAE:", round(mae_arima, 2),
  "MAPE:", round(mape_arima, 2), "\n"
)

# The verdict is based on test-set RMSE alone; ARIMA is named only when its
# RMSE is strictly lower, otherwise the linear regression is named.
conclusion <- if (rmse_arima < rmse_lm) {
  "\nConclusion: ARIMA model has lower RMSE and is more reliable for this data.\n"
} else {
  "\nConclusion: Linear Regression has lower RMSE and is more reliable for this data.\n"
}
cat(conclusion)





Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 



Linear Regression RMSE: 50.92409 
Linear Regression MAE: 42.2728 



Call:
lm(formula = glucose ~ time_num, data = train_df)

Residuals:
   Min     1Q Median     3Q    Max 
-98.92 -36.97  -4.92  28.96 248.74 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 1.303e+02  1.010e+00   129.0   <2e-16 ***
time_num    7.866e-06  6.449e-07    12.2   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 47.66 on 8484 degrees of freedom
Multiple R-squared:  0.01724,	Adjusted R-squared:  0.01712 
F-statistic: 148.8 on 1 and 8484 DF,  p-value: < 2.2e-16


ARIMA RMSE: 84.07142 
ARIMA MAE: 71.41497 


Series: train_ts 
ARIMA(2,1,2) 

Coefficients:
         ar1     ar2      ma1      ma2
      0.6275  0.1474  -0.1413  -0.1695
s.e.  0.1499  0.1116   0.1491   0.0427

sigma^2 = 15.9:  log likelihood = -23773.42
AIC=47556.83   AICc=47556.84   BIC=47592.06

Training set error measures:
                        ME     RMSE      MAE        MPE     MAPE      MASE
Training set -0.0001291555 3.986021 2.339842 0.04559842 1.760701 0.7175899
                     ACF1
Training set 0.0002001344


Performance Metrics Comparison
Linear Regression - RMSE: 50.92 MAE: 42.27 MAPE: 32.03 
ARIMA - RMSE: 84.07 MAE: 71.41 MAPE: 62.77 

Conclusion: Linear Regression has lower RMSE and is more reliable for this data.
