In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler
import keras 

# Load the raw CSV into a DataFrame; the next cell filters it on 'Entity' and
# reads its 'Date', 'Cases' and 'Daily tests' columns.
dataset = pd.read_csv('data.csv')

In [None]:
# Keep only the rows for Greece and the three columns the analysis needs.
greece_data = dataset.loc[dataset['Entity'] == 'Greece', ['Date', 'Cases', 'Daily tests']].copy()

# Parse the dates and put the rows in chronological order BEFORE any positional
# operation. The drops below select rows by position, and later cells use the
# index of the 2021-01-01 row as a position into a NumPy array, so the index
# must be a clean 0..n-1 range that follows date order. A stable sort keeps
# rows with equal dates in their original file order.
greece_data['Date'] = pd.to_datetime(greece_data['Date'])
greece_data = greece_data.sort_values('Date', kind='mergesort').reset_index(drop=True)

# Drop the first 25 rows, then positions 339-342 of what is left (rows 364-367
# of the Greek subset), and renumber the index.
# NOTE(review): the reason for excluding these rows is not visible here --
# presumably they lack usable test counts; confirm against the data.
greece_data = greece_data.drop(greece_data.index[range(0, 25)])
greece_data = greece_data.drop(greece_data.index[range(364 - 25, 368 - 25)])
greece_data = greece_data.reset_index(drop=True)

# Positivity (%) = day-over-day change in 'Cases' divided by 'Daily tests'.
# diff() leaves NaN in the first row. A zero test count would yield +/-inf,
# which is converted to NaN so it is treated as a missing value rather than
# breaking the scaler further down.
greece_data['Positivity'] = (
    (greece_data['Cases'].diff() / greece_data['Daily tests']) * 100
).replace([np.inf, -np.inf], np.nan)

greece_data

In [None]:
# Work on an independent single-column frame holding only the target series,
# so later transformations do not touch greece_data.
data = greece_data.loc[:, ['Positivity']].copy()
data

In [None]:
# Remove NaN values: carry the last known value forward, then back-fill
# anything still missing at the start of the series (forward fill cannot
# reach leading gaps).
# DataFrame.ffill()/bfill() are used instead of fillna(method=...), which is
# deprecated in recent pandas releases.
data = data.ffill().bfill()
data

In [None]:
# Scale the series to the [0, 1] range for the models.
# The scaler is fitted only on rows dated before 2021-01-01 (the train/test
# boundary used when the samples are built) so that the min/max of the test
# period do not leak into preprocessing; test values may therefore fall
# slightly outside [0, 1]. The fitted scaler is kept in `scaler` so that
# predictions can be mapped back to percentages with
# scaler.inverse_transform(...).
train_rows = (greece_data['Date'] < '2021-01-01').to_numpy()
values = np.asarray(data, dtype=float)

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(values[train_rows])

# `data` becomes a NumPy array with one column, as before.
data = scaler.transform(values)

In [None]:
# Index of the 2021-01-01 row: samples anchored before it go to the training
# set, samples anchored at or after it go to the test set.
split_index = greece_data.loc[greece_data['Date'] == '2021-01-01'].index[0]

X_train, y_train = [], []
X_test, y_test = [], []

# Each sample uses the three values preceding position i as input and the
# value three positions after i as the target.
# NOTE: the last training targets (i + 3 for i close to split_index) are taken
# from rows at or just after the split.
for i in range(3, split_index):
    X_train.append(data[i - 3:i, 0])
    y_train.append(data[i + 3, 0])

# Stop three rows before the end so that data[i + 3] always exists.
for i in range(split_index, data.size - 3):
    X_test.append(data[i - 3:i, 0])
    y_test.append(data[i + 3, 0])

In [None]:
# Convert the lists of samples and targets into NumPy arrays.
X_train, y_train = np.array(X_train), np.array(y_train)
X_test, y_test = np.array(X_test), np.array(y_test)

# Recurrent-network input: (samples, timesteps, 1 feature).
X_train_rnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_rnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# SVR input: one flat feature row per sample.
X_train_svm = X_train.reshape(X_train.shape[0], -1)
X_test_svm = X_test.reshape(X_test.shape[0], -1)

In [None]:
# Stacked recurrent regressor: four LSTM layers of 100 units each, with 20%
# dropout after each of the first three, followed by a single-unit dense
# output. Only the last LSTM collapses the sequence (no return_sequences).
model = keras.Sequential([
    LSTM(100, return_sequences=True, input_shape=(X_train_rnn.shape[1], 1)),
    Dropout(0.2),
    LSTM(100, return_sequences=True),
    Dropout(0.2),
    LSTM(100, return_sequences=True),
    Dropout(0.2),
    LSTM(100),
    Dense(1),
])

# Train with Adam on mean squared error.
model.compile(loss='mean_squared_error', optimizer='adam')

# `result` holds the object returned by fit().
result = model.fit(x=X_train_rnn, y=y_train, batch_size=32, epochs=100)

In [None]:
# Support-vector regressor with an RBF kernel, trained on the same samples and
# targets as the recurrent model (flattened inputs).
model_svm = SVR(kernel='rbf')

model_svm.fit(X=X_train_svm, y=y_train)

In [None]:
# Position of the first row dated 2021-01-01 (start of the test period).
is_split_day = greece_data['Date'] == '2021-01-01'
start_index = greece_data.loc[is_split_day].index[0]

# The first test target lies three rows after start_index, so the matching
# dates start there as well. The frame is indexed by date for plotting.
test_dates = greece_data['Date'].iloc[start_index + 3:]
result_df = pd.DataFrame({
    'Date': test_dates,
    'y_test': y_test.flatten(),
}).set_index('Date')

In [None]:
# Predictions of the recurrent model on the test samples.
y_pred_rnn = model.predict(X_test_rnn)

# Actual vs. predicted series over the test dates. Both curves are on the
# scale of the transformed data, not in percent.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(result_df.index, result_df['y_test'], label='Actual')
ax.plot(result_df.index, y_pred_rnn, label='Predicted (RNN)')
ax.set_xlabel('Date')
ax.set_ylabel('Positivity')
ax.set_title('Positivity - Actual vs Predicted (RNN Model)')
ax.legend()
plt.xticks(rotation=45)
plt.show()

In [None]:
# Predictions of the support-vector regressor on the test samples.
y_pred_svm = model_svm.predict(X_test_svm)

# Actual vs. predicted series over the test dates. Both curves are on the
# scale of the transformed data, not in percent.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(result_df.index, result_df['y_test'], label='Actual')
ax.plot(result_df.index, y_pred_svm, label='Predicted (SVM)')
ax.set_xlabel('Date')
ax.set_ylabel('Positivity')
ax.set_title('Positivity - Actual vs Predicted (SVM Model)')
ax.legend()
plt.xticks(rotation=45)
plt.show()

In [None]:
# Both models are scored against the same test targets; the errors are in the
# units of the transformed series.

# Recurrent model: mean squared and mean absolute error.
rnn_predictions = model.predict(X_test_rnn)
rnn_mse = mean_squared_error(y_true=y_test, y_pred=rnn_predictions)
rnn_mae = mean_absolute_error(y_true=y_test, y_pred=rnn_predictions)
print("RNN Model Evaluation:")
print("MSE:", rnn_mse)
print("MAE:", rnn_mae)

# Support-vector regressor: same two metrics.
svm_predictions = model_svm.predict(X_test_svm)
svm_mse = mean_squared_error(y_true=y_test, y_pred=svm_predictions)
svm_mae = mean_absolute_error(y_true=y_test, y_pred=svm_predictions)
print("SVM Model Evaluation:")
print("MSE:", svm_mse)
print("MAE:", svm_mae)

Βασιζόμενοι μόνο σε αυτές τις μετρικές, παρατηρούμε ότι το SVM είναι ελάχιστα καλύτερος παλινδρομητής σε σχέση με το RNN, αφού έχει μικρότερες τιμές MAE και MSE.