In [None]:
import pandas as pd
import numpy as np
from numpy import array
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.layers import Input
from random import randrange
import random
from tqdm import tqdm
from array import array
from keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from keras.preprocessing.sequence import TimeseriesGenerator
import io
import matplotlib.pyplot as plt
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
import sys
from matplotlib.pyplot import figure


One common approach to sequential series prediction is to use a type of recurrent neural network (RNN) called a long short-term memory (LSTM) network or a gated recurrent unit (GRU) network.

In [None]:
data = pd.read_csv('Italia-positivi-giornaliero.csv')

In [None]:
data = data.totale_positivi.values.tolist()

In [None]:
def sequence_split(sequence, inp, outp=1):
    """Slice a univariate series into sliding (input window, target) pairs.

    Parameters
    ----------
    sequence : sliceable sequence of numbers (list or 1-D array).
    inp : int
        Number of consecutive observations in each input window.
    outp : int, default 1
        Number of observations that follow the window and form the target.
        With the default of 1 each target is a single scalar; for larger
        values each target is a window of ``outp`` values.

    Returns
    -------
    (X, y) : tuple of numpy arrays
        ``X`` has one row of ``inp`` values per sample. ``y`` has one scalar
        per sample when ``outp == 1``, otherwise one row of ``outp`` values.
        Both are empty when the series is shorter than ``inp + outp``.

    Raises
    ------
    ValueError
        If ``outp`` is smaller than 1.
    """
    # Imported locally: the notebook's top-level `from array import array`
    # comes after `from numpy import array` and rebinds the global name.
    from numpy import array

    if outp < 1:
        raise ValueError("outp must be at least 1")

    X, y = list(), list()
    # Last start index whose input window AND target still fit in the series.
    last_start = len(sequence) - inp - outp
    for start in range(last_start + 1):
        end_ix = start + inp
        X.append(sequence[start:end_ix])
        if outp == 1:
            # Single-step target stays a scalar.
            y.append(sequence[end_ix])
        else:
            y.append(sequence[end_ix:end_ix + outp])
    return array(X), array(y)

In [None]:
X,y = sequence_split(data, 10, 1)

In [None]:
# Preview only the first few input windows; printing every row floods the notebook output.
print(X[:5])

### IMPLEMENTING MLP WITH THE ITALIA-POSITIVI DATASET

In [None]:
# Single-hidden-layer perceptron mapping a window of 10 past values to the next value.
MLP = Sequential()
MLP.add(Dense(50, activation = 'relu' , input_dim = 10))
MLP.add(Dense(1))

# Regression setup: MSE loss, with mean absolute error as a readable companion metric.
# ('accuracy' counts exact class matches and is not informative for a continuous target.)
MLP.compile(optimizer = 'adam', loss = 'mse', metrics=['mae'])
MLP.summary()

In [None]:
history = MLP.fit(X,y, epochs = 500, batch_size = 128) # Fitting our data to the model

In [None]:
# making a prediction using the following values
pred_data = np.array([102859, 106920, 110659, 115112, 119230, 120875, 123396, 127085, 132513, 137130])
pred_data = pred_data.reshape(1,X.shape[1])
y_hat = MLP.predict(pred_data, verbose=0)

In [None]:
print("Predicted Value",y_hat)  # printing our predicted value


In [None]:
# Plot the per-epoch training loss stored in `history`, then enlarge the current figure.
plt.plot(history.history['loss'])
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)

### Experimenting on the Italia trend dataset (Italia-trend-giornaliero)

In [None]:
trend_data = pd.read_csv('Italia-trend-giornaliero.csv')


In [None]:
# Keep only the first two columns of the frame.
trend_data = trend_data.drop(columns=trend_data.columns[2:])

In [None]:
trend_data.dtypes

In [None]:
trend_data

In [None]:
trend_data = trend_data.ricoverati.values.tolist()

In [None]:
# Windows of 10 past values with a single-step target (the output size is passed explicitly).
X_trend,y_trend = sequence_split(trend_data, 10, 1)

In [None]:
# Preview only the first few input windows; printing every row floods the notebook output.
print(X_trend[:5])

In [None]:
# NOTE(review): `MLP` is the same model object that an earlier cell already fitted, so this
# call continues training from its current weights rather than from a fresh
# initialisation — confirm that this is intended.
history = MLP.fit(X_trend,y_trend, epochs = 200, batch_size = 128) # Fitting our data to the model

In [None]:
# One-step forecast from a hand-entered window of ten values.
pred_data = np.array(
    [3525, 3597, 3647, 3808, 3970, 4060, 4088, 4145, 4250, 4345]
).reshape(1, X_trend.shape[1])
y_hat = MLP.predict(pred_data, verbose=0)
y_hat

In [None]:
plt.plot(history.history['loss'])

### Implementing the Moby Dick dataset, taking part of the text for analysis

In [None]:
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.layers import LSTM
from keras.models import Sequential
from keras.layers import Embedding
from numpy import array

In [None]:
# file = open("MobyDick.txt", mode="r") 

In [None]:
# Training corpus for the next-word model. Sentences are separated by real line breaks;
# the former literal "/n" markers were removed because the tokenizer's default filters
# strip "/" and would otherwise turn each marker into a spurious word "n".
moby_data = """ It had cooled and crystallized to such a degree, that when, with several others, I sat down before a large Constantine’s bath of it, I found it strangely concreted into lumps, here and there rolling about in the liquid part. It was our business to squeeze these lumps backinto fluid.
A sweet and unctuous duty! No wonder that in old times this sperm was such a favourite cosmetic.
Such a clearer! such a sweetener! such a softener! such a delicious molifier! After having my hands in it for only a few minutes, my fingers felt like eels, and began, as it were, to serpentine and spiralise.
As I sat there at my ease, cross-legged on the deck; after the bitter exertion at the windlass; under a blue tranquil sky; the ship under indolent sail, and gliding so serenely along;
as I bathed my hands among those soft, gentle globules of infiltrated tissues, woven almost within the hour; as they richly broke to my fingers, and discharged all their opulence, like fully ripe grapes their wine;
as I snuffed up that uncontaminated aroma,—literally and truly, like the smell of spring violets;
I declare to you, that for the time I lived as in a musky meadow;
I forgot all about our horrible oath; in that inexpressible sperm, I washed my hands and my heart of it;
I almost began to credit the old Paracelsan superstition that sperm is of rare virtue in allaying the heat of anger; while bathing in that bath, I felt divinely free from all ill-will, or petulance, or malice, of any sort whatsoever.
\n """

In [None]:
# Convert the words of the text into integer ids: fit the tokenizer's vocabulary
# on the passage, then encode the passage as a list of those ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([moby_data])
encoded_data = tokenizer.texts_to_sequences([moby_data])[0]
encoded_data

In [None]:
text_size = len(tokenizer.word_index) + 1
text_size

In [None]:
from keras.utils.timeseries_dataset import sequences_from_indices
# Pair every token id with the id that follows it, giving [current_word, next_word] samples.

sequences = [list(pair) for pair in zip(encoded_data, encoded_data[1:])]
print("The length of the sequence is: ",len(sequences))
sequences

In [None]:
# Splitting the data into input and output

In [None]:
sequences = np.array(sequences)
inp, outp = sequences[:,0], sequences[:,1]

In [None]:
inp[:10],outp[:10]

In [None]:
outp = to_categorical(outp, num_classes=text_size) # creating a one hot encoding
outp[:10]

In [None]:
# Building our model

In [None]:
# Next-word model: 50-d embedding of a single token -> LSTM(50) -> softmax over the vocabulary.
model = Sequential([
    Embedding(text_size, 50, input_length=1),
    LSTM(50),
    Dense(text_size, activation = 'softmax'),
])
model.summary()

In [None]:
model.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
model.fit(inp, outp, epochs=200)

In [None]:
#  generating a sequence from the model

def generate_seg(model, tokenizer, enter_text, n_pred):
  """Generate text by repeatedly predicting the next word.

  Parameters
  ----------
  model : object with a ``predict(x, verbose=0)`` method returning one row of
      class scores per input token id.
  tokenizer : object exposing ``texts_to_sequences`` and ``word_index``.
  enter_text : str
      Seed text; its last known word is the first model input.
  n_pred : int
      Maximum number of words to append.

  Returns
  -------
  str
      The seed text followed by the generated words, separated by spaces.
      Generation stops early when the current text holds no word known to
      the tokenizer.
  """
  # Reverse lookup built once instead of scanning word_index on every step.
  index_to_word = {index: word for word, index in tokenizer.word_index.items()}

  in_text, result = enter_text, enter_text
  for _ in range(n_pred):
    encoded = tokenizer.texts_to_sequences([in_text])[0]
    if len(encoded) == 0:
      # Nothing the model can be fed (unknown or empty word): stop generating.
      break
    # The model consumes one token per sample, so feed only the latest word.
    encoded = np.array(encoded[-1:])

    # Class index with the highest score (not a boolean "any score is non-zero").
    scores = model.predict(encoded, verbose=0)
    y_pred = int(np.argmax(scores[-1]))

    out_word = index_to_word.get(y_pred, '')
    in_text, result = out_word, result + ' ' + out_word
  # Return only after all requested words have been generated.
  return result




In [None]:
print(generate_seg(model, tokenizer, 'bitter', 5))

In [None]:
# MLP WITH MOBY-DICK DATASET

In [None]:
# Implementing the Covid Dataset

In [None]:
covid_data = pd.read_csv('TotalPositiveVariation.csv')
covid_data

In [None]:
covid_data = covid_data.variazione_totale_positivi.values.tolist()
covid_data

In [None]:
# Windows of 10 past values with a single-step target (the output size is passed explicitly).
covid_X,covid_y = sequence_split(covid_data, 10, 1)

In [None]:
covid_X

In [None]:
# Preview only the first few input windows; printing every row floods the notebook output.
print(covid_X[:5])

In [None]:
# NOTE(review): `MLP` is the same model object that earlier cells already fitted, so this
# call continues training from its current weights rather than from a fresh
# initialisation — confirm that this is intended.
covid_history = MLP.fit(covid_X,covid_y, epochs = 100, batch_size = 128) # Fitting our data to the model

In [None]:
# One-step forecast from a hand-entered window of ten values.
pred_data = np.array(
    [3739, 4453, 4118, 1645, 2521, 3689, 5428, 4617, 6271, 5359]
).reshape(1, covid_X.shape[1])
y_hat = MLP.predict(pred_data, verbose=0)
y_hat

In [None]:
plt.plot(covid_history.history['loss'])

#### Question 2 IMPLEMENTING THE ABOVE IN RNN 

In [None]:

def generate_dataset(time_steps, num_examples, input_dim, output_dim):
    """Draw a random toy regression set from the standard normal distribution.

    Returns ``(X, y)`` where ``X`` has shape
    ``(num_examples, time_steps, input_dim)`` and ``y`` has shape
    ``(num_examples, output_dim)``. Values come from numpy's global random
    state, features first and targets second.
    """
    feature_shape = (num_examples, time_steps, input_dim)
    target_shape = (num_examples, output_dim)
    return np.random.randn(*feature_shape), np.random.randn(*target_shape)

# Define the LSTM model
def build_model(time_steps, input_dim, output_dim):
    """Return an uncompiled LSTM(32) -> Dense(output_dim) network.

    The network takes inputs of shape ``(time_steps, input_dim)`` per sample.
    """
    return Sequential([
        LSTM(32, input_shape=(time_steps, input_dim)),
        Dense(output_dim),
    ])

# Generate the dataset: 100 random samples of 10 time steps, one feature and one target each.
# NOTE: this rebinds the notebook-level names `X` and `y`.
time_steps = 10
input_dim = 1
output_dim = 1
X, y = generate_dataset(time_steps, 100, input_dim, output_dim)

# Build and compile the model (this rebinds the notebook-level name `model`)
model = build_model(time_steps, input_dim, output_dim)
model.compile(loss='mean_squared_error', optimizer='adam')

# Fit the model to the data (silently, verbose=0)
history = model.fit(X, y, epochs=100, verbose=0)

# Predict the output for the same samples the model was trained on
predictions = model.predict(X)

# Plot the predicted values (red) against the targets (blue), indexed by sample
plt.plot(predictions, 'r', label='Predicted')
plt.plot(y, 'b', label='Actual Value')
plt.legend()
plt.show()


In [None]:
# Windows of 10 past values with a single-step target (the output size is passed explicitly).
X,y = sequence_split(data, 10, 1)

In [None]:
# Preview only the first few input windows; printing every row floods the notebook output.
print(X[:5])

In [None]:
from keras.layers import RNN, SimpleRNN
from keras.models import Sequential

# define the model
# NOTE: the variable name `RNN` is kept because later cells call RNN.predict(...);
# it rebinds the `RNN` layer class imported just above.
RNN = Sequential()
RNN.add(SimpleRNN(50, input_shape=(None, 1)))
# Regression head: without it the network emits 50 values per window while each
# target is a single number, and the loss silently broadcasts the two.
RNN.add(Dense(1))

# compile the model
RNN.compile(optimizer='adam', loss='mean_squared_error')

# fit the model to the data
# The recurrent layer expects (samples, time steps, features); make the
# single-feature axis explicit instead of relying on implicit expansion.
history = RNN.fit(X.reshape((X.shape[0], X.shape[1], 1)), y, epochs=200)

In [None]:
# One-step forecast from a hand-entered window of ten values.
pred_data = np.array(
    [110659, 115112, 119230, 120875, 123396, 127085, 132513, 137130, 143401, 148760]
).reshape(1, X.shape[1])
y_hat = RNN.predict(pred_data, verbose=0)


In [None]:
predictions = RNN.predict(X)

# Plot the predicted values
plt.plot(predictions, 'r', label='Predicted')
plt.plot(y, 'b', label='Actual Value')
# plt.legend()
plt.show()


In [None]:
# Predict on every window in X.
predictions = RNN.predict(X)

# Plot the predicted values
# NOTE(review): the red series is `y_hat` (assigned in an earlier cell from a single
# hand-entered window), not the `predictions` computed above — confirm which was intended.
plt.plot(y_hat, 'r', label='Predicted')
plt.plot(y, 'b', label='Actual Value')
# plt.legend()
plt.show()


#### Implementing LSTM 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Load the dataset, indexed by the parsed `data` column, and keep only the first two value columns
df = pd.read_csv('Italia-trend-giornaliero.csv',parse_dates = ['data'], index_col = ['data'])
df.drop(df.columns[2:], axis=1, inplace=True)

# Positional 80/20 split, decided BEFORE scaling so that the test rows cannot
# influence the scaler's min/max (fitting on the full frame leaks test statistics).
split_index = int(len(df) * 0.8)

# Normalize the data: fit on the training rows only, then apply the same scaling to the test rows
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(df.iloc[:split_index])
test_scaled = scaler.transform(df.iloc[split_index:])
# Keep `df` bound to the full scaled array, as this cell did before.
df = np.vstack([train_scaled, test_scaled])

# Split the data into train and test sets: every column but the last is a feature, the last is the target
x_train, y_train = train_scaled[:, :-1], train_scaled[:, -1]
x_test, y_test = test_scaled[:, :-1], test_scaled[:, -1]

# Reshape the data for the LSTM: (samples, 1 time step, features)
x_train = x_train.reshape((x_train.shape[0], 1, x_train.shape[1]))
x_test = x_test.reshape((x_test.shape[0], 1, x_test.shape[1]))

# Build the model
model = Sequential()
model.add(LSTM(64, input_shape=(1, x_train.shape[2])))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

# Fit the model to the training data (row order preserved, test set used for validation)
trend_history = model.fit(x_train, y_train, epochs=100, batch_size=16, validation_data=(x_test, y_test), verbose=2, shuffle=False)

# Make predictions on the test data
predictions = model.predict(x_test)



In [None]:


# Plot the results
plt.plot(y_test, label='True')
plt.plot(predictions[:, 0], label='Prediction')
plt.legend()
plt.show()

In [None]:
plt.plot(trend_history.history['loss'])

In [None]:
#### Implementing GRU

In [None]:
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import GRU, Dense

# Load the dataset, indexed by the parsed `data` column, and keep only the first two value columns
df = pd.read_csv('Italia-trend-giornaliero.csv',parse_dates = ['data'], index_col = ['data'])
df.drop(df.columns[2:], axis=1, inplace=True)

# Positional 80/20 split, decided BEFORE scaling so that the test rows cannot
# influence the scaler's min/max (fitting on the full frame leaks test statistics).
split_index = int(len(df) * 0.8)

# Normalize the data: fit on the training rows only, then apply the same scaling to the test rows
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(df.iloc[:split_index])
test_scaled = scaler.transform(df.iloc[split_index:])
# Keep `df` bound to the full scaled array, as this cell did before.
df = np.vstack([train_scaled, test_scaled])

# Split the data into train and test sets: every column but the last is a feature, the last is the target
x_train, y_train = train_scaled[:, :-1], train_scaled[:, -1]
x_test, y_test = test_scaled[:, :-1], test_scaled[:, -1]

# Reshape the data for the GRU: (samples, 1 time step, features)
x_train = x_train.reshape((x_train.shape[0], 1, x_train.shape[1]))
x_test = x_test.reshape((x_test.shape[0], 1, x_test.shape[1]))

# Build the model
model = Sequential()
model.add(GRU(64, input_shape=(1, x_train.shape[2])))
model.add(Dense(1))
# The target is a continuous value and the output layer is linear, so a regression
# loss is used here; binary cross-entropy expects probabilities and 0/1 labels.
# NOTE(review): 'accuracy' is a classification metric and carries little meaning for
# this target; it is kept only so the `accuracy` history key stays available.
model.compile(loss='mean_squared_error', optimizer='adam', metrics = ['accuracy'])

# Fit the model to the training data (row order preserved, test set used for validation)
trend_hist = model.fit(x_train, y_train, epochs=100, batch_size=16,validation_data=(x_test, y_test), verbose=2, shuffle=False)


In [None]:
# Make predictions on the test data
predictions = model.predict(x_test)

# Unnormalize the data
# predictions = scaler.inverse_transform(predictions)
# y_test = scaler.inverse_transform([y_test])

# Plot the results
plt.plot(y_test, label='True')
plt.plot(predictions[:, 0], label='Prediction')
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss of the GRU run (`trend_hist`). Both curves come from
# the same fit so that the 'train' / 'validation' legend entries describe what is drawn.
print(trend_hist.history.keys())
plt.plot(trend_hist.history['loss'])
plt.plot(trend_hist.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
plt.plot(trend_history.history['loss'])

In [None]:
# The Italia positivi dataset

In [None]:
# Preview only the first few input windows; printing every row floods the notebook output.
print(X[:5])

In [None]:
# `GRU` is the Keras layer class imported earlier, not a trained model, so it cannot
# be fitted directly. Build a small GRU regressor for (window, 1) inputs first.
positivi_gru = Sequential()
positivi_gru.add(GRU(64, input_shape=(X.shape[1], 1)))
positivi_gru.add(Dense(1))
positivi_gru.compile(loss='mean_squared_error', optimizer='adam')

# Recurrent layers expect (samples, time steps, features); add the single-feature axis.
positivi_history = positivi_gru.fit(X.reshape((X.shape[0], X.shape[1], 1)), y, epochs = 100, batch_size = 128) # Fitting our data to the model

In [None]:
# NOTE(review): `GRU` is imported earlier in this notebook as the Keras layer class, so
# `GRU.predict` is not called on a fitted model — a model instance is needed here.
# NOTE(review): the argument is `y` (the targets), not the input windows `X` — confirm.
predictions = GRU.predict(y)

pred_data = np.array([115112 ,119230 ,120875 ,123396, 127085, 132513, 137130, 143401 ,148760,151514]) # making a prediction
pred_data = pred_data.reshape(1,X.shape[1])
y_hat = GRU.predict(pred_data, verbose=0)

# Plot the results
# NOTE(review): `y_test` is not assigned in this cell; presumably it still holds the test
# targets from an earlier cell and may not match `predictions` — verify.
plt.plot(y_test, label='True')
plt.plot(predictions[:, 0], label='Prediction')
plt.legend()
plt.show()

In [None]:
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import GRU, Dense

# Load the dataset
# NOTE(review): unlike the trend cells, no date parsing or index column is requested here;
# presumably every column of this CSV is numeric, otherwise the scaler below fails — verify.
pos_data = pd.read_csv('Italia-positivi-giornaliero.csv')
# Normalize the data (the scaler is fitted on all rows, before the train/test split)
scaler = MinMaxScaler()
pos_data = scaler.fit_transform(pos_data)

# Split the data into train and test sets: first 80% of rows for training;
# every column but the last is a feature, the last column is the target
split_index = int(len(pos_data) * 0.8)
x_train, y_train = pos_data[:split_index, :-1], pos_data[:split_index, -1]
x_test, y_test = pos_data[split_index:, :-1], pos_data[split_index:, -1]
print(x_test)
# Reshape the data for the GRU: (samples, 1 time step, features)
x_train = x_train.reshape((x_train.shape[0], 1, x_train.shape[1]))
x_test = x_test.reshape((x_test.shape[0], 1, x_test.shape[1]))

# Build the model: GRU(64) followed by a single linear output unit
gru = Sequential()
gru.add(GRU(64, input_shape=(1, x_train.shape[2])))
gru.add(Dense(1))
gru.compile(loss='mean_squared_error', optimizer='adam')

# Fit the model to the training data (row order preserved, test set used for validation)
GRU_history = gru.fit(x_train, y_train, epochs=10, batch_size=16, validation_data=(x_test, y_test), verbose=2, shuffle=False)


In [None]:
plt.plot(GRU_history.history['loss'])

### THE PI DATASET IMPLEMENTATION USING MLP

In [None]:
import sys
sys.set_int_max_str_digits(int(10E6))

In [None]:

# Read the first line of the file, drop its first two characters (the leading "3.")
# and keep one integer per remaining digit.
with open('pi1000000.txt') as f:
  first_line = f.readline()
digits = first_line[2:].strip()

# Convert character by character. Parsing the whole string as one huge integer and
# formatting it back is unnecessary, needs a raised int/str digit limit, and would
# silently drop any leading zeros.
pi_data = np.array([int(ch) for ch in digits])
print(pi_data.shape)


In [None]:
def split_array(data_array, predictors = 10):
    """Cut a series into `predictors` progressively shifted slices.

    For lag = 1 .. predictors the slice ``data_array[predictors - lag : -lag]``
    is collected, so the first slice starts latest and each following slice
    starts (and ends) one position earlier.

    Returns
    -------
    (split_data, seq)
        ``split_data`` is the list of slices in that order and ``seq`` is
        ``np.arange(predictors)``, the positions of those slices.
    """
    lags = range(1, predictors + 1)
    split_data = [data_array[predictors - lag: -lag] for lag in lags]
    return split_data, np.arange(predictors)

In [None]:
pi_data[:1000][-1]

In [None]:
split_data, seq = split_array(pi_data[:1000])
split_data = np.array(split_data)
split_data.shape

In [None]:
# For every position in `seq`, train a fresh MLP on the matching row of `split_data`
# and collect the last value of its prediction in `output_k`.
output_k = []
for i in seq:
    raw_seq = split_data[i]
    # Fixed 20-value input window; the requested output size grows with i (1, 2, ...).
    n_steps_in, n_steps_out = 20, i + 1
    X, y = sequence_split(raw_seq, n_steps_in, n_steps_out)
    # define model (a new one on every iteration)
    model = Sequential()   
    model.add(Dense(100, activation='relu', input_dim=n_steps_in))
    model.add(Dense(n_steps_out))
    model.compile(optimizer='adam', loss='mse')
    # fit model; `pi_history` is overwritten each time, so only the last run's history survives
    pi_history =model.fit(X, y, epochs=300, verbose=0)
    # demonstrate prediction on the final n_steps_in values of this row
    x_input = split_data[i][-n_steps_in:]
    x_input = x_input.reshape((1, n_steps_in))
    yhat = model.predict(x_input, verbose=0)
    
    # keep the last element of the (single) predicted row
    output_k.append(yhat[-1][-1])

In [None]:
plt.plot(pi_history.history['loss'])

In [None]:
output_k

In [None]:
np.mean(output_k)