# RNN for Smoothed Sunspot Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read the smoothed sunspot series (SVG, polyorder = 1).
df = pd.read_csv('Data/SVG_poly1.csv')

# Plain arrays taken from the raw frame, before it is re-indexed below.
X = df["Year"].to_numpy().reshape(-1, 1)
y = df["Sunspot_Number"].to_numpy()
yerr = np.zeros(X.shape[0])

# Remove the unnecessary 'Unnamed: 0' column, parse Year into datetimes
# and use it as the index.
df = (
    df.drop(columns=['Unnamed: 0'])
      .assign(Year=lambda frame: pd.to_datetime(frame["Year"], format="%Y"))
      .set_index("Year")
)

df.head(), df.tail()

(            Sunspot_Number
 Year                      
 1818-01-01       34.566570
 1818-01-01       34.546053
 1818-01-01       34.525535
 1818-01-01       34.505017
 1818-01-01       34.484499,
             Sunspot_Number
 Year                      
 2022-01-01       68.947962
 2022-01-01       69.007779
 2022-01-01       69.067596
 2022-01-01       69.127413
 2022-01-01       69.187230)

## Data Preprocessing

In [3]:
# Chronological hold-out split: rows before position `no` form the training
# set and the rows from `no` onwards form the test set. Both parts keep only
# every 10th row.
no = 70000
train = df.iloc[:no:10].copy()
# Explicit copy: a 'Predictions' column is added to `test` later on, and
# assigning into a bare slice of `df` triggers pandas' SettingWithCopyWarning.
test = df.iloc[no::10].copy()

In [4]:
# Display the training split (Year index, Sunspot_Number column).
train

Unnamed: 0_level_0,Sunspot_Number
Year,Unnamed: 1_level_1
1818-01-01,34.566570
1818-01-01,34.361393
1818-01-01,34.156216
1818-01-01,33.951039
1818-01-01,33.745862
...,...
2009-01-01,16.791923
2009-01-01,17.114990
2009-01-01,17.515400
2009-01-01,18.072553


In [5]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scaler mapping the fitted data range onto [0, 1] (the default range,
# spelled out here for readability).
scaler = MinMaxScaler(feature_range=(0, 1))

In [6]:
# Learn the scaling from the training split only, then apply that same
# scaling to the test split.
scaled_train = scaler.fit_transform(train)
scaled_test = scaler.transform(test)

In [7]:
from keras.preprocessing.sequence import TimeseriesGenerator

2023-02-03 16:39:21.704031: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
# Sliding-window generator over the scaled training series: every sample is a
# window of `n_input` consecutive values, served one window per batch.
# NOTE(review): 11 * 12 reads like "11 years of monthly values", but `train`
# keeps every 10th row of `df` -- confirm the window spans the intended period.
n_input = 11 * 12
n_features = 1  # we are only using 1 timeseries to make our predictions
generator = TimeseriesGenerator(
    data=scaled_train,
    targets=scaled_train,
    length=n_input,
    batch_size=1,
)

In [9]:
# Pull the first (input window, target) pair out of the generator and show it.
X, y = generator[0]
print(f'Given the Array: \n{X.ravel()}')
print(f'Predict this y: \n {y}')

# Shape of one input batch.
X.shape

Given the Array: 
[0.12604796 0.12518466 0.12432136 0.12345807 0.12259477 0.12173147
 0.12086817 0.12000487 0.11914158 0.11827828 0.11741498 0.11655168
 0.11568838 0.11482509 0.11396179 0.11309849 0.11223519 0.11137189
 0.1105086  0.1096453  0.108782   0.1079187  0.1070554  0.10619211
 0.10532881 0.10446551 0.10360221 0.10273891 0.10187562 0.10101232
 0.10014902 0.09928572 0.09842242 0.09755913 0.09669583 0.09583253
 0.09496923 0.09410593 0.09324264 0.09237934 0.09151604 0.09065274
 0.08978944 0.08892615 0.08806285 0.08719955 0.08633625 0.08547295
 0.08460966 0.08374636 0.08288306 0.08201976 0.08115646 0.08029317
 0.07942987 0.07856657 0.07770327 0.07683997 0.07597668 0.07511338
 0.07425008 0.07338678 0.07252348 0.07166019 0.07079689 0.06993359
 0.06907029 0.06820699 0.0673437  0.0664804  0.0656171  0.0647538
 0.0638905  0.06302721 0.06286593 0.06197027 0.0617514  0.06133957
 0.06039495 0.06040935 0.06075494 0.0608615  0.0609047  0.06100838
 0.05972105 0.05906155 0.0576907  0.05638609 

(1, 132, 1)

In [10]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

In [11]:
# Model definition: a 100-unit LSTM layer reading windows of shape
# (n_input, n_features), followed by a single-unit Dense output layer.
# NOTE(review): the LSTM uses activation='relu' instead of the Keras default
# ('tanh') -- confirm this is intentional.
model = Sequential([
    LSTM(100, activation='relu', input_shape=(n_input, n_features)),
    Dense(1),
])
# Adam optimizer, mean squared error loss.
model.compile(loss='mse', optimizer='adam')
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100)               40800     
                                                                 
 dense (Dense)               (None, 1)                 101       
                                                                 
Total params: 40,901
Trainable params: 40,901
Non-trainable params: 0
_________________________________________________________________


2023-02-03 16:39:24.364978: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Train on the windowed training generator for 10 epochs.
model.fit(x=generator, epochs=10)

Epoch 1/10

In [None]:
# Training loss stored on the model's history, one value per epoch.
loss_per_epoch = model.history.history['loss']

# Explicit figure/axes with labels so the plot can be read on its own.
# The x axis is the 0-based epoch index.
fig, ax = plt.subplots()
ax.plot(range(len(loss_per_epoch)), loss_per_epoch)
ax.set_xlabel("Epoch")
ax.set_ylabel("Training loss (MSE)")
ax.set_title("Training loss per epoch");

In [None]:
last_train_batch = scaled_train[-n_input:] 
# take the last n_input scaled training values (one full input window),
# to make the prediction for the first point of the test set

In [None]:
# Add the batch and feature axes so the window matches the model's input shape.
last_train_batch = np.reshape(last_train_batch, (1, n_input, n_features))

In [None]:
# One-step-ahead prediction (in scaled units) from the last training window.
model.predict(x=last_train_batch)

In [None]:
# Recursive multi-step forecast over the test horizon: the model predicts one
# step at a time, and each prediction is fed back in as the newest value of
# the input window to produce the next one.
test_predictions = []

# Seed window: the last n_input scaled training values, reshaped to
# (1, n_input, n_features) to match the model's input.
first_eval_batch = scaled_train[-n_input:]
current_batch = first_eval_batch.reshape((1, n_input, n_features))

for _ in range(len(test)):
    # Prediction for the current window; [0] drops the batch axis.
    # verbose=0 silences the progress bar that Keras would otherwise print
    # on every single call, i.e. once per forecast step.
    current_pred = model.predict(current_batch, verbose=0)[0]

    # Store the prediction (still in scaled units).
    test_predictions.append(current_pred)

    # Slide the window: drop the oldest timestep and append the prediction
    # as the newest one.
    next_step = np.reshape(current_pred, (1, 1, n_features))
    current_batch = np.concatenate((current_batch[:, 1:, :], next_step), axis=1)

In [None]:
# Preview the first few forecasts instead of dumping the whole list.
# NOTE: these values are in the scaler's normalized units (the training data
# was mapped to 0-1), so they must be inverse-transformed before being
# compared with the real sunspot numbers.
test_predictions[:5]

In [None]:
# Map the scaled forecasts back to the original sunspot-number units.
true_predictions = scaler.inverse_transform(X=test_predictions)

In [None]:
# Attach the forecasts as a 'Predictions' column next to the observed values.
# `test` is rebuilt with assign() rather than mutated by item assignment:
# it originates from a slice of `df`, where item assignment can raise
# SettingWithCopyWarning. ravel() flattens the inverse-transformed values
# to one value per row.
test = test.assign(Predictions=np.asarray(true_predictions).ravel())

In [None]:
# Line plot of every column of `test` on a wide figure.
test.plot.line(figsize=(14, 5));