<a href="https://colab.research.google.com/github/harnalashok/deeplearning-sequences/blob/main/temperature_forecasting_time_series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Last amended: 26th July, 2019
# My folder: /home/ashok/Documents/14.sensor_data
# VM: lubuntu_deeplearning_I.vdi
# Ref: Page 207, Chapter 6, Deep Learning with Python by François Chollet
# Download dataset from:
# 1. Link to my google drive
#  https://drive.google.com/file/d/1rnhlFKmmmhXqawaIBgjSTsqGrTLCUldV/view?usp=sharing
# 2. Link to original datasource
#  https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip

In [None]:
# Objectives:
#             i)  Working with timeseries data
#            ii)  Working with sensor data
#                 (Data comes from many sensors)
#           iii)  Processing data to make it fit for modeling
#            iv)  Creating a data generator for training and validation
#             v)  Making predictions using
#                   a) Fully connected dense model
#                   b) GRU model
#                   c) GRU model with dropouts
#                   d) Stacked GRU models
#                   e) Bidirectional RNN layer
#
#

## Task

In [None]:
# We will predict temperature
# Sensor data is recorded every 10 minutes. So per-day we have:
#   no of minutes:              24 * 60     =  1440
#   no of 10-minute intervals: (24 * 60)/10 = 144 data-points per day
#   no of data-points in 10 days: 144 * 10  = 1440

## Call libraries

In [None]:
# Reset all variables
%reset -f

# 1.0 Call libraries
import numpy as np
import matplotlib.pyplot as plt
import os, time, gc

In [None]:
# 1.0.1 Display the result of every top-level expression in a cell,
#       not just that of the last one. Cells below rely on this to
#       show several values (e.g. a type and a length) at once.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


## How to read csv data in numpy

In [None]:
# 1.1 Where is my data?
#     The folder is built from the current user's home directory so that
#     the notebook is not tied to one machine. For user 'ashok' on Linux
#     this resolves to '/home/ashok/.keras/datasets/jena_climate',
#     exactly the value that was hard-coded earlier.
data_dir = os.path.join(os.path.expanduser('~'), '.keras', 'datasets', 'jena_climate')

In [None]:
# 1.2 Join datapath with filename (intelligently)
#     os.path.join() inserts the path separator appropriate to the
#     operating system. If you are on Windows, you may instead assign
#     the full data path + filename to fname directly.
fname = os.path.join(data_dir, 'jena_climate_2009_2016.csv')
fname



In [None]:
# 1.3 Read the complete datafile into memory
# 1.3.1 Open the file inside a 'with' block so that the handle is
#       closed even if reading fails part-way.
#       open() default mode is text+read
with open(fname) as f:
    # 1.3.2 Use handle to read complete file as one string
    data = f.read()
# The name 'f' stays bound to the (now closed) handle.

In [None]:
# 1.3.3 Close file
#       Releases the operating-system file handle; the text already read
#       into 'data' stays in memory. Calling close() on a handle that is
#       already closed does nothing, so this cell is safe to re-run.
f.close()

## Observe the raw data read from the file

In [None]:
# 1.3.4 Inspect what was read
#       The whole file arrives as one single Python string
type(data)       # str
#       Peek at its first 200 characters; per the original note the
#       first '\n' sits at about the 196th character
data[:200]


In [None]:
# 1.4 Look at data
# 1.4.1 Split data on newline (\n). So how many records?
#       Trailing whitespace is stripped first: a file that ends with a
#       newline would otherwise leave an empty string as the last list
#       element, which is not a record (it would inflate the line count
#       and could not be converted to float values afterwards).
#       For a file without a trailing newline nothing changes.
lines = data.rstrip().split('\n')    # Split each line at 'newline'
type(lines)                 # list =>  All lines are in one list
len(lines)                  # 420552
type(lines[50])             # Each element of list is still a 'str'


In [None]:
# 1.4.2 Does any header exist? Check
#       Compare the first line with the second one
lines[0]                   # yes, it does
lines[1]                   # first line after the header

In [None]:
# 1.4.3 Extract header (field-names)
#       Splitting the first line at each ',' gives a list holding
#       one field-name string per column
header = lines[0].split(',')  # Split at each ','
header

In [None]:
# 1.4.4 How many columns/fields?
#       Counts every field named in the header, the leading
#       date/time field included
cols = len(header)
cols             # 15

In [None]:
# 1.4.5 Print first three rows.
#       Note 10-minute gap in the
#       observations
#       Index 0 is the header, so the slice starts at 1
lines[1:4]     # A list of 3 string elements
len(lines)    # 420552 or header + 420551 data points

In [None]:
# 1.4.6 Number of data records: every line except the header line
totalsamples = len(lines) - 1    # 420551 (exclude header)

## Data conversion

In [None]:
# 2.0 Hold all the data points in one numpy array for processing

# 2.1 Pre-allocate a zero-valued 2D-array:
#       rows    -> one per data point (header excluded)
#       columns -> one per field, leaving out the 1st (date/time) field
float_data = np.zeros(shape=(totalsamples, cols - 1))
float_data.shape           # (420551,14)

In [None]:
# 2.2 The zero-valued 2D-array gets filled row-by-row in a for-loop
# 2.2.1 Row indices to loop over, one per non-header line:
#       0, 1, 2, 3, ... 420550
numbList = np.arange(0, len(lines) - 1)
numbList

In [None]:
# 2.2.2 See how a line is split in respective
#       field values. We want split values to
#       be an array. But after the split,
#       they are a list
x = lines[1].split(',')      # lines[1] is the first line after the header
type(x)      # list

In [None]:
# 2.2.3 Convert the split fields of the first data line to a float array.
#       The fields are taken from lines[1] again, rather than from the
#       previous value of x, so that re-running this cell always gives
#       the same result instead of dropping one more leading value on
#       each run.
x = np.asarray(lines[1].split(',')[1:], dtype = 'float32') # Exclude 1st date/time column
type(x)
x

In [None]:
# 2.3  Populate the zero-array with sensor data, one row per data line.
#      Row index i is paired with line i+1 because line 0 is the header.
for i, line in zip(numbList, lines[1:]):
    # 2.3.1 Split the line at each ',' and drop the leading date field
    row = line.split(',')[1:]
    # 2.3.2 Convert the remaining fields to numbers and store them in row i
    values = np.asarray(row, dtype = 'float32')
    float_data[i, :] = values

In [None]:
# 2.3.2 Sanity check: array dimensions after the fill
float_data.shape     # (420551,14)

# 2.3.3 Look at the first filled row
float_data[0, :]

## Plotting temperature

In [None]:
# 3. Let us plot temperature, the IInd column
#    Check 'header', if you like
#    (float_data has no date/time column, so its column 1
#     corresponds to header[2])
# 3.1 Get column with index 1
temp = float_data[:, 1]
temp

In [None]:
# 3.2 Plot the whole temperature series. It is highly periodic.
#     Axis labels and a title are set so that the figure can be
#     understood on its own.
plt.plot(range(len(temp)), temp)
plt.xlabel('Observation number (one reading every 10 minutes)')
plt.ylabel('Temperature')
plt.title('Temperature over the full recording period')
plt.show()