In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from numpy import arange, pi
from scipy.signal import savgol_filter

import datetime
import time

%matplotlib inline 



# Load the series; the CSV is semicolon-separated and is expected to provide
# at least the 't_stamp' and 'T' columns used below.
df = pd.read_csv('data/svd_train_v01.csv', delimiter=';')

# Turn every 't_stamp' string (day/month/year hour:minute) into epoch seconds.
# time.mktime interprets the parsed struct_time as local time.
tstampFormat = "%d/%m/%Y %H:%M"
t = np.array([
    time.mktime(datetime.datetime.strptime(rawStamp, tstampFormat).timetuple())
    for rawStamp in df['t_stamp'].values
])

# Min-max normalisation of the 'T' column.
rawValues = df['T'].values
lowest = min(rawValues)
highest = max(rawValues)
fn = (rawValues - lowest) / (highest - lowest)
# no noise is injected here: nfn is the very same array object as fn
nfn = fn

# Mean gap between consecutive timestamps, reported in hours.
gaps = t[1:] - t[:-1]
print( "Average time separation:", sum(gaps)/(t.shape[0]-1)/(60*60) )



# --- training policy -------------------------------------------------------
# In 'adaptive' mode a sample updates the model only when the absolute
# prediction error exceeds errorThreshold.
adaptive = False
errorThreshold = 1.0e-3
# When not adaptive, a window is used for training while its start index is
# below percentageTraining * number of timestamps (see the scanning loop).
percentageTraining = 1.0

# --- collected results -----------------------------------------------------
# resp: actual responses, xresp: predicted responses,
# xlresp: responses of the samples that were learnt
resp, xresp, xlresp = [], [], []
# timestamps (t[indx]) of the windows that were used as input for learning
lsindx = []

# --- scanning window -------------------------------------------------------
# number of time steps per window and number of response values
nbTimeSteps = 10
nbRespValues = 1
# current window start, window size and cap on the samples kept in the model
indx = 0
windowSize = nbTimeSteps
maxModelSamples = 150
# index jitter bound, offset applied to the last index of a window, and the
# margin left at the end of the series so that indices stay in range
windowPerturbation = 1
lastTstampOffset = 1
endingGap = lastTstampOffset + nbTimeSteps*windowPerturbation

# --- network (rough ANN simulation) ----------------------------------------
# number of hidden units
nbHUnits = 7
# random input->hidden weights
hiddenShape = (windowSize, nbHUnits)
rweights = np.random.randn( *hiddenShape )
# hidden->output weights: random for now, refitted inside the loop
oweights = np.random.randn( nbHUnits, 1 )

# incremental SVD backing the output layer (created on the first sample)
annisvd = None

# every sample built so far and the value stored alongside it
sampleData = None
sampleDataResponse = None

# --- counters and basic information (training time, sample counts, ...) ----
startt = time.time()

trainingTime = 0
nbTrainingSamples = 0
nbSamples = 0

# timestamp associated with each prediction
predTstamps = []

# Slide a window of nbTimeSteps samples over the series, one step at a time.
# For each position: build the input sample, predict the last value of the
# window with the current output weights, then (depending on the training
# policy) fold the sample into the incremental SVD and refit the weights.
while indx < t.shape[ 0 ] - nbTimeSteps - endingGap:
    
    # generate a set of index values (cumulative sum, will be monotonically increasing)
    xindx = np.random.randint( 0, windowPerturbation, nbTimeSteps ).reshape( 1, nbTimeSteps )
    xindx = np.sort( xindx ); xindx = np.arange( indx, indx+nbTimeSteps ) + np.cumsum( xindx )
    # offset the last timestamp as to 'simulate' looking to a distant point
    xindx[ -1 ] = xindx[ -1 ] + lastTstampOffset
    
    # values and timestamps (from the noised function and the fixed-frequency timestamps)
    xvalues = nfn[ xindx ].reshape( nbTimeSteps, 1)
    # time offsets of the selected points relative to the window start
    xtstamp = ( t[ xindx ] - t[ indx ] ).reshape( nbTimeSteps, 1 )

    # only the offset of the last point is kept as a feature, after scaling
    xtstamp = xtstamp[ -1 ]
    xtstamp = xtstamp / (3.3*3600.0)
    
    # construct a sample by stacking together the timestamp and the values
    xsample = np.vstack( (xtstamp.reshape((1,1)), xvalues) ).reshape( 1, 1+nbTimeSteps )
    # drop the actual 'last' value from the sample (will be used as response)
    xsample = xsample[ :, :-1 ]
    
    # hidden-layer activations for the sample: scaled logistic, shifted by -0.5
    xhlinp = np.dot( xsample, rweights )
    xhlinp = 1.0/( 1.0 + np.exp( -0.1*xhlinp ) ) - 0.5
        
    # predicted response and its absolute error w.r.t. the held-out last value
    xpresponse = np.dot( xhlinp.reshape( 1, nbHUnits ), oweights ).reshape( 1, )
    error = np.abs( xpresponse-xvalues[-1] )
    
    xresp = np.append( xresp, xpresponse[ -1 ] )
    resp  = np.append( resp, xvalues[ -1 ] )

    # Training policy, evaluated once per iteration: in non-adaptive mode the
    # window is learnt while its start index lies in the training fraction of
    # the series; in adaptive mode only when the error exceeds the threshold.
    useForTraining = bool(
        (not adaptive and indx < t.shape[ 0 ] * percentageTraining)
        or (adaptive and error > errorThreshold)
    )

    # NOTE(review): the *predicted* response is what gets stored next to the
    # sample here, not the actual value - confirm this is intended.
    if sampleData is None:
        sampleData = xsample
        sampleDataResponse = xpresponse
    else:
        sampleData = np.vstack( (sampleData, xsample ) )
        sampleDataResponse = np.vstack( (sampleDataResponse, xpresponse) )
    
    # timestamp of the predicted point (last index of the window, gap included)
    predTstamps = np.append( predTstamps, t[ xindx[-1] ] )

    # no svd has been computed up to now: seed it with the first sample
    if annisvd is None:
        
        X = xhlinp.reshape( nbHUnits, 1 )
        U, s, Vt = np.linalg.svd( X, full_matrices=False )
        
        annisvd = iSVD( U, s, Vt, updateVt = True  )        
        xlresp = [ xvalues[ -1 ] ]
        sampleAdded = True
                
    else:
        
        if useForTraining:
            annisvd.update( xhlinp.reshape( nbHUnits, 1 ) )
            xlresp = np.vstack( (xlresp, xvalues[-1]) )                    
            
            # keep at most maxModelSamples samples in the model: drop the oldest
            if annisvd.Vt.shape[ 1 ] > maxModelSamples:
                # NOTE(review): if downdate fails xlresp is left untrimmed;
                # whether annisvd is still consistent with xlresp afterwards
                # depends on iSVD.downdate - confirm.
                try:
                    annisvd.downdate( 0 )
                    xlresp = xlresp[ 1:, : ]
                except Exception:
                    # best effort: report and carry on, but let
                    # KeyboardInterrupt / SystemExit propagate
                    print( "FAILED IN DOWNDATING VIA SVD")
                    
    # updating the output weights - note that the update method adds columns
    # and NOT rows (as it would be expected for Ax = y); the incremental SVD
    # we obtain is thus for A^T - we need however the A^+ (pseudoinverse of A)
    
    # if A^T = UsV^T (SVD) and knowing that (i) (A^T)^+ = Vs^{-1}U^T; and (ii) (A^+)^T = (A^T)^+
    # we have that (A^+)^T = Vs^{-1}U^T, following that A^+ = Us^{-1}V^T
    
    # in order to update the weights, we need to calculate x = A^+ y            
    
    if useForTraining:

        tstart = time.time()
        nbTrainingSamples = nbTrainingSamples+1

        Ux, sx, Vtx = annisvd.svd()

        # invert only the strictly positive singular values (in place)
        nzindx = sx > 0; sx[ nzindx ] = 1.0/sx[ nzindx ]
        oweights = np.dot(
            np.dot( Ux, np.dot( np.diag( sx ), Vtx ) ),
            xlresp
        )
        
        tend = time.time()
        trainingTime = trainingTime + (tend-tstart)
        
        # remember the start timestamp of every window that was learnt
        lsindx = np.append( lsindx, t[ indx ] )
    
    nbSamples = nbSamples+1
    indx = indx+1

endt = time.time()
    

    

# Undo the min-max normalisation so fn is expressed in the units of df['T'].
rawT = df[ 'T' ].values
tLow = min( rawT )
tHigh = max( rawT )
fn = ( tHigh-tLow )*fn + tLow
nfn = fn

# Apply the same mapping to the actual and predicted responses, using the
# extrema of the rescaled fn.
fnLow = min( fn )
fnSpan = max( fn )-fnLow
resp = fnSpan*resp +fnLow
xresp = fnSpan*xresp +fnLow

# Summary: sample counts, wall-clock times and prediction errors.
print( "Nb of samples:", nbSamples, "(", nbTrainingSamples, "samples used for training )" )
print( "Time:", (endt-startt), "(", trainingTime, "for training )" )

nbResp = resp.shape[ 0 ]
sqErr = np.sum( (xresp-resp)**2 )
print( "RMSE:", ( sqErr / nbResp )**0.5 )

# same error measure after Savitzky-Golay smoothing of the predictions
# (window length 11, polynomial order 5)
smoothedResp = savgol_filter( xresp, 11, 5, delta=0.015 )
smoothedSqErr = np.sum( (smoothedResp-resp)**2 )
print( "FRMSE:", ( smoothedSqErr / nbResp )**0.5 )


def axvlines(xs, **plot_kwargs):
    """
    Draw vertical lines on the current axes, spanning its current y-limits.

    All lines are drawn with a single plot call: each line contributes the
    points (x, ymin), (x, ymax), (x, nan), the NaN separating it from the next.

    :param xs: A scalar, list, or 1D array of horizontal offsets
    :param plot_kwargs: Keyword arguments to be passed to plot
    :return: The plot object corresponding to the lines.
    """
    # np.asarray rather than np.array(..., copy=False): under NumPy >= 2.0
    # copy=False raises ValueError whenever a copy is required, which is
    # always the case for tuple/list input.
    xs = np.asarray((xs, ) if np.isscalar(xs) else xs)
    lims = plt.gca().get_ylim()
    x_points = np.repeat(xs[:, None], repeats=3, axis=1).flatten()
    y_points = np.repeat(np.array(lims + (np.nan, ))[None, :], repeats=len(xs), axis=0).flatten()
    # scaley=False: leave the y-limits the caller already set untouched
    plot = plt.plot(x_points, y_points, scaley = False, **plot_kwargs)
    return plot




# Single wide figure holding the series, the predictions and the error.
fig = plt.figure( 1, figsize = (20, 6), dpi = 50 )
ax1 = fig.add_subplot( 111 )

ax1.grid( True, color = 'green' )

# centre the y-axis on the average of fn, +/- 100 units
center = sum( fn )/fn.shape[ 0 ]
halfSpan = 1.0e+2
ax1.set_ylim( (center-halfSpan, center+halfSpan) )

# optional zoom on a time range:
#ax1.set_xlim( (1.505*1e+9, 1.507*1e+9) )

# one faint vertical line per window that was used for training
axvlines( lsindx, linewidth=0.5, color='gray', alpha=0.5 )

# absolute prediction error
absError = np.abs( resp-xresp )
ax1.plot( predTstamps, absError, 'blue', label = 'Error', alpha = 0.1 )

# nfn and fn refer to the same array in this script, so both curves coincide
ax1.plot( t, nfn, 'blue', label = 'Noised Fn' )
ax1.plot( t, fn, 'black', label = 'Support Fn' )

# raw predictions, as a line and as individual points
ax1.plot( predTstamps, xresp, 'green', label = 'Pred', alpha = 0.5 )
plt.scatter( predTstamps, xresp, color='blue', alpha = 0.2 )

# Savitzky-Golay smoothed predictions
smoothedPred = savgol_filter( xresp, 11, 5, delta=0.015 )
ax1.plot( predTstamps, smoothedPred, 'red', label = 'FPred', alpha = 0.9 )

ax1.legend(loc='upper right', frameon=False)

plt.show()