In [None]:
import torch
import sys 
import pickle
import pandas as pd
import os
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import sklearn as skl
import importlib
#Interface, Tools
import loadData
import plottingTools
import factorialModel
import teacher
import pytorchModel
import pytorchFwdModel
#Interpolation
import gaussianProcess

In [None]:
# Snapshot (copy) of the module search path as it was before this cell ran.
# A plain `formerPath = sys.path` would only alias the live list, so the
# entries appended below would also appear in `formerPath`.
formerPath = list(sys.path)

# Make the project folders importable. Entries already present are skipped so
# that re-running the cell does not pile up duplicates in sys.path.
# NOTE(review): the import cell above already imports project modules
# (loadData, teacher, ...). If those live in these folders, this cell has to
# run before the imports on a fresh kernel - confirm and reorder if needed.
for extraPath in ('./Code/', './nowcasting_torch/'):
    if extraPath not in sys.path:
        sys.path.append(extraPath)

In [None]:
# Data set location: the file path is built later as workingFolder + filename.
filename = "SPX"
workingFolder = "./Data/SPX/"

# Arguments forwarded to loadData.dataSetATMPickle in the next cell.
minExpiry = -1.0
trainingSetPercentage = 0.8
completionRate = 0.1  # Not used anymore (still passed to the loader)

In [None]:
# Build the data set object from the pickled file located at
# workingFolder + filename, with feature scaling switched off.
dataSetPath = workingFolder + filename
dataSet = loadData.dataSetATMPickle(
    dataSetPath,
    trainingSetPercentage,
    minExpiry,
    completionRate,
    scaleFeatures=False,
)

In [None]:
# Show the summary produced by the data set object (content is defined in loadData).
dataSet.datasetSummary()

In [None]:
# Run the data set's own consistency checks (implemented in loadData).
dataSet.sanityCheck()

In [None]:
# Shape of the first element of the training data tuple (displayed as cell output).
dataSet.getTrainingDataForModel()[0].shape

In [None]:
# Correlation matrix of the training features, with columns containing NaN removed.
trainingFeatures = dataSet.getTrainingDataForModel()[0]
featureCorrelation = trainingFeatures.corr().dropna(axis=1)

# Let loadData pick points from that matrix (second argument: 40) and register
# them on the data set through setMaskedPoints.
selectedPoints = loadData.selectLessCorrelatedFeatures(featureCorrelation, 40)
dataSet.setMaskedPoints(selectedPoints)

In [None]:
# Apply the data set mask to the first training observation and display the result.
# NOTE(review): later cells call .dropna() on the result, so masked entries are
# presumably NaN - confirm in loadData.
dataSet.maskDataset(dataSet.getTrainingDataForModel()[0].iloc[0])

In [None]:
# First row of the first two elements of the training tuple, plotted as a grid.
firstTrainingSurface = dataSet.getTrainingDataForModel()[0].iloc[0]
firstTrainingCoordinates = dataSet.getTrainingDataForModel()[1].iloc[0]
plottingTools.plotGrid(
    firstTrainingSurface,
    firstTrainingCoordinates,
    "First training observation",
    plotType="transparent",
)

In [None]:
# Convert the model-formatted training data back with formatModelDataAsDataSet,
# then plot the first row of its first two elements with the default plot type.
originalData = dataSet.formatModelDataAsDataSet(dataSet.getTrainingDataForModel())
firstOriginalSurface = originalData[0].iloc[0]
firstOriginalCoordinates = originalData[1].iloc[0]
plottingTools.plotGrid(firstOriginalSurface,
                       firstOriginalCoordinates,
                       "First training observation")

In [None]:
# ---------------------------------------------------------------------------
# Optimisation settings passed to the model / Teacher constructors below.
# ---------------------------------------------------------------------------
learning_rate = 0.01
n_factors = 15
nbEpochs = 10000
nbCalibrationStep = 1000
nbChannel = 1

# ---------------------------------------------------------------------------
# Hyperparameter dictionary shared by every model built in this notebook.
# Keys are listed in the order in which they were originally inserted.
# ---------------------------------------------------------------------------
hyperparameters = {
    "nbEpochs": nbEpochs,
    "verbose": False,
    # Other values used in this notebook: "InnerDomain", "OuterDomain"
    "extrapolationMode": "NoExtrapolation",
    "nbCalibrationStep": nbCalibrationStep,
    # Grid dimensions and mask taken from the data set
    "nbX": dataSet.nbMoneyness,
    "nbY": dataSet.nbTTM,
    "mask": dataSet.maskSerie,
    # Penalizations
    "l2_reg": 0.1,
    "varianceRegularisation": 1,
    "extremeRegularisation": 0,
    "lambdaContractive": 0.1,
    "lambdaGaussian": 1,
    "lambdaCompletionEncodings": 1,
    "lambdaDisentangle": 1.0,
    "lambdaTopology": 1.0,
    "factorVariance": 10,
    "GaussianEncodings": 1,
    # Gradient descent
    "validationPercentage": 0.2,
    "earlyStoppingWindow": 0.2,
    "calibrationWindow": 20,
    "nbEpochInit": 1,
    "nbInit": 100,
    # Loss
    "lossHolderExponent": 2,
    # Architecture
    "nbChannel": nbChannel,
    # Kernel
    "bandwidthBounds": (0.01, 100),
    "Train Interpolation": False,
}

# ---------------------------------------------------------------------------
# Architecture: number of units per layer.
# ---------------------------------------------------------------------------
nbUnitsPerLayer = {"Input Layer": dataSet.gridSize}
nbUnitsPerLayer["Output Layer"] = nbUnitsPerLayer["Input Layer"]
for layerIndex in range(1, 5):
    nbUnitsPerLayer["Layer" + str(layerIndex)] = 10

# Encoder widths: the grid size halved (and truncated) three times in a row.
encoderWidth = dataSet.gridSize
for layerIndex in range(1, 4):
    encoderWidth = int(encoderWidth / 2)
    nbUnitsPerLayer["LayerEncoder" + str(layerIndex)] = encoderWidth

# Decoder widths mirror the encoder: Decoder1 <- Encoder3, ..., Decoder3 <- Encoder1.
for layerIndex in range(1, 4):
    mirroredKey = "LayerEncoder" + str(4 - layerIndex)
    nbUnitsPerLayer["LayerDecoder" + str(layerIndex)] = nbUnitsPerLayer[mirroredKey]
# NOTE(review): this key is spelled "LayerDEcoder4" (capital E), unlike the other
# decoder keys; it is kept as-is because its consumers are not visible here.
nbUnitsPerLayer["LayerDEcoder4"] = dataSet.gridSize

# ---------------------------------------------------------------------------
# Plot settings.
# ---------------------------------------------------------------------------
colorMapSystem = "hsv"
plotType = "transparent"  # alternative: "flexibleWire"
diagnoseOriginalData = True
fps = 10

plt.rcParams["animation.embed_limit"] = 2**28
plt.rcParams["animation.html"] = "html5"

In [None]:
# Re-import the project modules so that edits made to their source files are
# picked up without restarting the kernel. Objects created before the reload
# keep using the classes they were built from.
importlib.reload(teacher)
importlib.reload(plottingTools)
importlib.reload(pytorchModel)

In [None]:
# Quick override of the training length (same values as the configuration cell),
# mirrored into the shared hyperparameter dictionary.
nbEpochs, nbCalibrationStep = 10000, 1000
hyperparameters.update(nbCalibrationStep=nbCalibrationStep, nbEpochs=nbEpochs)

In [None]:
# Build the first model from the shared configuration.
modelFunctional = pytorchModel.pytorchModel(learning_rate,
                                            hyperparameters,
                                            nbUnitsPerLayer,
                                            n_factors)

# The Teacher drives fitting, diagnostics and back-testing of a model on dataSet.
learningManager = teacher.Teacher(modelFunctional,
                                  dataSet,
                                  nbEpochs,
                                  nbCalibrationStep)

# Plot options come from the configuration cell instead of being hard-coded
# here, so that changing them in one place affects every diagnostic below.
learningManager.diagnoseOriginalData = diagnoseOriginalData
learningManager.colorMapSystem = colorMapSystem
learningManager.plotType = plotType

In [None]:
# Fit the model currently held by the Teacher (modelFunctional).
# NOTE(review): restoreResults=True presumably reloads stored results instead
# of retraining - confirm in teacher.Teacher.fit.
learningManager.fit(restoreResults = True)

In [None]:
# Run the Teacher's compression diagnostics for modelFunctional.
# NOTE(review): restoreResults=False presumably forces recomputation - confirm
# in teacher.Teacher.diagnoseCompression.
learningManager.diagnoseCompression(restoreResults = False)

In [None]:
# Reference surface: first row of the first element of the testing tuple.
testingSurfaces = dataSet.getTestingDataForModel()[0]
trueSurface = testingSurfaces.iloc[0, :]

# Model input: masked surface, first row of the 2nd and 3rd testing elements,
# and None in the last slot.
maskedSurface = dataSet.maskDataset(trueSurface)
firstTestCoordinates = dataSet.getTestingDataForModel()[1].iloc[0, :]
firstTestExtraInput = dataSet.getTestingDataForModel()[2].iloc[0, :]
inputTmp = [maskedSurface, firstTestCoordinates, firstTestExtraInput, None]

# Complete the masked surface, starting from the last training coding.
initialCoding = learningManager.codings_Train.iloc[-1]
tmp = modelFunctional.completeDataTensor(inputTmp, initialCoding, nbCalibrationStep)

# Shape of the entries left after masking and dropping NaN.
remainingShape = dataSet.maskDataset(trueSurface).dropna().shape
print("Nb Masked points : ", remainingShape)

# True surface against the completed one (third element of the result).
referencePoints = dataSet.maskDataset(trueSurface).dropna()
plottingTools.plotCompletion(
    trueSurface,
    tmp[2],
    inputTmp[1],
    colorMapSystem=learningManager.colorMapSystem,
    plotType=learningManager.plotType,
    refPoints=referencePoints,
)

# Back-test the completion with the Teacher.
res = learningManager.backTestCompletion()

In [None]:
# Columns that are not in dataSet.maskedPoints; handed back to setMaskedPoints
# at the end of the cell to undo the temporary selection.
# NOTE(review): this presumes setMaskedPoints expects the complement of what
# dataSet.maskedPoints stores - confirm in loadData.
cache = dataSet.getDataForModel()[0].columns.difference(dataSet.maskedPoints)

# Alternative point selection. pd.Int64Index was removed in pandas 2.0;
# pd.Index(..., dtype="int64") builds the same index on old and new versions.
# NOTE(review): 208 appears twice in the list - kept as in the original, confirm intent.
otherMaskedPoints = pd.Index([92,  95, 104, 130, 131, 136, 164, 177, 178, 185, 188, 207, 208,
                              208, 235, 245, 257, 273, 286, 290, 307, 309, 316, 325, 326, 329,
                              334, 344, 367, 373, 374, 391, 392, 406, 409, 418],
                             dtype="int64")
dataSet.setMaskedPoints(otherMaskedPoints)

# try/finally guarantees that the original selection is restored even when
# completion, plotting or back-testing raises; otherwise every later cell
# would silently keep running with the alternative selection.
try:
    trueSurfaceOther = dataSet.getTestingDataForModel()[0].iloc[0,:]

    inputTmpOther = [dataSet.maskDataset(trueSurfaceOther),
                     dataSet.getTestingDataForModel()[1].iloc[0,:],
                     dataSet.getTestingDataForModel()[2].iloc[0,:],
                     None]
    # Complete the masked surface, starting from the last training coding.
    tmpOther = modelFunctional.completeDataTensor(inputTmpOther,
                                                  learningManager.codings_Train.iloc[-1],
                                                  nbCalibrationStep)

    print("Nb Masked points : ",
          dataSet.maskDataset(trueSurfaceOther).dropna().shape)

    plottingTools.plotCompletion(trueSurfaceOther,
                                 tmpOther[2],
                                 inputTmpOther[1],
                                 colorMapSystem=learningManager.colorMapSystem,
                                 plotType=learningManager.plotType,
                                 refPoints = dataSet.maskDataset(trueSurfaceOther).dropna())

    resOther = learningManager.backTestCompletion()
finally:
    dataSet.setMaskedPoints(cache)

In [None]:
# Re-import pytorchFwdModel so that source edits are picked up without a kernel restart.
importlib.reload(pytorchFwdModel)

In [None]:
# Second model: built with the same four arguments as modelFunctional, then
# handed to the existing Teacher so the following cells operate on it.
modelFunctionalFwd = pytorchFwdModel.pytorchFwdModel(
    learning_rate, hyperparameters, nbUnitsPerLayer, n_factors
)
learningManager.assignNewModel(modelFunctionalFwd)

In [None]:
# Fit the model currently held by the Teacher (modelFunctionalFwd).
# NOTE(review): restoreResults=True presumably reloads stored results instead
# of retraining - confirm in teacher.Teacher.fit.
learningManager.fit(restoreResults = True)

In [None]:
# Run the Teacher's compression diagnostics for modelFunctionalFwd.
# NOTE(review): restoreResults=True presumably reuses stored results - confirm
# in teacher.Teacher.diagnoseCompression.
learningManager.diagnoseCompression(restoreResults = True)

In [None]:
print("Real vs completed")

# Reference surface: first row of the first element of the testing tuple.
trueSurface = dataSet.getTestingDataForModel()[0].iloc[0,:]

# Entries left after masking and dropping NaN. The original cell evaluated
# their count as a bare expression in the middle of the cell, which displays
# nothing; it is printed here with the same label as the earlier completion cell.
observedPoints = dataSet.maskDataset(trueSurface).dropna()
print("Nb Masked points : ", observedPoints.size)

# --- 1. Completion of the masked surface ------------------------------------
inputTmp = [dataSet.maskDataset(trueSurface),
            dataSet.getTestingDataForModel()[1].iloc[0,:],
            dataSet.getTestingDataForModel()[2].iloc[0,:],
            None]
# Start the calibration from the last training coding.
tmp = modelFunctionalFwd.completeDataTensor(inputTmp,
                                            learningManager.codings_Train.iloc[-1],
                                            nbCalibrationStep)

plottingTools.plotCompletion(trueSurface,
                             tmp[2],
                             inputTmp[1],
                             colorMapSystem=learningManager.colorMapSystem,
                             plotType=learningManager.plotType,
                             refPoints = observedPoints)

# --- 2. Feed the completed surface back in, starting from a zero coding -----
inputOutlier = [tmp[2],
                dataSet.getTestingDataForModel()[1].iloc[0,:],
                dataSet.getTestingDataForModel()[2].iloc[0,:],
                None]
tmpOutlier = modelFunctionalFwd.completeDataTensor(inputOutlier,
                                                   pd.Series(np.zeros_like(tmp[1])),
                                                   nbCalibrationStep)

print("Compressed completed vs completed")

plottingTools.plotCompletion(inputOutlier[0],
                             tmpOutlier[2],
                             inputOutlier[1],
                             colorMapSystem=learningManager.colorMapSystem,
                             plotType=learningManager.plotType,
                             refPoints = observedPoints)

print("Compressed completed vs real")
plottingTools.plotCompletion(trueSurface,
                             tmpOutlier[2],
                             inputOutlier[1],
                             colorMapSystem=learningManager.colorMapSystem,
                             plotType=learningManager.plotType,
                             refPoints = observedPoints)

# --- 3. Corrupt four entries (values doubled) and complete again ------------
corruptedSurface = trueSurface.copy()
for corruptedPosition in (100, 150, 200, 250):
    corruptedSurface.iloc[corruptedPosition] = trueSurface.iloc[corruptedPosition] * 2
inputCorrupted = [corruptedSurface,
                  dataSet.getTestingDataForModel()[1].iloc[0,:],
                  dataSet.getTestingDataForModel()[2].iloc[0,:],
                  None]
tmpCorrupted = modelFunctionalFwd.completeDataTensor(inputCorrupted,
                                                     pd.Series(np.zeros_like(tmp[1])),
                                                     nbCalibrationStep)

print("Corrected corruption vs dummy corruption")
plottingTools.plotCompletion(inputCorrupted[0],
                             tmpCorrupted[2],
                             inputCorrupted[1],
                             colorMapSystem=learningManager.colorMapSystem,
                             plotType=learningManager.plotType,
                             refPoints = None)
print("Corrected corruption vs real")
# Uses the corrupted input's own second element; the original passed
# inputOutlier[1] here, a copy-paste slip (both are built from the same
# testing row, so the plotted result is unchanged).
plottingTools.plotCompletion(trueSurface,
                             tmpCorrupted[2],
                             inputCorrupted[1],
                             colorMapSystem=learningManager.colorMapSystem,
                             plotType=learningManager.plotType,
                             refPoints = None)

# Back-test the completion with the Teacher.
res = learningManager.backTestCompletion()

In [None]:
# Columns that are not in dataSet.maskedPoints; handed back to setMaskedPoints
# at the end of the cell to undo the temporary selection.
# NOTE(review): this presumes setMaskedPoints expects the complement of what
# dataSet.maskedPoints stores - confirm in loadData.
cache = dataSet.getDataForModel()[0].columns.difference(dataSet.maskedPoints)

# Alternative point selection. pd.Int64Index was removed in pandas 2.0;
# pd.Index(..., dtype="int64") builds the same index on old and new versions.
# NOTE(review): 208 appears twice in the list - kept as in the original, confirm intent.
otherMaskedPoints = pd.Index([92,  95, 104, 130, 131, 136, 164, 177, 178, 185, 188, 207, 208,
                              208, 235, 245, 257, 273, 286, 290, 307, 309, 316, 325, 326, 329,
                              334, 344, 367, 373, 374, 391, 392, 406, 409, 418],
                             dtype="int64")
dataSet.setMaskedPoints(otherMaskedPoints)

# try/finally guarantees that the original selection is restored even when a
# step below raises; otherwise later cells would keep the alternative selection.
try:
    print("Real vs completed")

    trueSurfaceOther = dataSet.getTestingDataForModel()[0].iloc[0,:]

    # Entries left after masking and dropping NaN. The original cell evaluated
    # their count as a bare mid-cell expression, which displays nothing; it is
    # printed here with the same label as the earlier completion cells.
    observedPointsOther = dataSet.maskDataset(trueSurfaceOther).dropna()
    print("Nb Masked points : ", observedPointsOther.size)

    # --- 1. Completion of the masked surface --------------------------------
    inputTmpOther = [dataSet.maskDataset(trueSurfaceOther),
                     dataSet.getTestingDataForModel()[1].iloc[0,:],
                     dataSet.getTestingDataForModel()[2].iloc[0,:],
                     None]
    tmpOther = modelFunctionalFwd.completeDataTensor(inputTmpOther,
                                                     learningManager.codings_Train.iloc[-1],
                                                     nbCalibrationStep)

    plottingTools.plotCompletion(trueSurfaceOther,
                                 tmpOther[2],
                                 inputTmpOther[1],
                                 colorMapSystem=learningManager.colorMapSystem,
                                 plotType=learningManager.plotType,
                                 refPoints = observedPointsOther)

    # --- 2. Feed the completed surface back in, starting from a zero coding --
    inputOutlierOther = [tmpOther[2],
                         dataSet.getTestingDataForModel()[1].iloc[0,:],
                         dataSet.getTestingDataForModel()[2].iloc[0,:],
                         None]
    tmpOutlierOther = modelFunctionalFwd.completeDataTensor(inputOutlierOther,
                                                            pd.Series(np.zeros_like(tmpOther[1])),
                                                            nbCalibrationStep)

    print("Compressed completed vs completed")

    plottingTools.plotCompletion(inputOutlierOther[0],
                                 tmpOutlierOther[2],
                                 inputOutlierOther[1],
                                 colorMapSystem=learningManager.colorMapSystem,
                                 plotType=learningManager.plotType,
                                 refPoints = observedPointsOther)

    print("Compressed completed vs real")
    plottingTools.plotCompletion(trueSurfaceOther,
                                 tmpOutlierOther[2],
                                 inputOutlierOther[1],
                                 colorMapSystem=learningManager.colorMapSystem,
                                 plotType=learningManager.plotType,
                                 refPoints = observedPointsOther)

    # --- 3. Corrupt four entries (values doubled) and complete again --------
    corruptedSurfaceOther = trueSurfaceOther.copy()
    for corruptedPosition in (100, 150, 200, 250):
        corruptedSurfaceOther.iloc[corruptedPosition] = trueSurfaceOther.iloc[corruptedPosition] * 2
    inputCorruptedOther = [corruptedSurfaceOther,
                           dataSet.getTestingDataForModel()[1].iloc[0,:],
                           dataSet.getTestingDataForModel()[2].iloc[0,:],
                           None]
    tmpCorruptedOther = modelFunctionalFwd.completeDataTensor(inputCorruptedOther,
                                                              pd.Series(np.zeros_like(tmpOther[1])),
                                                              nbCalibrationStep)

    print("Corrected corruption vs dummy corruption")
    plottingTools.plotCompletion(inputCorruptedOther[0],
                                 tmpCorruptedOther[2],
                                 inputCorruptedOther[1],
                                 colorMapSystem=learningManager.colorMapSystem,
                                 plotType=learningManager.plotType,
                                 refPoints = None)
    print("Corrected corruption vs real")
    # Uses the corrupted input's own second element; the original passed
    # inputOutlierOther[1] here, a copy-paste slip (both are built from the
    # same testing row, so the plotted result is unchanged).
    plottingTools.plotCompletion(trueSurfaceOther,
                                 tmpCorruptedOther[2],
                                 inputCorruptedOther[1],
                                 colorMapSystem=learningManager.colorMapSystem,
                                 plotType=learningManager.plotType,
                                 refPoints = None)

    # --- 4. Theta surface for the corrupted input ---------------------------
    # NOTE(review): the plot title mentions the "worst reconstruction", while
    # the input here is the corrupted first testing observation - confirm the
    # wording of the title.
    thetaSurface = modelFunctionalFwd.getArbitrageTheta(inputCorruptedOther,
                                                        pd.Series(tmpCorruptedOther[1]))
    plottingTools.plotGrid(thetaSurface.iloc[0],
                           inputCorruptedOther[1],
                           "Calendar condition for worst reconstruction on testing dataset",
                           colorMapSystem=learningManager.colorMapSystem,
                           plotType=learningManager.plotType,
                           refPoints = None,
                           zLabelUser = "Implied total variance Theta")

    resOther = learningManager.backTestCompletion()
finally:
    dataSet.setMaskedPoints(cache)

In [None]:
# Overall minimum of thetaSurface (min of the per-column minima), shown as cell output.
# thetaSurface comes from the getArbitrageTheta call in the previous cell.
thetaSurface.min().min()


In [None]:
# Linear interpolation model, built with the same four arguments as the other
# models and handed to the Teacher for the following fit / diagnostic cells.
modelInterpolationLinear = gaussianProcess.LinearInterpolation(
    learning_rate, hyperparameters, nbUnitsPerLayer, n_factors
)
learningManager.assignNewModel(modelInterpolationLinear)

In [None]:
# Fit the model currently held by the Teacher (modelInterpolationLinear).
learningManager.fit()

In [None]:
# Run the Teacher's compression diagnostics for modelInterpolationLinear.
learningManager.diagnoseCompression()

In [None]:
# Back-test completion for modelInterpolationLinear with the current point selection.
res = learningManager.backTestCompletion()

In [None]:
# Columns that are not in dataSet.maskedPoints; handed back to setMaskedPoints
# afterwards to undo the temporary selection.
# NOTE(review): presumes setMaskedPoints expects the complement of what
# dataSet.maskedPoints stores - confirm in loadData.
cache = dataSet.getDataForModel()[0].columns.difference(dataSet.maskedPoints)

# pd.Int64Index was removed in pandas 2.0; pd.Index(..., dtype="int64") builds
# the same index on old and new versions.
# NOTE(review): 208 appears twice in the list - kept as in the original, confirm intent.
otherMaskedPoints = pd.Index([92,  95, 104, 130, 131, 136, 164, 177, 178, 185, 188, 207, 208,
                              208, 235, 245, 257, 273, 286, 290, 307, 309, 316, 325, 326, 329,
                              334, 344, 367, 373, 374, 391, 392, 406, 409, 418],
                             dtype="int64")
dataSet.setMaskedPoints(otherMaskedPoints)
# Restore the original selection even if the back-test raises.
try:
    res = learningManager.backTestCompletion()
finally:
    dataSet.setMaskedPoints(cache)

In [None]:
# Spline interpolation model, built with the same four arguments as the other
# models and handed to the Teacher for the following fit / diagnostic cells.
modelInterpolationSpline = gaussianProcess.SplineInterpolation(
    learning_rate, hyperparameters, nbUnitsPerLayer, n_factors
)
learningManager.assignNewModel(modelInterpolationSpline)

In [None]:
# Fit the model currently held by the Teacher (modelInterpolationSpline).
learningManager.fit()

In [None]:
# Run the Teacher's compression diagnostics for modelInterpolationSpline.
learningManager.diagnoseCompression()

In [None]:
# Columns that are not in dataSet.maskedPoints; handed back to setMaskedPoints
# afterwards to undo the temporary selection.
# NOTE(review): presumes setMaskedPoints expects the complement of what
# dataSet.maskedPoints stores - confirm in loadData.
cache = dataSet.getDataForModel()[0].columns.difference(dataSet.maskedPoints)

# pd.Int64Index was removed in pandas 2.0; pd.Index(..., dtype="int64") builds
# the same index on old and new versions.
# NOTE(review): 208 appears twice in the list - kept as in the original, confirm intent.
otherMaskedPoints = pd.Index([92,  95, 104, 130, 131, 136, 164, 177, 178, 185, 188, 207, 208,
                              208, 235, 245, 257, 273, 286, 290, 307, 309, 316, 325, 326, 329,
                              334, 344, 367, 373, 374, 391, 392, 406, 409, 418],
                             dtype="int64")
dataSet.setMaskedPoints(otherMaskedPoints)
# Restore the original selection even if the back-test raises.
try:
    res = learningManager.backTestCompletion()
finally:
    dataSet.setMaskedPoints(cache)

In [None]:
# Gaussian process model built while hyperparameters["extrapolationMode"] still
# holds the "NoExtrapolation" value set in the configuration cell (when the
# notebook is run top to bottom), then handed to the Teacher.
modelGaussianProcessNoExtrapolation = gaussianProcess.GaussianProcess(
    learning_rate, hyperparameters, nbUnitsPerLayer, n_factors
)
learningManager.assignNewModel(modelGaussianProcessNoExtrapolation)

In [None]:
# Fit the model currently held by the Teacher (modelGaussianProcessNoExtrapolation).
learningManager.fit()

In [None]:
# Run the Teacher's compression diagnostics for modelGaussianProcessNoExtrapolation.
learningManager.diagnoseCompression()

In [None]:
# Back-test completion for modelGaussianProcessNoExtrapolation with the current point selection.
res = learningManager.backTestCompletion()

In [None]:
# Columns that are not in dataSet.maskedPoints; handed back to setMaskedPoints
# afterwards to undo the temporary selection.
# NOTE(review): presumes setMaskedPoints expects the complement of what
# dataSet.maskedPoints stores - confirm in loadData.
cache = dataSet.getDataForModel()[0].columns.difference(dataSet.maskedPoints)

# pd.Int64Index was removed in pandas 2.0; pd.Index(..., dtype="int64") builds
# the same index on old and new versions.
# NOTE(review): 208 appears twice in the list - kept as in the original, confirm intent.
otherMaskedPoints = pd.Index([92,  95, 104, 130, 131, 136, 164, 177, 178, 185, 188, 207, 208,
                              208, 235, 245, 257, 273, 286, 290, 307, 309, 316, 325, 326, 329,
                              334, 344, 367, 373, 374, 391, 392, 406, 409, 418],
                             dtype="int64")
dataSet.setMaskedPoints(otherMaskedPoints)
# Restore the original selection even if the back-test raises.
try:
    res = learningManager.backTestCompletion()
finally:
    dataSet.setMaskedPoints(cache)

In [None]:
# Switch the shared hyperparameter dictionary to "OuterDomain" before building
# the next Gaussian process model.
# NOTE(review): this mutates the same dict object that was passed to the models
# built earlier; whether they keep a reference to it is not visible here. The
# value is set back to "NoExtrapolation" in a later cell.
hyperparameters["extrapolationMode"] = "OuterDomain"
modelGaussianProcessOuterExtrapolation = gaussianProcess.GaussianProcess(
    learning_rate, hyperparameters, nbUnitsPerLayer, n_factors
)
learningManager.assignNewModel(modelGaussianProcessOuterExtrapolation)

In [None]:
# Fit the model currently held by the Teacher (modelGaussianProcessOuterExtrapolation).
learningManager.fit()

In [None]:
# Run the Teacher's compression diagnostics for modelGaussianProcessOuterExtrapolation.
learningManager.diagnoseCompression()

In [None]:
# Back-test completion for modelGaussianProcessOuterExtrapolation with the current point selection.
res = learningManager.backTestCompletion()

In [None]:
# Columns that are not in dataSet.maskedPoints; handed back to setMaskedPoints
# afterwards to undo the temporary selection.
# NOTE(review): presumes setMaskedPoints expects the complement of what
# dataSet.maskedPoints stores - confirm in loadData.
cache = dataSet.getDataForModel()[0].columns.difference(dataSet.maskedPoints)

# pd.Int64Index was removed in pandas 2.0; pd.Index(..., dtype="int64") builds
# the same index on old and new versions.
# NOTE(review): 208 appears twice in the list - kept as in the original, confirm intent.
otherMaskedPoints = pd.Index([92,  95, 104, 130, 131, 136, 164, 177, 178, 185, 188, 207, 208,
                              208, 235, 245, 257, 273, 286, 290, 307, 309, 316, 325, 326, 329,
                              334, 344, 367, 373, 374, 391, 392, 406, 409, 418],
                             dtype="int64")
dataSet.setMaskedPoints(otherMaskedPoints)
# Restore the original selection even if the back-test raises.
try:
    res = learningManager.backTestCompletion()
finally:
    dataSet.setMaskedPoints(cache)

In [None]:
# Put the shared dictionary back to the value set in the configuration cell.
# The next cell overrides it again with "InnerDomain".
hyperparameters["extrapolationMode"] = "NoExtrapolation"

In [None]:
# Switch the shared hyperparameter dictionary to "InnerDomain" before building
# the next Gaussian process model.
# NOTE(review): this mutates the same dict object that was passed to the models
# built earlier; whether they keep a reference to it is not visible here. The
# value is set back to "NoExtrapolation" in the last cell.
hyperparameters["extrapolationMode"] = "InnerDomain"
modelGaussianProcessInnerExtrapolation = gaussianProcess.GaussianProcess(
    learning_rate, hyperparameters, nbUnitsPerLayer, n_factors
)
learningManager.assignNewModel(modelGaussianProcessInnerExtrapolation)

In [None]:
# Fit the model currently held by the Teacher (modelGaussianProcessInnerExtrapolation).
learningManager.fit()

In [None]:
# Run the Teacher's compression diagnostics for modelGaussianProcessInnerExtrapolation.
learningManager.diagnoseCompression()

In [None]:
# Back-test completion for modelGaussianProcessInnerExtrapolation with the current point selection.
res = learningManager.backTestCompletion()

In [None]:
# Columns that are not in dataSet.maskedPoints; handed back to setMaskedPoints
# afterwards to undo the temporary selection.
# NOTE(review): presumes setMaskedPoints expects the complement of what
# dataSet.maskedPoints stores - confirm in loadData.
cache = dataSet.getDataForModel()[0].columns.difference(dataSet.maskedPoints)

# pd.Int64Index was removed in pandas 2.0; pd.Index(..., dtype="int64") builds
# the same index on old and new versions.
# NOTE(review): 208 appears twice in the list - kept as in the original, confirm intent.
otherMaskedPoints = pd.Index([92,  95, 104, 130, 131, 136, 164, 177, 178, 185, 188, 207, 208,
                              208, 235, 245, 257, 273, 286, 290, 307, 309, 316, 325, 326, 329,
                              334, 344, 367, 373, 374, 391, 392, 406, 409, 418],
                             dtype="int64")
dataSet.setMaskedPoints(otherMaskedPoints)
# Restore the original selection even if the back-test raises.
try:
    res = learningManager.backTestCompletion()
finally:
    dataSet.setMaskedPoints(cache)

In [None]:
# Put the shared dictionary back to the value set in the configuration cell,
# so that re-running earlier model cells starts from "NoExtrapolation".
hyperparameters["extrapolationMode"] = "NoExtrapolation"