# ANN Local Fit

Overview:

1. Start with a specific set of kinematic variables.  
2. Initialize an ANN with 4 kinematic variables as input and 3 CFFs as output
3. Pick random starting points for each parameter in ANN
4. Input data to ANN (using current params) to produce Fs
5. Compare resulting Fs to sampled Fs and compute mean squared error
6. Update ANN params in a direction that reduces that mean squared error
7. Repeat steps 4-6 until the loss changes only very slightly

The autoreloader enables you to make edits in imported files and those edits will become immediately available.

In [1]:
# Reload imported modules before each cell runs so edits to local .py files are picked up.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import tensorflow as tf

from BHDVCStf import BHDVCS #modified bhdvcs file
import utilities as uts #general utilities that are useful for all methods

ModuleNotFoundError: No module named 'BHDVCStf'

## General global variable definitions

In [None]:
# NOTE(review): bhdvcs is not referenced by any cell visible in this notebook — confirm it is still needed.
bhdvcs = BHDVCS()
df = pd.read_csv("dvcs_xs_newsets_genCFFs.csv")  # path is relative to the kernel's working directory
data = uts.DvcsData(df)  # project wrapper; later cells call data.getSet() and read data.df

# NOTE(review): neither constant is referenced by the visible cells — produceCFFs derives the
# number of sets from data.df['#Set'] and is called with a literal 5 replicas.
numSets = 15
numReplicas = 30

## Define and compile model

This makes use of the TensorFlow [functional API](https://www.tensorflow.org/guide/keras/functional).

In [None]:
# Functional-API graph: kinematics -> tanh hidden layer -> 3 CFFs, which are concatenated with
# the non-CFF inputs and passed through the cross-section layer to produce F.
# Input shapes are written as 1-tuples: a bare (4) is just the int 4, which is not a valid
# shape tuple and is rejected by newer Keras releases.
kinematics = tf.keras.Input(shape=(4,))
x = tf.keras.layers.Dense(20, activation="tanh")(kinematics)
outputs = tf.keras.layers.Dense(3)(x) #three output nodes for ReH, ReE, ReHtilde
noncffInputs = tf.keras.Input(shape=(8,))
# Order matters here: the non-CFF inputs come first, followed by the three CFFs.
totalUUXSInputs = tf.keras.layers.concatenate([noncffInputs, outputs])
F = uts.TotalUUXSlayer()(totalUUXSInputs) # incorporate cross-sectional function

globalModel = tf.keras.Model(inputs=[kinematics, noncffInputs], outputs=F, name="GlobalModel")

In [None]:
# Writes a diagram of the model graph to cffs.png; requires pydot and graphviz to be installed.
tf.keras.utils.plot_model(globalModel, "cffs.png", show_shapes=True)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


In [3]:
# Adam with a learning rate of 0.02, minimizing the mean squared error on the model output F.
globalModel.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.02),
    loss=tf.keras.losses.MeanSquaredError(),
)

NameError: name 'globalModel' is not defined

In [7]:
# Snapshot of the model's current weights; produceCFFs restores them with set_weights().
Wsave = globalModel.get_weights()

## Produce CFFdists

In [8]:
# Sanity check: show the first array in the saved weight list (4 rows of 20 values in the output below).
print(Wsave[0])

[[ 0.45461202  0.13004005 -0.3103541  -0.29535282  0.01736736 -0.19027698
  -0.31239653 -0.28938556  0.39151454 -0.2282772  -0.35688543 -0.2055788
   0.27797914  0.36356652 -0.47698796 -0.08007777  0.48032832 -0.05126429
   0.02851105  0.24934685]
 [-0.4053923  -0.14213657  0.15523863 -0.37803543 -0.27249813  0.19910574
  -0.43728328  0.2038182   0.2767403  -0.18545818 -0.38632333  0.0950495
   0.4895363   0.45890403 -0.34969878 -0.13995385  0.07182288  0.17348886
  -0.1949985  -0.49814153]
 [-0.04843402 -0.2000761   0.15324438  0.03101838  0.34369493  0.22634327
   0.33307981 -0.12745702 -0.17760146  0.20931256 -0.04554152 -0.0431366
   0.41556132 -0.0598197   0.09484649 -0.24693549  0.0340966   0.0868088
   0.34481406  0.16683209]
 [-0.19319737 -0.07368028  0.23782241  0.15414655  0.24852145 -0.18098891
  -0.11772358 -0.37237895  0.4310181   0.41702974  0.05266297 -0.3283211
   0.05659044  0.3710413   0.25328064 -0.4333254   0.1821543  -0.19705248
   0.22926593  0.4559219 ]]


In [9]:
def produceCFFs(numReplicas, data, Wsave):
    '''
    Fit globalModel to resampled F values for every set and collect the resulting CFFs.

    :param numReplicas: number of replicas to produce for each set
    :param data: whole DvcsData
    :param Wsave: saved weights, restored into globalModel before each set is fitted

    :returns: numpy array of the collected CFFs, indexed first by set and then by replica
              (presumably of shape (numSets, numReplicas, 3) — depends on what
              uts.cffs_from_globalModel returns; TODO confirm)
    '''

    # Assumes set labels are the contiguous integers 0..max — TODO confirm against the data file.
    set_count = int(data.df['#Set'].max()) + 1

    by_sample = []

    for i in tqdm(range(set_count)):

        # NOTE(review): weights are reset once per set, so every replica after the first
        # continues training from the previous replica's weights. Move this call into the
        # inner loop if replicas are meant to start from the same point.
        globalModel.set_weights(Wsave) # reset weights to original value

        setI = data.getSet(i) #DvcsData object containing specific set

        by_set = []

        for _ in range(numReplicas):

            # sampleY() is called afresh for every replica, so each fit sees its own set of Fs.
            # verbose=0: the tqdm bar reports progress; per-epoch logs would swamp the output.
            globalModel.fit([setI.Kinematics, setI.XnoCFF], setI.sampleY(),
                        epochs=100, verbose=0)

            cffs = uts.cffs_from_globalModel(globalModel, setI.Kinematics) # get cffs from middle model

            by_set.append(cffs)

        by_sample.append(by_set)

    return np.array(by_sample)

This took about 2 hours to run

In [10]:
# Runs 5 replicas per set (a literal 5, not the numReplicas constant defined earlier).
results = produceCFFs(5, data, Wsave)

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 1/100


OSError: Unable to create file (unable to open file: name = 'networks\best-0.hdf5', errno = 2, error message = 'No such file or directory', flags = 13, o_flags = 302)

# ReH

In [None]:
# Index 0 selects the first CFF — presumably ReH, per this section's heading; confirm in utilities.
# y_yhat and err are reassigned in every section below, so cell order matters.
y_yhat, err = uts.y_yhat_errCFFs(data, results, 0)

In [None]:
# Uses the y_yhat assigned by the cell directly above; re-run that cell first if another section ran since.
uts.evaluate(y_yhat)

In [None]:
# Plots the current y_yhat/err pair under the label "ReH"; relies on the ReH cell above having run last.
uts.plotError(y_yhat, err, "ReH")

# ReE

In [None]:
# Index 1 selects the second CFF — presumably ReE, per this section's heading; confirm in utilities.
# Overwrites the y_yhat/err left behind by the previous section.
y_yhat, err = uts.y_yhat_errCFFs(data, results, 1)

In [None]:
# Uses the y_yhat assigned by the cell directly above; re-run that cell first if another section ran since.
uts.evaluate(y_yhat)

In [None]:
# Plots the current y_yhat/err pair under the label "ReE"; relies on the ReE cell above having run last.
uts.plotError(y_yhat, err, "ReE")

# ReHtilde

In [None]:
# Index 2 selects the third CFF — presumably ReHtilde, per this section's heading; confirm in utilities.
# Overwrites the y_yhat/err left behind by the previous section.
y_yhat, err = uts.y_yhat_errCFFs(data, results, 2)
uts.evaluate(y_yhat)

In [None]:
# Plots the current y_yhat/err pair under the label "ReHtilde"; relies on the cell above having run last.
uts.plotError(y_yhat, err, "ReHtilde")

# Propagated Fs at 180

In [None]:
# NOTE: argument order is (results, data) here, the reverse of y_yhat_errCFFs(data, results, ...).
# Overwrites the y_yhat/err left behind by the previous section.
y_yhat, err = uts.y_yhat_errFs(results, data)
uts.evaluate(y_yhat)

In [None]:
# Plots the current y_yhat/err pair under the label "F"; relies on the cell above having run last.
uts.plotError(y_yhat, err, "F")