In [1]:
%load_ext autoreload
%autoreload 2

In [15]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tqdm.notebook import tqdm
import scipy.optimize as optimization
import itertools

from BHDVCStf import BHDVCS #modified bhdvcs file
import utilities as uts

In [5]:
bhdvcs = BHDVCS()

In [6]:
df = pd.read_csv("dvcs_psuedo.csv")
data = uts.DvcsData(df)

numSets = 15
numReplicas = 500

In [7]:
def random_poly(df, max_terms=10, max_coef=10):
    '''
    Calculate any cff from a random polynomial function of a random subset of kinematic variables
    '''
    numKins = np.random.randint(1, 5)
    kins = list(itertools.combinations(['x_b', 'QQ', 't', 'k'], numKins))
    kins = kins[np.random.randint(len(kins))]
    tot = np.zeros(len(df))
    for kin in kins:
        numTerms = np.random.randint(1, max_terms+1)
        exps = np.random.choice(range(max_terms), size=numTerms, replace=False)
        coefs = np.random.choice(range(-max_coef, max_coef+1), size=numTerms, replace=False)
        for c, e in zip(coefs, exps):
            tot += c*df[kin]**e
    return tot

In [8]:
def gen_pseudo(df, error=.05):
    dfdata = uts.DvcsData(df)

    tc = df.copy()
    tc['ReH'] = random_poly(tc)
    tc['ReE'] = random_poly(tc)
    tc['ReHtilde'] = random_poly(tc)
    #raise Error
    tc['F'] = bhdvcs.TotalUUXS(np.array(dfdata.XnoCFF), tc['ReH'], tc['ReE'], tc['ReHtilde'])
    tc['errF'] = tc['F']*error
    return tc

In [95]:
test = gen_pseudo(df)
test = uts.DvcsData(test.loc[test['F'] > 0, :])

In [96]:
test.df

Unnamed: 0,#Set,index,errF,F,phi_x,QQ,x_b,t,k,F1,F2,dvcs,ReH,ReE,ReHtilde
9,0,9,0.002317,0.046343,90,1.74013,0.435095,-0.380868,3.75,0.498060,0.68579,0.012288,-323.477095,2.982659e+05,-4.583377e+05
10,0,10,3.789471,75.789428,100,1.74013,0.435095,-0.380868,3.75,0.498060,0.68579,0.012288,-323.477095,2.982659e+05,-4.583377e+05
11,0,11,4.611602,92.232038,110,1.74013,0.435095,-0.380868,3.75,0.498060,0.68579,0.012288,-323.477095,2.982659e+05,-4.583377e+05
12,0,12,3.285083,65.701658,120,1.74013,0.435095,-0.380868,3.75,0.498060,0.68579,0.012288,-323.477095,2.982659e+05,-4.583377e+05
13,0,13,0.569475,11.389506,130,1.74013,0.435095,-0.380868,3.75,0.498060,0.68579,0.012288,-323.477095,2.982659e+05,-4.583377e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
527,14,23,318.548410,6370.968209,230,2.63258,0.345012,-0.361188,7.75,0.512913,0.71481,0.012288,-3602.767777,1.562567e+08,-1.414718e+08
528,14,24,556.146548,11122.930969,240,2.63258,0.345012,-0.361188,7.75,0.512913,0.71481,0.012288,-3602.767777,1.562567e+08,-1.414718e+08
529,14,25,636.456262,12729.125231,250,2.63258,0.345012,-0.361188,7.75,0.512913,0.71481,0.012288,-3602.767777,1.562567e+08,-1.414718e+08
530,14,26,479.294506,9585.890125,260,2.63258,0.345012,-0.361188,7.75,0.512913,0.71481,0.012288,-3602.767777,1.562567e+08,-1.414718e+08


# Local Fits

In [32]:
def produceLocalResults(data, numSets, numReplicas):
    '''
    :param data: data of class DvcsData (which can be found in utilities.py)
    :param numSets: number of different kinematic sets contained in data
    :param numReplicas: number of replicas you would like to produce
    
    :returns: numpy array with shape (numSets, numReplicas, numCFFs)
    '''
    results = []
    for i in tqdm(range(numSets)):
        replicas = []
        seti = data.getSet(i) # DvcsData object of kinematic set i
        X = np.array(seti.XnoCFF) # the kinematics and all variables necessary to compute 
        sigma = seti.erry # error in F
        pars = np.array([1, 1, 1])
        for i in range(numReplicas):
            y = seti.sampleY()
            cff, cffcov = optimization.curve_fit(bhdvcs.TotalUUXS, X, y, pars, sigma, method='lm')
            replicas.append(cff)
        results.append(replicas)
    return np.array(results)

In [29]:
results = produceResults(test, 15, 100)

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))






KeyboardInterrupt: 

# Global Fit

In [16]:
kinematics = tf.keras.Input(shape=(4))
x = tf.keras.layers.Dense(20, activation="elu")(kinematics)
x = tf.keras.layers.Dense(20, activation="elu")(x)
outputs = tf.keras.layers.Dense(3)(x)

globalModel = tf.keras.Model(inputs=kinematics, outputs=outputs, name="GlobalModel")

In [None]:
X = data.Kinematics.loc[np.array(range(numSets))*36, :].reset_index(drop=True)

y = np.array(data.CFFs.loc[np.array(range(numSets))*36, :].reset_index(drop=True))

rescaler = MinMaxScaler()

rescaler = rescaler.fit(X)
X_rescaled = rescaler.transform(X)

globalModel.compile(optimizer=tf.keras.optimizers.Adam(.1), loss=tf.keras.losses.MeanSquaredError())
orig_weights = globalModel.get_weights()

In [None]:
def produceGlobalResults(model, X, localFits, orig_weights, numSets, numReplicas, epochs=150):
    '''
    Essentially LOO cross-val with y-values being generated from seperate local fit
    
    globalModel: a tensorflow neural network model
    X: standardized kinematic variables
    orig_weights: the original weights from when the model was created (used to reset model after it has been trained)
    numSets: the number of kinematic sets
    numReplicas: the number of replicas
    
    returns: np array of cff predictions of shape (numSets, numReplicas, numCFFs)
    '''
    by_set = []
    for i in tqdm(range(numSets)):
        valid_x = X[[i], :]
        train_x = np.delete(X, i, axis=0)
        
        by_rep = []
        for rep in range(numReplicas):
            train_y = np.delete(localFits[:, rep, :], i, axis=0)
            
            model.set_weights(orig_weights)
            model.fit(train_x, train_y, epochs=epochs, verbose=0)
            by_rep.append(list(model.predict(valid_x)[0]))
        
        by_set.append(by_rep)

    return np.array(by_set)

In [30]:
def testWithRandomData(kin_range_df, model, numSets, numReplicas):
    data = gen_pseudo(kin_range_df)
    data = uts.DvcsData(data.loc[data['F'] > 0, :])
    localFits = produceLocalResults(test, numSets, numReplicas)
    
    X = data.Kinematics.loc[np.array(range(numSets))*36, :].reset_index(drop=True)

    y = np.array(data.CFFs.loc[np.array(range(numSets))*36, :].reset_index(drop=True))

    rescaler = MinMaxScaler()

    rescaler = rescaler.fit(X)
    X_rescaled = rescaler.transform(X)

    globalModel.compile(optimizer=tf.keras.optimizers.Adam(.1), loss=tf.keras.losses.MeanSquaredError())
    orig_weights = globalModel.get_weights()
    
    cffhat_mat = produceGlobalResults(globalModel, X_rescaled, localFits, orig_weights, numSets, numReplicas)
    
    return cffhat_mat
    

In [33]:
test = testWithRandomData(df, globalModel, 15, 2)

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))






ValueError: `ydata` must not be empty!

In [None]:
y_yhat, err = uts.y_yhat_errCFFs(data, results, 0)