In [3]:
import numpy as np
import utilities as uts
from TVA1_UU import TVA1_UU as BHDVCS
import pandas as pd
# Read the cross-section table from a CSV file in the working directory.
data = pd.read_csv('dvcs_xs_May-2021_342_sets_with_trueCFFs.csv')
# Model object (TVA1_UU imported under the alias BHDVCS); errFunc calls its
# TotalUUXS method to compute the model prediction.
bhdvcs = BHDVCS()
# NOTE(review): DvcsData is defined in the *following* cell (execution count
# In [2], i.e. it was run before this one). On a fresh kernel with top-to-bottom
# execution this line raises NameError until that cell has been run.
dvcsdata = DvcsData(data)

In [2]:
class DvcsData(object):
    """Column-wise views of a cross-section DataFrame used by the fitting code.

    The constructor only slices ``df``; it expects the columns
    'phi_x', 'k', 'QQ', 'x_b', 't', 'F1', 'F2', 'dvcs', 'ReH', 'ReE',
    'ReHTilde', 'F' and 'sigmaF' to be present.

    Attributes:
        df: the DataFrame passed in.
        X: input columns including the three CFF columns.
        XnoCFF: the same input columns without the CFF columns.
        CFFs: the 'ReH', 'ReE', 'ReHTilde' columns.
        y: the 'F' column (fit target).
        Kinematics: the 'k', 'QQ', 'x_b', 't' columns.
        erry: the 'sigmaF' column, used as the standard deviation in sampleY.
    """

    def __init__(self, df):
        self.df = df
        self.X = df.loc[:, ['phi_x', 'k', 'QQ', 'x_b', 't', 'F1', 'F2', 'ReH', 'ReE', 'ReHTilde', 'dvcs']]
        self.XnoCFF = df.loc[:, ['phi_x', 'k', 'QQ', 'x_b', 't', 'F1', 'F2', 'dvcs']]
        #self.X = self.XnoCFF ReH,ReE and ReHtilde no longer in new data
        self.CFFs = df.loc[:, ['ReH', 'ReE', 'ReHTilde']] # ReH,ReE and ReHtilde no longer in new data
        self.y = df.loc[:, 'F']
        self.Kinematics = df.loc[:, ['k', 'QQ', 'x_b', 't']]
        self.erry = df.loc[:, 'sigmaF']

    def __len__(self):
        """Number of rows in the table."""
        return len(self.X)

    def getSet(self, setNum, itemsInSet=45):
        """Return a new DvcsData holding only the rows of set number ``setNum``.

        Rows are selected by *label* (``.loc``) from ``setNum*itemsInSet`` to
        ``(setNum+1)*itemsInSet - 1`` inclusive, so this assumes the frame
        carries the default 0..n-1 integer index.
        """
        first = setNum * itemsInSet
        last = first + itemsInSet - 1
        # Work on an explicit copy so adding columns neither touches self.X nor
        # needs the global pandas chained-assignment option to be switched off.
        subX = self.X.loc[first:last, :].copy()
        subX['F'] = self.y.loc[first:last]
        subX['sigmaF'] = self.erry.loc[first:last]
        return DvcsData(subX)

    def sampleY(self):
        """Draw one normal sample per row with mean ``y`` and std ``erry``."""
        return np.random.normal(self.y, self.erry)

    def sampleWeights(self):
        """Per-row weights, computed as 1 / erry."""
        return 1/self.erry

    def getAllKins(self, itemsInSets=45):
        """Return the kinematics of the first row of every complete set.

        Picks positional rows 0, itemsInSets, 2*itemsInSets, ... for
        ``len(self.df) // itemsInSets`` sets.
        """
        # The original read a bare `df`, which is not defined in this scope.
        firstRows = np.arange(len(self.df) // itemsInSets) * itemsInSets
        return self.Kinematics.iloc[firstRows, :]

In [4]:
def errFunc(data, cff):
    """Mean squared error between ``data.y`` and the model, per CFF triple.

    Args:
        data: object exposing ``X`` (a frame with the columns 'phi_x', 'k',
            'QQ', 'x_b', 't', 'F1', 'F2', 'dvcs') and ``y``.
        cff: values reshaped to (-1, 3); each row is (ReH, ReE, ReHT).

    Returns:
        1-D numpy array with one mean-squared-error value per row of ``cff``.
    """
    triples = np.reshape(cff, (-1, 3))

    # Plain 2-D array of the model inputs, one column per name, in this order.
    # (The original author noted that passing XnoCFF directly did not work.)
    frame = data.X
    ordered = ('phi_x', 'k', 'QQ', 'x_b', 't', 'F1', 'F2', 'dvcs')
    xdat = np.array([np.array(frame[name]) for name in ordered]).T

    losses = []
    for reh, ree, reht in triples:
        calcF = bhdvcs.TotalUUXS(xdat, reh, ree, reht)
        residual = np.subtract(data.y, calcF)
        losses.append(np.mean(np.power(residual, 2)))
    return np.array(losses)
    

def nm(sets, epochs):
    """Nelder-Mead simplex search over (ReH, ReE, ReHT) for one data set.

    NOTE: a later cell defines another ``nm`` that takes a third ``startCFF``
    argument; once that cell has run, it replaces this definition.

    Args:
        sets: set number passed to ``dvcsdata.getSet``.
        epochs: number of simplex iterations to perform.

    Returns:
        (4, 3) numpy array: the simplex vertices after the last iteration.
        The starting simplex is drawn uniformly from [-10, 10).
    """
    alpha = 1   # reflection coefficient
    gamma = 2   # expansion coefficient
    rho = .5    # contraction coefficient
    sigma = .5  # shrink coefficient
    #startCFFs = np.array([[1.,1,1],[1,1,2],[1,2,1],[2,2,2]])
    startCFFs = np.random.random((4,3))*20-10
    # The data subset does not change between iterations, so fetch it once.
    subset = dvcsdata.getSet(sets)
    for _ in range(epochs):
        mse = errFunc(subset, startCFFs)
        sort = np.argsort(mse)
        best, secondWorst, worst = sort[0], sort[-2], sort[-1]

        # Centroid of every vertex except the worst one.
        centroid = startCFFs[sort[:-1]].mean(axis=0)

        # Reflect the worst vertex through the centroid. The previous form,
        # centroid - alpha*(centroid - worst), returns the worst vertex itself
        # for alpha = 1, so reflection/expansion could never be accepted.
        reflect = centroid + alpha * (centroid - startCFFs[worst])
        reflectmse = errFunc(subset, reflect)[0]
        if mse[best] <= reflectmse < mse[secondWorst]:
            startCFFs[worst] = reflect
            continue

        if reflectmse < mse[best]:
            # Reflection beat the best vertex: try going further the same way.
            expand = centroid + gamma * (reflect - centroid)
            expandmse = errFunc(subset, expand)[0]
            startCFFs[worst] = expand if expandmse < reflectmse else reflect
            continue

        # Reflection did not help: pull the worst vertex toward the centroid.
        contract = centroid + rho * (startCFFs[worst] - centroid)
        contractmse = errFunc(subset, contract)[0]
        if contractmse < mse[worst]:
            startCFFs[worst] = contract
            continue

        # Nothing improved: shrink every non-best vertex toward the best one.
        for i in sort[1:]:
            startCFFs[i] = startCFFs[best] + sigma * (startCFFs[i] - startCFFs[best])

    return startCFFs

def readConfig(filename, lineno):
    """Copy one row of a CSV config file into the module-level settings.

    Only columns that exist in the file are applied; everything else keeps its
    current value.

    Column -> module-level name:
        alpha, gamma, rho, sigma -> same name
        ReH, ReE, ReHT           -> startCFFs[0], startCFFs[1], startCFFs[2]
        epochs                   -> epochno
        replicas                 -> replicas
        set                      -> setno

    Args:
        filename: path of the CSV file.
        lineno: positional row to read; the value is then looked up by the
            same number as an index label.

    NOTE(review): ``startCFFs[i] = value`` indexes the first axis, so for a
    2-D ``startCFFs`` it fills the whole row ``i`` with one number. Confirm
    that this (rather than a column) is what is intended.
    """
    row = pd.read_csv(filename).iloc[[lineno]]
    present = row.columns
    settings = globals()

    for column in ('alpha', 'gamma', 'rho', 'sigma'):
        if column in present:
            settings[column] = row[column][lineno]

    # Updated in place, so the existing module-level array is modified.
    for slot, column in enumerate(('ReH', 'ReE', 'ReHT')):
        if column in present:
            startCFFs[slot] = row[column][lineno]

    for column, target in (('epochs', 'epochno'), ('replicas', 'replicas'), ('set', 'setno')):
        if column in present:
            settings[target] = row[column][lineno]

In [5]:

#version for configs that returns a df with all the cffs for each iteration
def nm(sets, epochs, startCFF):
    """Nelder-Mead simplex search that logs the simplex at every iteration.

    Reads the coefficients ``alpha`` (reflection), ``gamma`` (expansion),
    ``rho`` (contraction) and ``sigma`` (shrink) from module-level variables.

    Args:
        sets: set number passed to ``dvcsdata.getSet``.
        epochs: number of simplex iterations to perform.
        startCFF: array-like of simplex vertices, one (ReH, ReE, ReHT) per row.
            It is copied, never modified.

    Returns:
        DataFrame with one row per vertex per epoch and the columns 'ReH',
        'ReE', 'ReHT', 'lastMove', 'epoch', 'error'. Each epoch's rows show
        the simplex at the *start* of that epoch; 'lastMove' names the move
        that produced it ("" for the first epoch). The simplex produced by the
        final move is therefore not included. Empty if ``epochs`` is 0.
    """
    # The data subset does not change between iterations, so fetch it once.
    subset = dvcsdata.getSet(sets)
    # Float copy: keeps the caller's array intact and avoids truncation if an
    # integer array is passed in.
    CFFs = np.array(startCFF, dtype=float)
    history = []
    lastmove = ""
    for epoch in range(epochs):
        mse = errFunc(subset, CFFs)
        sort = np.argsort(mse)
        best, secondWorst, worst = sort[0], sort[-2], sort[-1]

        # Snapshot a copy: CFFs is updated in place below, and the frames are
        # only concatenated after the loop.
        snapshot = pd.DataFrame(CFFs.copy(), columns = ['ReH', 'ReE', 'ReHT'])
        snapshot['lastMove'] = lastmove
        snapshot['epoch'] = epoch
        snapshot['error'] = mse
        history.append(snapshot)

        # Centroid of every vertex except the worst one.
        centroid = CFFs[sort[:-1]].mean(axis=0)

        # Reflect the worst vertex through the centroid. The previous form,
        # centroid - alpha*(centroid - worst), returns the worst vertex itself
        # for alpha = 1, so reflection/expansion could never be accepted.
        reflect = centroid + alpha * (centroid - CFFs[worst])
        reflectmse = errFunc(subset, reflect)[0]
        if mse[best] <= reflectmse < mse[secondWorst]:
            CFFs[worst] = reflect
            lastmove = "reflection"
            continue

        if reflectmse < mse[best]:
            # Reflection beat the best vertex: try going further the same way.
            expand = centroid + gamma * (reflect - centroid)
            expandmse = errFunc(subset, expand)[0]
            if expandmse < reflectmse:
                CFFs[worst] = expand
                lastmove = "expansion-expanded"
            else:
                CFFs[worst] = reflect
                lastmove = "expansion-reflected"
            continue

        # Reflection did not help: pull the worst vertex toward the centroid.
        contract = centroid + rho * (CFFs[worst] - centroid)
        contractmse = errFunc(subset, contract)[0]
        if contractmse < mse[worst]:
            CFFs[worst] = contract
            lastmove = "contraction"
            continue

        # Nothing improved: shrink every non-best vertex toward the best one.
        for i in sort[1:]:
            CFFs[i] = CFFs[best] + sigma * (CFFs[i] - CFFs[best])
        lastmove = "shrink"

    # DataFrame.append no longer exists in pandas >= 2.0; concatenate once.
    return pd.concat(history) if history else pd.DataFrame()

In [53]:
# Defaults; readConfig overwrites whichever of these have a matching column in
# the config file.
epochno = 0
setno = 0
replicas = 0
alpha = 1
gamma = 2
rho = .5
sigma = .5
startCFFs = np.random.random((4,3))*20-10

filename = "nm/configs.csv"
linenum = 1
readConfig(filename, linenum)

print(epochno)
print(setno)
print(replicas)
# NOTE(review): this overrides the replica count that was just read from the config.
replicas = 3
# NOTE(review): every replica starts from the same startCFFs and nm draws no
# random numbers itself, so the replicas look identical unless errFunc is
# stochastic - confirm this is intended.
replicaFrames = []
for replica in range(replicas):
    points = nm(setno,epochno,startCFFs)
    # Keeps only the three CFF columns of whatever nm returned.
    result = pd.DataFrame(points, columns=['ReH', 'ReE', 'ReHT'])
    result['index'] = range(0, len(result))
    result['replica'] = replica
    result['set'] = setno
    replicaFrames.append(result)
# DataFrame.append no longer exists in pandas >= 2.0; concatenate once instead
# of growing the frame inside the loop.
results = pd.concat(replicaFrames) if replicaFrames else pd.DataFrame()

#results.to_csv("nm/" + "ResultsConfig" +  "/Results"+ str(linenum) + "_" + str(setno) + "_" + str(epochno) + "_" + str(replicas) + ".csv")

100
0
100


In [43]:
def funcErr(cff, data):
    """Objective wrapper with the (parameters, data) argument order.

    Args:
        cff: array-like of shape (3,).
        data: passed through to ``errFunc`` as its first argument.

    Returns:
        Whatever ``errFunc(data, cff)`` returns.

    Raises:
        ValueError: if ``cff`` does not have shape (3,).
    """
    hasExpectedShape = np.shape(cff) == (3,)
    if hasExpectedShape:
        return errFunc(data, cff)
    raise ValueError("CFF shape incorrect")
from scipy import optimize as opt
# Reference run with SciPy's own Nelder-Mead on set 2: 10 iterations from a
# random simplex in [-10, 10), tolerances set to zero, keeping the point
# history (return_all) for inspection.
scipyresult = opt.minimize(
    funcErr,
    np.array([0,0,0]),
    args = dvcsdata.getSet(2),
    method = 'Nelder-Mead',
    options = {
        'maxiter': 10,
        'maxfev': None,
        'return_all': True,
        'initial_simplex': np.random.random((4,3))*20-10,
        'xatol': 0.000,
        'fatol': 0.000,
    },
)

In [56]:
# Tabulate the points SciPy recorded in allvecs, plus a running row counter.
test = pd.DataFrame(np.asarray(scipyresult.allvecs))
test.insert(len(test.columns), 'index', range(0, len(test)))
test

Unnamed: 0,0,1,2,index
0,-4.99017,4.119096,2.213238,0
1,-0.752784,4.502163,-0.881991,1
2,1.543146,10.699964,-3.747921,2
3,1.543146,10.699964,-3.747921,3
4,0.42826,5.260403,-4.927809,4
5,0.407433,6.092318,-2.969484,5
6,0.407433,6.092318,-2.969484,6
7,1.204562,8.445265,-4.054921,7
8,0.776503,6.256295,-4.499384,8
9,1.279205,7.075134,-4.514866,9


In [None]:
# Quick look at the config file: the first few 'epochs' values and the 'alpha'
# value of row 1 (the same single-row lookup readConfig performs).
configs = pd.read_csv(filename)
config = configs.iloc[[1]]
print(configs.loc[:, 'epochs'].head())
print(config.loc[1, 'alpha'])
del configs

In [74]:
# Scratch check: a one-row frame built from the first vertex of a random
# simplex, then a constant string column added to it.
test = pd.DataFrame(
    (np.random.random((4,3))*20-10)[:1].reshape(-1, 3),
    columns = ['Reh','ree','reht'],
)
test['aaa'] = 'aa'
test.head()

#np.take(np.reshape(np.repeat(np.array(range(0,10)),10),(10,10)),np.array([1,2,0,3,4]),0)[0:-1]

Unnamed: 0,Reh,ree,reht,aaa
0,-2.872529,6.463088,0.919878,aa


In [72]:
# Scratch check of the argsort -> rank trick: test2[i] is the position that
# test[i] takes in sorted order.
test = np.array([1, 0, 2, 3, -1])
test1 = test.argsort()
test2 = np.empty_like(test1)
test2[test1] = np.arange(test.size)
test2
# Put values and ranks side by side, then shift the values by 10.
test3 = pd.DataFrame(test, columns = ['a'])
test3['b'] = test2
test3['a'] = test3['a'] + 10
test3.head()

Unnamed: 0,a,b
0,11,2
1,10,1
2,12,3
3,13,4
4,9,0


In [107]:
# Scoping check: does assigning to test1 inside a function change the
# module-level test1? (The displayed result still holds 1, so it does not.)
# NOTE: this cell rebinds the name `test` to a function; earlier cells used
# `test` for DataFrames/arrays.
test1 = pd.DataFrame([1])
def test():
    # There is no `global test1` statement, so this binds a new local name
    # and leaves the module-level test1 untouched.
    test1 = pd.DataFrame([14])
test()
# Displays the module-level frame.
test1

Unnamed: 0,0
0,1
