In [2]:
import numpy as np
import _pickle as pickle
import scipy.optimize as optimize
%matplotlib notebook
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import time

In [21]:
class NeuralNetwork(object):
    def initialize_weights(self,W1=0,W2=0,b1=0,b2=0):
        if type(W1)== np.ndarray : self.W1 = W1
        else : self.W1 = np.random.randn(self.inputLayerSize,self.hiddenLayerSize)
        if type(W2)== np.ndarray : self.W2 = W2
        else : self.W2 = np.random.randn(self.hiddenLayerSize,self.outputLayerSize)
        if type(b1)== np.ndarray : self.b1 = b1
        else : self.b1= np.random.randn(1,self.hiddenLayerSize)
        if type(b2)== np.ndarray : self.b2 = b2
        else : self.b2= np.random.randn(1,self.outputLayerSize)
            
    def __init__(self, Lambda=0,W1=0,W2=0,b1=0,b2=0):    
        #Define Hyperparameters
        self.inputLayerSize = 12
        self.outputLayerSize = 1
        self.hiddenLayerSize = 25
        
        self.initialize_weights(W1,W2,b1,b2)

        self.Lambda = Lambda 
        
    def forwardPropagation(self, X):
        self.z2 = np.dot(X, self.W1) + self.b1
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2) + self.b2
        yHat = self.z3 
        return yHat
        
    def sigmoid(self, z):
        return 1/(1+np.exp(-z))
    
    def relu(self,z):
        z[z<0] = 0
        return z
    
    def reluPrime(self,z):
        z[z<=0] = 0
        z[z>0] = 1
        return z
    
    def sigmoidPrime(self,z):
        #Gradient of sigmoid
        return np.exp(-z)/((1+np.exp(-z))**2)
    
    def costFunction(self, X, y):
        #Compute cost for given X,y, use weights already stored in class.
        self.yHat = self.forwardPropagation(X)
        j = (0.5*sum((y-self.yHat)**2) + (self.Lambda/2)*(np.sum(self.W1**2)+np.sum(self.W2**2)))/X.shape[0]
        return j
        
    def costFunctionPrime(self, X, y):
        #Compute derivative with respect to W and W2 for a given X and y:
        self.yHat = self.forwardPropagation(X)
        
        #delta3 = np.multiply(-(y-self.yHat), self.reluPrime(self.z3))
        delta3 = -(y-self.yHat)
        #Add gradient of regularization term:
        dJdW2 = (np.dot(self.a2.T, delta3) +  self.Lambda*self.W2)/X.shape[0]
        dJdb2 = np.sum(delta3, axis=0,keepdims=True)/len(delta3)
        
        #print(self.yHat.shape,dJdW2.shape,self.a2.T.shape)
        delta2 = np.dot(delta3, self.W2.T)*self.sigmoidPrime(self.z2)
        #Add gradient of regularization term:
        dJdW1 = (np.dot(X.T, delta2) + self.Lambda*self.W1)/X.shape[0]
        dJdb1 = np.sum(delta2, axis=0,keepdims=True)/len(delta2)
        #print(dJdb1.shape)
        #print('\ndjdw1',dJdW1,'\ndjdw2',dJdW2,'\ndel3',delta3,'\ndel2',delta2)
        return dJdW1, dJdW2,dJdb1,dJdb2
    
    #Helper functions for interacting with other methods/classes
    def getParams(self):
        #Get W1 and W2 Rolled into vector:
        params = np.concatenate((self.W1.ravel(), self.W2.ravel(),self.b1.ravel(),self.b2.ravel()))
        return params
    
    def setParams(self, params):
        #Set W1 and W2 using single parameter vector:
        W1_start = 0
        W1_end = self.hiddenLayerSize*self.inputLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end],(self.inputLayerSize, self.hiddenLayerSize))
        W2_end = W1_end + self.hiddenLayerSize*self.outputLayerSize
        self.W2 = np.reshape(params[W1_end:W2_end],(self.hiddenLayerSize, self.outputLayerSize))
        b1_end = W2_end + self.hiddenLayerSize
        self.b1 = np.reshape(params[W2_end:b1_end],(1,self.hiddenLayerSize))
        b2_end = b1_end + self.outputLayerSize
        self.b2 = np.reshape(params[b1_end:b2_end],(1,self.outputLayerSize))
            
    def computeGradients(self, X, y):
        dJdW1, dJdW2, dJdb1, dJdb2 = self.costFunctionPrime(X, y)
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel(),dJdb1.ravel(),dJdb2.ravel()))
    
class Trainer(object):
    def __init__(self,N,restarts=1):
        self.N=N
        self.restarts=restarts
        
    def callbackF(self,params):
        self.N.setParams(params)
        self.J.append(self.N.costFunction(self.X,self.y))
        self.testJ.append(self.N.costFunction(self.testX,self.testY))
        
    def costFunctionWrapper(self,params,X,y):
        self.N.setParams(params)
        cost = self.N.costFunction(X,y)
        grad = self.N.computeGradients(X,y)
        return cost,grad
    
    def train(self,trainX,trainy,testX,testY):
        self.X,self.testX=trainX,testX
        self.y,self.testY=trainy,testY
        
        options = {'maxiter':3000,'disp':True,'gtol':1e-5,'iprint':0}
        min_loss=100
        for i in range(self.restarts):
            print('res ',i)
            self.J=[]
            self.testJ=[]
            if i!=0 : self.N.initialize_weights()
            params0=self.N.getParams()
            res = optimize.minimize(self.costFunctionWrapper,params0,jac=True,method='L-BFGS-B',args=(trainX,trainy),options=options,callback=self.callbackF)
            if res.fun<min_loss : min_loss,_res,self._J,self._testJ=res.fun,res,self.J,self.testJ
            
        self.N.setParams(_res.x)
        self.optimizationResults = _res

In [4]:
with open("dataa", "rb") as input_file:
   df = pickle.load(input_file)
df['CH2']=[np.nan]*len(df)
for i in range(len(df)):
    if df['CH2_SHELA'].iloc[i]!=-99 :
        if df['CH2_SPIES'].iloc[i]!=-99 :
            df['CH2'].iloc[i]=(df['CH2_SPIES'].iloc[i]+df['CH2_SHELA'].iloc[i])/2
        else :
            df['CH2'].iloc[i]=df['CH2_SHELA'].iloc[i]
    elif df['CH2_SPIES'].iloc[i]!=-99 :
        df['CH2'].iloc[i]=df['CH2_SPIES'].iloc[i]
        df['CH1']=[np.nan]*len(df)
for i in range(len(df)):
    if df['CH1_SHELA'].iloc[i]!=-99 :
        if df['CH1_SPIES'].iloc[i]!=-99 :
            df['CH1'].iloc[i]=(df['CH1_SPIES'].iloc[i]+df['CH1_SHELA'].iloc[i])/2
        else :
            df['CH1'].iloc[i]=df['CH1_SHELA'].iloc[i]
    elif df['CH1_SPIES'].iloc[i]!=-99 :
        df['CH1'].iloc[i]=df['CH1_SPIES'].iloc[i]
    else :
        df['CH1'].iloc[i]=np.nan
dff=df[['K','G','CH1','W1','U','Z','J','W2','I','H','R','CH2','REDSHIFT','REDSHIFT_ERR','ZWARNING']].dropna()
dff[['K','G','CH1','W1','U','Z','J','W2','I','H','R','CH2','REDSHIFT']]=dff[['K','G','CH1','W1','U','Z','J','W2','I','H','R','CH2','REDSHIFT']].replace(-99,np.nan).dropna()
dff=dff.where(dff['REDSHIFT_ERR']<dff['REDSHIFT']*0.1).dropna()
X_data,y_data=dff[['K','G','CH1','W1','U','Z','J','W2','I','H','R','CH2']],dff['REDSHIFT']

In [5]:
inscaler = MinMaxScaler()
Xs_data  = inscaler.fit_transform(X_data)

In [6]:
def modelrun(Lambda=0):
    N=int(len(Xs_data)/4)
    Jtrain,Jtest,Res=[],[],[]
    dz_val=[]
    dz_train=[]
    for i in range(4):
        Xs_train,y_train=list(Xs_data.copy()),list(y_data.copy())
        del Xs_train[i*N:i*N+N]
        del y_train[i*N:i*N+N]
        Xs_val,y_val=Xs_data[i*N:i*N+N],y_data[i*N:i*N+N]
        
        ouscaler= MinMaxScaler()
        ys_train= ouscaler.fit_transform(np.array(y_train).reshape(-1,1))
        ys_val  = ouscaler.transform(np.array(y_val).reshape(-1,1))
        
        Xs_train,Xs_val,ys_train,ys_val,y_val,y_train=np.array(Xs_train),np.array(Xs_val),np.array(ys_train),np.array(ys_val),np.array(y_val),np.array(y_train)
        X,y,testX,testY=Xs_train,ys_train,Xs_val,ys_val
        
        if i>0 : NN=NeuralNetwork(Lambda,W1,W2)
        else : NN=NeuralNetwork(Lambda)
        T=Trainer(NN)
        T.train(X,y,testX,testY)
        
        Jtrain.append(T.J)
        Jtest.append(T.testJ)
        Res.append(T.res)

        pred_train=ouscaler.inverse_transform(NN.forwardPropagation(X)).reshape(len(y_train))
        pred_val=ouscaler.inverse_transform(NN.forwardPropagation(testX)).reshape(len(y_val))
        dz_val.append(np.array((pred_val-y_val)/(1+y_val)))
        dz_train.append(np.array((pred_train-y_train)/(1+y_train)))
        W1,W2=T.N.W1,T.N.W2
    print('std train:',np.mean([np.std(dz_train[i]) for i in range(4)]),'\nmean train:',np.mean([np.mean(dz_train[i]) for i in range(4)]))
    print('std val:',np.mean([np.std(dz_val[i]) for i in range(4)]),'\nmean val:',np.mean([np.mean(dz_val[i]) for i in range(4)]))

    return Jtrain,Jtest,Res,dz_val,dz_train

In [41]:
info=[]
for reg in [0,0.1,1e-3,1e-5,1e-7]:
    print('Reg :',reg)
    start_time=time.time()
    #Jtrain,Jtest,Res,dz_val,dz_train=modelrun(reg)
    info.append(np.array(modelrun(reg,10)))
    print('evaluation time-',time.time()-start_time)

Reg : 0
std train: 0.20176853373062198 
mean train: 0.03348773360656025
std val: 0.20523369123409557 
mean val: 0.03312538896937171
evaluation time- 869.1334266662598
Reg : 0.1
std train: 0.21343552537833166 
mean train: 0.042453784611848615
std val: 0.21129712382950872 
mean val: 0.045074786982340104
evaluation time- 928.0013184547424
Reg : 0.001
std train: 0.1997223169117734 
mean train: 0.034873046328017555
std val: 0.20261522818273356 
mean val: 0.03325163191986223
evaluation time- 876.7002322673798
Reg : 1e-05
std train: 0.20116730803725855 
mean train: 0.034122790206609646
std val: 0.20366703495223037 
mean val: 0.03418241976287968
evaluation time- 841.2000370025635
Reg : 1e-07
std train: 0.20433813964703232 
mean train: 0.03735864836086973
std val: 0.20998651227183274 
mean val: 0.037304690559174715
evaluation time- 834.6970627307892


In [43]:
dz_train,dz_val=info[0][4],info[0][3]
print('std train:',[np.std(dz_train[i]) for i in range(4)],'\nmean train:',[np.mean(dz_train[i]) for i in range(4)])
print('std val:',[np.std(dz_val[i]) for i in range(4)],'\nmean val:',[np.mean(dz_val[i]) for i in range(4)])

std train: [0.202510779600136, 0.19748365342192123, 0.20724276590993992, 0.1998369359904908] 
mean train: [0.032725406695742004, 0.03132946041777407, 0.037039468679649704, 0.03285659863307523]
std val: [0.22428630402787758, 0.20663439186172664, 0.18389561407358862, 0.20611845497318942] 
mean val: [0.04118679158373159, 0.035212576089381706, 0.0343532918075553, 0.021748896396818238]


In [44]:
info[0][2]

array([      fun: array([0.00336583])
 hess_inv: <351x351 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 4.40177635e-04, -2.93032084e-05,  1.77396587e-06,  7.03136419e-06,
       -6.79350674e-05,  1.50096862e-04, -2.47588316e-05,  1.72436322e-05,
       -3.00149968e-06, -3.05045230e-04,  1.23833859e-05,  5.32738185e-05,
       -1.58274266e-05,  3.19196924e-05, -5.48402753e-05, -6.52742799e-05,
        4.31945480e-06,  1.78805874e-04, -1.38988008e-04,  5.16904570e-05,
       -3.22345495e-04, -2.36894907e-04,  1.45118569e-04,  1.11656498e-05,
       -2.07079279e-04,  4.17807717e-04, -3.66806327e-05,  1.91644603e-06,
        8.46032554e-06, -5.98973948e-05,  1.34850202e-04, -3.58273865e-05,
        1.37071773e-05, -4.75281717e-06, -2.73404713e-04,  1.57085516e-05,
        4.86829721e-05, -1.50333577e-05,  3.07289041e-05, -4.66439148e-05,
       -5.67055350e-05,  8.68010207e-06,  1.78365566e-04, -1.19393926e-04,
        6.67317985e-05, -3.01041949e-04, -1.64218463e-04,  1.39027

In [8]:
def modelrun(Lambda=0,restarts=1):
    N=int(len(Xs_data)/4)
    Jtrain,Jtest,Res=[],[],[]
    dz_val=[]
    dz_train=[]
    for i in range(4):
        Xs_train,y_train=list(Xs_data.copy()),list(y_data.copy())
        del Xs_train[i*N:i*N+N]
        del y_train[i*N:i*N+N]
        Xs_val,y_val=Xs_data[i*N:i*N+N],y_data[i*N:i*N+N]
        
        ouscaler= MinMaxScaler()
        ys_train= ouscaler.fit_transform(np.array(y_train).reshape(-1,1))
        ys_val  = ouscaler.transform(np.array(y_val).reshape(-1,1))
        
        Xs_train,Xs_val,ys_train,ys_val,y_val,y_train=np.array(Xs_train),np.array(Xs_val),np.array(ys_train),np.array(ys_val),np.array(y_val),np.array(y_train)
        X,y,testX,testY=Xs_train,ys_train,Xs_val,ys_val
        
        if i>0 : NN=NeuralNetwork(Lambda,W1,W2,b1,b2)
        else : NN=NeuralNetwork(Lambda)
        T=Trainer(NN,restarts)
        T.train(X,y,testX,testY)
        
        Jtrain.append(T._J)
        Jtest.append(T._testJ)
        Res.append(T.optimizationResults)

        pred_train=ouscaler.inverse_transform(NN.forwardPropagation(X)).reshape(len(y_train))
        pred_val=ouscaler.inverse_transform(NN.forwardPropagation(testX)).reshape(len(y_val))
        dz_val.append(np.array((pred_val-y_val)/(1+y_val)))
        dz_train.append(np.array((pred_train-y_train)/(1+y_train)))
        W1,W2,b1,b2=T.N.W1,T.N.W2,T.N.b1,T.N.b2
    print('std train:',np.mean([np.std(dz_train[i]) for i in range(4)]),'\nmean train:',np.mean([np.mean(dz_train[i]) for i in range(4)]))
    print('std val:',np.mean([np.std(dz_val[i]) for i in range(4)]),'\nmean val:',np.mean([np.mean(dz_val[i]) for i in range(4)]))

    return Jtrain,Jtest,Res,dz_val,dz_train,W1,W2,b1,b2
'''start_time=time.time()
Jtrain,Jtest,Res,dz_val,dz_train=modelrun(1e-5,10)
print('evaluation time-',time.time()-start_time)'''

"start_time=time.time()\nJtrain,Jtest,Res,dz_val,dz_train=modelrun(1e-5,10)\nprint('evaluation time-',time.time()-start_time)"

In [18]:
print('std train:',[np.std(dz_train[i]) for i in range(4)],'\nmean train:',[np.mean(dz_train[i]) for i in range(4)])
print('std val:',[np.std(dz_val[i]) for i in range(4)],'\nmean val:',[np.mean(dz_val[i]) for i in range(4)])

std train: [0.169271210139035, 0.16786072991384748, 0.16506634963117692, 0.1545308759383062] 
mean train: [0.026132419969089993, 0.02325598905427173, 0.02250073499739691, 0.020065259208952455]
std val: [0.18789424676612312, 0.2070898316491653, 0.1647836074114367, 0.17371766329939678] 
mean val: [0.05369458212699268, 0.041614187828280975, 0.014863892992484931, 0.01159802594270071]


In [17]:
#Jtrain,Jtest=info[0][0],info[0][1]
plt.figure()
i=3
plt.plot(Jtrain[i],label='train')
plt.plot(Jtest[i],label='test')
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7ff10cfe2990>

In [11]:
start_time=time.time()
Jtrain,Jtest,Res,dz_val,dz_train,W1,W2,b1,b2=modelrun(0,1)
#info.append(np.array(modelrun(0,10)))
print('evaluation time-',time.time()-start_time)

res  0
res  0
res  0
res  0
std train: 0.1641822914055914 
mean train: 0.022988600807427772
std val: 0.18337133728153046 
mean val: 0.030442672222614823
evaluation time- 1340.150349855423


In [23]:
def modelrun(Lambda=0,restarts=1,W1=0,W2=0,b1=0,b2=0):
    N=int(len(Xs_data)/4)
    Jtrain,Jtest,Res=[],[],[]
    dz_val=[]
    dz_train=[]
    for i in range(4):
        Xs_train,y_train=list(Xs_data.copy()),list(y_data.copy())
        del Xs_train[i*N:i*N+N]
        del y_train[i*N:i*N+N]
        Xs_val,y_val=Xs_data[i*N:i*N+N],y_data[i*N:i*N+N]
        
        ouscaler= MinMaxScaler()
        ys_train= ouscaler.fit_transform(np.array(y_train).reshape(-1,1))
        ys_val  = ouscaler.transform(np.array(y_val).reshape(-1,1))
        
        Xs_train,Xs_val,ys_train,ys_val,y_val,y_train=np.array(Xs_train),np.array(Xs_val),np.array(ys_train),np.array(ys_val),np.array(y_val),np.array(y_train)
        X,y,testX,testY=Xs_train,ys_train,Xs_val,ys_val
        
        NN=NeuralNetwork(Lambda,W1,W2,b1,b2)
        T=Trainer(NN,restarts)
        T.train(X,y,testX,testY)
        
        Jtrain.append(T._J)
        Jtest.append(T._testJ)
        Res.append(T.optimizationResults)

        pred_train=ouscaler.inverse_transform(NN.forwardPropagation(X)).reshape(len(y_train))
        pred_val=ouscaler.inverse_transform(NN.forwardPropagation(testX)).reshape(len(y_val))
        dz_val.append(np.array((pred_val-y_val)/(1+y_val)))
        dz_train.append(np.array((pred_train-y_train)/(1+y_train)))
        W1,W2,b1,b2=T.N.W1,T.N.W2,T.N.b1,T.N.b2
    print('std train:',np.mean([np.std(dz_train[i]) for i in range(4)]),'\nmean train:',np.mean([np.mean(dz_train[i]) for i in range(4)]))
    print('std val:',np.mean([np.std(dz_val[i]) for i in range(4)]),'\nmean val:',np.mean([np.mean(dz_val[i]) for i in range(4)]))

    return Jtrain,Jtest,Res,dz_val,dz_train,W1,W2,b1,b2
start_time=time.time()
Jtrain,Jtest,Res,dz_val,dz_train,W1,W2,b1,b2=modelrun(0,1,W1,W2,b1,b2)
#info.append(np.array(modelrun(0,10)))
print('evaluation time-',time.time()-start_time)

res  0
res  0
res  0
res  0
std train: 0.14385138398537908 
mean train: 0.017771841405767083
std val: 0.15736940545469336 
mean val: 0.018278671305552517
evaluation time- 2098.8036212921143


In [20]:
Res[3]

      fun: array([0.00171949])
 hess_inv: <351x351 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 1.00783762e-05, -4.36188070e-08,  9.59063082e-06, -1.49828131e-09,
        7.39777301e-09,  5.32658321e-05,  1.73003967e-05,  3.75153246e-08,
        1.38187231e-07, -2.28181659e-08, -1.01893106e-07,  2.33100441e-08,
        2.31815801e-07, -8.71888500e-08,  1.02416770e-05, -8.48408492e-09,
       -9.17518406e-06,  5.71853182e-06, -3.77224622e-05, -2.66379335e-05,
        1.15049019e-07, -6.71101215e-08, -1.31956920e-07, -4.73040473e-06,
       -3.30481050e-06, -4.30684036e-06, -4.15841797e-09,  1.92263436e-06,
        1.58016530e-08,  2.03021418e-09,  6.69090067e-05,  2.38344247e-05,
       -3.65333724e-08,  8.32656437e-08,  9.00470038e-09, -1.13273491e-07,
       -6.33517404e-08,  2.81065713e-07, -1.31265521e-07,  2.51354478e-06,
       -1.06994147e-08, -1.25969521e-05,  8.36988275e-06, -9.04634914e-06,
       -3.86569267e-05,  2.07803405e-07,  1.22591907e-07, -2.41253611e-07

In [17]:
print('std train:',[np.std(dz_train[i]) for i in range(4)],'\nmean train:',np.abs([np.mean(dz_train[i]) for i in range(4)]))
print('std val:',[np.std(dz_val[i]) for i in range(4)],'\nmean val:',np.abs([np.mean(dz_val[i]) for i in range(4)]))

std train: [0.17258629245730223, 0.16570616599526178, 0.16501018670227346, 0.1612063182866698] 
mean train: [0.02873889 0.0225872  0.02416945 0.02199843]
std val: [0.2430584856418385, 0.20711618431743253, 0.16601741316176905, 0.17271801060344266] 
mean val: [0.03797506 0.04313295 0.02408865 0.00068956]


In [60]:
[(np.std(dz_val[i]),np.mean(dz_val[i])) for i in range(4)]

[(0.31368288370690167, 0.6494939815095458),
 (0.3016873068883493, 0.6578411327809536),
 (0.07685393076630762, -0.14287281458955192),
 (0.31089825650658026, 0.6467057261658093)]

In [8]:
Res[0]

      fun: array([0.00308256])
 hess_inv: <351x351 LbfgsInvHessProduct with dtype=float64>
      jac: array([-1.49577495e-06, -6.47481442e-07,  1.99438301e-06, -5.96022241e-07,
        5.26011782e-06, -2.39008076e-06,  1.69298721e-06, -5.07001433e-06,
       -7.16632030e-06, -6.69727280e-06, -8.75180423e-07,  2.85577305e-06,
        7.68102921e-06, -2.76562152e-05, -5.05699640e-06, -5.32981818e-06,
        1.78847400e-05,  1.07748858e-07, -8.94533611e-07,  9.91606192e-06,
        1.07747117e-06,  3.92906919e-07,  1.65779633e-06, -2.49678339e-06,
       -2.47706085e-06, -1.02823707e-06, -1.05880511e-06,  2.61110190e-06,
        2.67477328e-07,  8.47187524e-06, -4.11389700e-06,  5.62224657e-06,
       -1.00347439e-05, -1.60320546e-06,  1.01105487e-06,  3.85261548e-06,
        2.86047265e-06,  1.22586822e-05,  1.74613738e-05, -3.27657347e-06,
       -7.20058166e-06,  1.58761683e-05,  3.54385925e-07, -1.37009119e-06,
        9.40245134e-06,  4.75043774e-06,  5.82354537e-07,  1.54358525e-06

In [None]:
kj

In [None]:
kjnlnk

In [None]:
sad;  ;lasd 