In [76]:
def sigmoid(prediction):
    '''
    Sigmoid activation function
    '''
    return 1. / (1. + np.exp(-prediction))

In [77]:
def relu_(prediction):
    '''
    Relu activation function
    '''
    return prediction

In [78]:
def cost_mse_ols(design, data, beta):
    '''
    Mean squared error
    '''
    return (data - design.dot(beta)).T*(data - design.dot(beta))

In [79]:
def cost_grad_ols(design, data, beta):
    '''
    Calculates the first derivative of MSE w.r.t beta.
    '''
    return (2/len(data))*design.T.dot(design.dot(beta)-data) #logistic regression slides

In [80]:
def cost_log_ols(design, data, beta):
    '''
    Logisitic regression cost function
    '''
    return -data.dot(np.log(prediction)+1e-10) - ((1-data).dot(np.log(1-prediction + 1e-10)))

In [81]:
def cost_grad_log_ols(design, data, p):
    '''
    Gradient w.r.t log
    '''
    return (1/len(data))*design.T.dot(data-p)

In [82]:
def cost_mse_rid(design, data, beta, _lambda=1e-07):
    '''
    Mean squared error
    '''
    return (data - design.dot(beta)).T*(data - design.dot(beta)) + _lambda(np.sum(beta)**2)

In [83]:
def cost_grad_rid(design, data, beta, _lambda=1e-07):
    '''
    Calculates the first derivative of MSE w.r.t beta.
    '''
    regu_term = _lambda*np.sum(beta**2) 
    return (2/len(data))*design.T.dot(design.dot(beta)-data) + _lambda*np.sum(beta**2) + regu_term 

In [84]:
def cost_log_rid(design, data, beta, _lambda=1e-07):
    '''
    Logisitic regression cost function
    '''
    regu_term = _lambda*np.sum(beta**2) 
    return -data.dot(np.log(prediction)+1e-10) - ((1-data).dot(np.log(1-prediction + 1e-10))) + regu_term

In [85]:
def cost_grad_log_rid(design, data, p, beta, _lambda=1e-07):
    '''
    Gradient w.r.t log
    '''
    return (1/len(data))*design.T.dot(data-p) +2*_lambda*beta

In [86]:
def gradient_solver(N, eta, design, data, beta=None):
    M=len(data)
    if beta != None:
        beta = beta
    else:
        beta = np.random.randn(design.shape[1])
     
    for i in range(N):
        gradients = cost_grad_ols(design,frank,beta)
        beta -= eta*gradients
    return beta


In [87]:

import functions_class as fx
import classx as cl
import matplotlib.pyplot as plt
import numpy as np


n_x         = 50
x           = np.linspace(0, 1, n_x)
y           = np.linspace(0, 1, n_x)

x_mesh, y_mesh  = np.meshgrid(x,y)
noise_level     = 0.01
frank           = fx.FrankeFunction(x_mesh, y_mesh, noise_level)

frank = np.ravel(frank)



In [88]:
design = fx.DesignDesign(x,y,10)
data = frank.reshape([n_x*n_x,1])
np.random.seed(2018)
M=len(data)
N=10000
eta=0.1

beta = gradient_solver(N, eta, design, data)


prediction = design @ beta
pred = np.reshape(prediction,[n_x,n_x])



In [89]:
import pandas as pd
import os
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score

# Trying to set the seed
np.random.seed(0)
import random
random.seed(0)

# Reading file into data frame
directory = os.getcwd()
filename = directory + '/cred_card.xls'
nanDict = {} # fjerner NaN 
dataframe = pd.read_excel(filename, header=1, skiprows=0, index_col=0, na_values=nanDict)


dataframe.rename(index=str, columns={"default payment next month": "defaultPaymentNextMonth"}, inplace=True)

# Features and targets 
X = dataframe.loc[:, dataframe.columns != 'defaultPaymentNextMonth'].values
y = dataframe.loc[:, dataframe.columns == 'defaultPaymentNextMonth'].values

# Categorical variables to one-hot's
onehotencoder = OneHotEncoder(categories="auto")

X = ColumnTransformer(
    [("", onehotencoder, [3]),],
    remainder="passthrough"
).fit_transform(X)



# Train-test split
trainingShare = 0.5 
seed  = 1
XTrain, XTest, yTrain, yTest=train_test_split(X, y, train_size=trainingShare, \
                                              test_size = 1-trainingShare,
                                             random_state=seed)

# Input Scaling
sc = StandardScaler()
XTrain = sc.fit_transform(XTrain)
XTest = sc.transform(XTest)

# One-hot's of the target vector
Y_train_onehot, Y_test_onehot = onehotencoder.fit_transform(yTrain), onehotencoder.fit_transform(yTest)


# Remove instances with zeros only for past bill statements or paid amounts
'''
dataframe = dataframe.drop(dataframe[(dataframe.BILL_AMT1 == 0) &
                (dataframe.BILL_AMT2 == 0) &
                (dataframe.BILL_AMT3 == 0) &
                (dataframe.BILL_AMT4 == 0) &
                (dataframe.BILL_AMT5 == 0) &
                (dataframe.BILL_AMT6 == 0) &
                (dataframe.PAY_AMT1 == 0) &
                (dataframe.PAY_AMT2 == 0) &
                (dataframe.PAY_AMT3 == 0) &
                (dataframe.PAY_AMT4 == 0) &
                (dataframe.PAY_AMT5 == 0) &
                (dataframe.PAY_AMT6 == 0)].index)
'''
dataframe = dataframe.drop(dataframe[(dataframe.BILL_AMT1 == 0) &
                (dataframe.BILL_AMT2 == 0) &
                (dataframe.BILL_AMT3 == 0) &
                (dataframe.BILL_AMT4 == 0) &
                (dataframe.BILL_AMT5 == 0) &
                (dataframe.BILL_AMT6 == 0)].index)

dataframe = dataframe.drop(dataframe[(dataframe.PAY_AMT1 == 0) &
                (dataframe.PAY_AMT2 == 0) &
                (dataframe.PAY_AMT3 == 0) &
                (dataframe.PAY_AMT4 == 0) &
                (dataframe.PAY_AMT5 == 0) &
                (dataframe.PAY_AMT6 == 0)].index)


In [90]:
#from sklearn.linear_model import LogisticRegression
#from sklearn.model_selection import GridSearchCV

#lambdas=np.logspace(-5,7,13)
#parameters = [{'C': 1./lambdas, "solver":["lbfgs"]}]#*len(parameters)}]
#scoring = ['accuracy', 'roc_auc']
#logReg = LogisticRegression()
#gridSearch = GridSearchCV(logReg, parameters, cv=5, scoring=scoring, refit='roc_auc')

In [91]:
from sklearn.linear_model import SGDClassifier
y=np.squeeze(y)
logreg = SGDClassifier(max_iter = 100000, penalty=None, eta0=0.1, learning_rate='constant' )
logreg.fit(X,y)
prediction = logreg.predict(X)


In [92]:
count = np.count_nonzero(y-prediction)

In [93]:
accuracy = (len(y)-count)/len(y)* 100
print(accuracy, '%') 

56.79 %


# Egen logistic regression.
Ulik prøving med dif deffinisjoner.. 

In [98]:
def sigmoid(x):
    # Activation function used to map any real value between 0 and 1
    return 1 / (1 + np.exp(-x))

def net_input(eta, x):
    # Computes the weighted sum of inputs
    return np.dot(x, eta)

def probability(eta, x):
    '''
    Returns the probability after passing through sigmoid
    '''
    return sigmoid(net_input(eta, x))


def cost_grad_ols(design, data, beta):
    '''
    Calculates the first derivative of MSE w.r.t beta.
    '''
    return (2/len(data))*design.T.dot(design.dot(beta)-data) #logistic regression slides

In [113]:
eta = 0.0001 # This is out eta
m = 10

Niteration = 100
beta = np.random.randn(26,1)
for iter in range(Niteration):
    sigmoid = 1/(1+np.exp(-(XTrain)@(beta))) # vi har ikke definert prediction i vår sigmoid definisjon. 
    gradients = -(np.transpose(XTrain)@(yTrain-sigmoid))
    beta -= eta*gradients
  
    # Cost function
    
    #total_cost = -(1 / m) * np.sum(y @ np.log(sigmoid(X))) + (1 - y) @ np.log(1 - sigmoid(X)))
    
    cost = -np.sum(np.transpose(yTrain)@np.log(sigmoid) + np.transpose(1-yTrain)@np.log(1-sigmoid))
    print('cost is', cost)

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


cost is nan
cost is nan
cost is nan
cost is nan
cost is inf
cost is inf
cost is inf
cost is inf
cost is 12581.965949878391
cost is 12282.04588155842
cost is 12010.683628680812
cost is 11764.255185164884
cost is 11540.05605997125
cost is 11336.05820554089
cost is 11150.692810989127
cost is 10982.639902069352
cost is 10830.643687111677
cost is 10693.409294151574
cost is 10569.597391258292
cost is 10457.848500383821
cost is 10356.778349841446
cost is 10265.02066402383
cost is 10181.50341010486
cost is 10105.880160610204
cost is 10038.77169687258
cost is 9981.509144798349
cost is 9935.239476981364
cost is 9899.956751541346
cost is 9874.257739722463
cost is 9855.847431540107
cost is 9842.36250665162
cost is 9831.973072886545
cost is 9823.505965651566
cost is 9816.284452461186
cost is 9809.931919816236
cost is 9804.234605176425
cost is 9799.063474286757
cost is 9794.333742361212
cost is 9789.9846283096
cost is 9785.969303585789
cost is 9782.249755644021
cost is 9778.794075916852
cost is 9775

# Accuracy. 
Både egen kode og tester med scikit. 

In [114]:
def predict(self, x):
    theta = parameters[:, np.newaxis]
    return probability(theta, x)

def accuracy(self, x, actual_classes, probab_threshold=0.5):
    predicted_classes = (predict(x) >= 
                         probab_threshold).astype(int)
    predicted_classes = predicted_classes.flatten()
    accuracy = np.mean(predicted_classes == actual_classes)
    return accuracy * 100
accuracy(X, y.flatten())

TypeError: accuracy() missing 1 required positional argument: 'actual_classes'

In [119]:
from sklearn.linear_model import LogisticRegression 
from sklearn.metrics import accuracy_score 

model = LogisticRegression()
model.fit(X, y)
predicted_classes = model.predict(X)
accuracy = accuracy_score(y.flatten(),predicted_classes)
accuracy = accuracy * 100
parameters = model.coef_



In [120]:
#print(parameters)
print(accuracy, '%')

77.87333333333333 %
