<a href="https://colab.research.google.com/github/Tranminhtuan48/pythonbasics/blob/main/BTTL(4)_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
from scipy import optimize
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score

In [2]:
# Mount Google Drive at /content/gdrive (requires the google.colab package);
# the data files read later in this notebook are loaded from under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
def readData(folder , filename):
    """Load a comma-separated numeric file into a design matrix and a target vector.

    Parameters
    ----------
    folder : str
        Directory that contains the data file.
    filename : str
        Name of the file; every row must hold numeric values separated by commas.

    Returns
    -------
    X : ndarray of shape (m, n + 1)
        All columns except the last, with a column of ones prepended at index 0.
    y : ndarray of shape (m,)
        The last column of the file.
    """
    # ndmin=2 keeps a one-row file two-dimensional; without it loadtxt returns
    # a 1-D array and the column slicing below raises IndexError.
    data = np.loadtxt(os.path.join(folder, filename), delimiter=',', ndmin=2)
    X = data[:, :-1]
    y = data[:, -1]
    one = np.ones((X.shape[0], 1))
    X = np.concatenate((one, X), axis=1)
    return X, y

4.1.3 Linear regression: comparing scipy.optimize methods

In [4]:
def normScaling(X, y):
    """Min-max scale the feature columns of X and the target y to the range [0, 1].

    Column 0 of X is left untouched (readData stores the column of ones there).
    The inputs are not modified; scaled float copies are returned.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix whose columns 1..n-1 are rescaled independently.
    y : array-like
        Target values, rescaled with their own minimum and maximum.

    Returns
    -------
    X_scaled, y_scaled : ndarray
        A constant column (or constant y) maps to all zeros instead of NaN.
    """
    # Work on a float copy so the caller's array is not overwritten and
    # integer inputs are not truncated on assignment.
    X = np.array(X, dtype=float)
    y = np.asarray(y, dtype=float)

    for col in range(1, X.shape[1]):
        column = X[:, col]
        low = np.min(column)
        span = np.max(column) - low
        # A constant column has zero range; divide by 1 to avoid 0/0 -> NaN.
        X[:, col] = (column - low) / (span if span != 0 else 1.0)

    y_low = np.min(y)
    y_span = np.max(y) - y_low
    y = (y - y_low) / (y_span if y_span != 0 else 1.0)
    return X, y

In [5]:
def standardScaling(X, y):
    """Standardize the feature columns of X and the target y to zero mean, unit std.

    Column 0 of X is left untouched (readData stores the column of ones there).
    The inputs are not modified; scaled float copies are returned.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix whose columns 1..n-1 are standardized independently.
    y : array-like
        Target values, standardized with their own mean and standard deviation.

    Returns
    -------
    X_scaled, y_scaled : ndarray
        A constant column (or constant y) maps to all zeros instead of NaN.
    """
    # Work on a float copy so the caller's array is not overwritten and
    # integer inputs are not truncated on assignment.
    X = np.array(X, dtype=float)
    y = np.asarray(y, dtype=float)

    for col in range(1, X.shape[1]):
        column = X[:, col]
        sigma = np.std(column)
        # Zero std means a constant column; divide by 1 to avoid 0/0 -> NaN.
        X[:, col] = (column - np.mean(column)) / (sigma if sigma != 0 else 1.0)

    y_sigma = np.std(y)
    y = (y - np.mean(y)) / (y_sigma if y_sigma != 0 else 1.0)
    return X, y

In [6]:
def predict(X, w):
    """Return the linear model output X . w.

    `w` may be any array-like; it is converted to an ndarray before the product.
    """
    weights = np.array(w)
    predictions = np.dot(X, weights)
    return predictions

In [8]:
def costFunction(w, X, y):
    """Half mean squared error of the linear model X . w against y.

    Computes (1 / (2m)) * sum((X . w - y)^2), where m is the number of rows of X.
    """
    n_samples = X.shape[0]
    residuals = np.dot(X, w) - y
    squared_error_total = np.sum(np.square(residuals))
    return (1/ (2*n_samples)) * squared_error_total

In [9]:
def linearRegression(X, y, w_init, method, iterations):
    """Fit linear-regression weights by minimizing costFunction with scipy.

    Parameters
    ----------
    X, y : ndarray
        Training design matrix and targets, forwarded to costFunction.
    w_init : ndarray
        Starting point for the optimizer.
    method : str
        Solver name passed to scipy.optimize.minimize.
    iterations : int
        Value passed as the solver's "maxiter" option.

    Returns
    -------
    (weights, cost) : the optimizer's final point and the cost at that point.
    """
    solver_options = {"maxiter": iterations}
    fit = optimize.minimize(
        costFunction,
        w_init,
        args=(X, y),
        method=method,
        options=solver_options,
    )
    return fit.x, fit.fun

In [10]:
def compareAlgorithms(X_train, y_train, X_test, y_test, algorithms, iterations=1500):
    """Train linear regression with several solvers and report the test MSE of each.

    Parameters
    ----------
    X_train, y_train : ndarray
        Training design matrix and targets.
    X_test, y_test : ndarray
        Held-out design matrix and targets used for scoring.
    algorithms : iterable of str
        scipy.optimize.minimize method names to compare.
    iterations : int, optional
        Iteration cap handed to every solver (default 1500).

    Returns
    -------
    dict
        Maps each method name to the mean squared error on the test set.
    """
    # minimize documents x0 as a 1-D array of shape (n,); a column vector
    # (n, 1) is rejected by recent SciPy releases, so start from a flat vector.
    w_init = np.zeros(X_train.shape[1])
    result = {}
    for algorithm in algorithms:
        w, _ = linearRegression(X_train, y_train, w_init, method=algorithm,
                                iterations=iterations)
        y_pred = predict(X_test, w)
        # scikit-learn metrics take (y_true, y_pred) in that order.
        result[algorithm] = mean_squared_error(y_test, y_pred)
    return result

In [12]:
def main(folder="/content/gdrive/My Drive/", filename='ex1data2.txt'):
    """Run the linear-regression solver comparison and print the test MSE per solver.

    Parameters
    ----------
    folder : str, optional
        Directory containing the data file (defaults to the mounted Drive root).
    filename : str, optional
        Comma-separated data file whose last column is the target.
    """
    X, y = readData(folder, filename)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                        random_state=5)

    # Capture the training statistics before scaling, so the test split can be
    # transformed with the same parameters. Scaling the test split with its own
    # mean/std would put train and test on different scales.
    feat_mean = X_train[:, 1:].mean(axis=0)
    feat_std = X_train[:, 1:].std(axis=0)
    feat_std[feat_std == 0] = 1.0  # constant feature: avoid division by zero
    y_mean = y_train.mean()
    y_std = y_train.std()
    if y_std == 0:
        y_std = 1.0

    X_train, y_train = standardScaling(X_train, y_train)

    X_test = X_test.astype(float)  # astype returns a copy; column 0 (ones) is kept
    X_test[:, 1:] = (X_test[:, 1:] - feat_mean) / feat_std
    y_test = (y_test - y_mean) / y_std

    # Other algorithms:
    # TNC, BFGS, L-BFGS-B, Nelder-Mead, Powell, CG, Newton-CG, COBYLA, SLSQP, ...
    comparison = compareAlgorithms(X_train, y_train, X_test, y_test, ['TNC', 'BFGS', 'L-BFGS-B'])
    print(comparison)

4.2.3 Logistic regression: comparing scipy.optimize methods

In [14]:
def normScaling(X, y):
    """Min-max scale the feature columns of X to [0, 1]; y is returned unchanged.

    Column 0 of X is left untouched (readData stores the column of ones there).
    X itself is not modified; a scaled float copy is returned.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix whose columns 1..n-1 are rescaled independently.
    y : any
        Labels; passed through as-is.

    Returns
    -------
    X_scaled : ndarray
        A constant column maps to all zeros instead of NaN.
    y : the same object that was passed in.
    """
    # Float copy: keeps the caller's array intact and avoids integer truncation.
    X = np.array(X, dtype=float)
    for col in range(1, X.shape[1]):
        column = X[:, col]
        low = np.min(column)
        span = np.max(column) - low
        # A constant column has zero range; divide by 1 to avoid 0/0 -> NaN.
        X[:, col] = (column - low) / (span if span != 0 else 1.0)
    return X, y

In [15]:
def standardScaling(X, y):
    """Standardize the feature columns of X to zero mean, unit std; y is unchanged.

    Column 0 of X is left untouched (readData stores the column of ones there).
    X itself is not modified; a scaled float copy is returned.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix whose columns 1..n-1 are standardized independently.
    y : any
        Labels; passed through as-is.

    Returns
    -------
    X_scaled : ndarray
        A constant column maps to all zeros instead of NaN.
    y : the same object that was passed in.
    """
    # Float copy: keeps the caller's array intact and avoids integer truncation.
    X = np.array(X, dtype=float)
    for col in range(1, X.shape[1]):
        column = X[:, col]
        sigma = np.std(column)
        # Zero std means a constant column; divide by 1 to avoid 0/0 -> NaN.
        X[:, col] = (column - np.mean(column)) / (sigma if sigma != 0 else 1.0)
    return X, y

In [16]:
def predict(X, w):
    """Classify each row of X as 0 or 1 with a logistic model.

    The sigmoid of X . w is thresholded at 0.5; the result is an int32 array.
    """
    logits = np.dot(X, w)
    probabilities = 1 / (1 + np.exp(-logits))
    is_positive = probabilities >= 0.5
    return is_positive.astype('int32')

In [17]:
def costFunction(w, X, y):
    """Mean binary cross-entropy of a logistic model with weights w.

    Computes -(1/m) * sum(y * log(h) + (1 - y) * log(1 - h)) with
    h = sigmoid(X . w), evaluated in log space so the result stays finite
    even when the sigmoid saturates at 0 or 1.

    Parameters
    ----------
    w : ndarray of shape (n,)
        Model weights.
    X : ndarray of shape (m, n)
        Design matrix.
    y : ndarray of shape (m,)
        Binary labels (0 or 1).

    Returns
    -------
    float
        The average cross-entropy loss.
    """
    m = X.shape[0]
    z = np.dot(X, w)
    # log(sigmoid(z)) = -log(1 + exp(-z)) and log(1 - sigmoid(z)) = -log(1 + exp(z)).
    # logaddexp evaluates these without overflow, so there is no log(0) = -inf
    # and no 0 * -inf = NaN as with the direct formula.
    log_h = -np.logaddexp(0, -z)
    log_one_minus_h = -np.logaddexp(0, z)
    J_w = (-1/m) * (np.dot(y, log_h) + np.dot((1-y), log_one_minus_h))
    return J_w

In [18]:
def logisticRegression(X, y, w_init, method, iterations):
    """Fit logistic-regression weights by minimizing costFunction with scipy.

    Parameters
    ----------
    X, y : ndarray
        Training design matrix and labels, forwarded to costFunction.
    w_init : ndarray
        Starting point for the optimizer.
    method : str
        Solver name passed to scipy.optimize.minimize.
    iterations : int
        Value passed as the solver's "maxiter" option.

    Returns
    -------
    (weights, cost) : the optimizer's final point and the cost at that point.
    """
    solver_options = {"maxiter": iterations}
    fit = optimize.minimize(
        costFunction,
        w_init,
        args=(X, y),
        method=method,
        options=solver_options,
    )
    return fit.x, fit.fun

In [19]:
def compareAlgorithms(X_train, y_train, X_test, y_test, algorithms, iterations=1500):
    """Train logistic regression with several solvers and report test accuracy of each.

    Parameters
    ----------
    X_train, y_train : ndarray
        Training design matrix and labels.
    X_test, y_test : ndarray
        Held-out design matrix and labels used for scoring.
    algorithms : iterable of str
        scipy.optimize.minimize method names to compare.
    iterations : int, optional
        Iteration cap handed to every solver (default 1500).

    Returns
    -------
    dict
        Maps each method name to the classification accuracy on the test set.
    """
    # minimize documents x0 as a 1-D array of shape (n,); a column vector
    # (n, 1) is rejected by recent SciPy releases, so start from a flat vector.
    w_init = np.zeros(X_train.shape[1])
    result = {}
    for algorithm in algorithms:
        w, _ = logisticRegression(X_train, y_train, w_init, method=algorithm,
                                  iterations=iterations)
        y_pred = predict(X_test, w)
        # scikit-learn metrics take (y_true, y_pred) in that order.
        result[algorithm] = accuracy_score(y_test, y_pred)
    return result

In [21]:
def main(folder="/content/gdrive/My Drive/", filename='ex2data1.txt'):
    """Run the logistic-regression solver comparison and print test accuracy per solver.

    Parameters
    ----------
    folder : str, optional
        Directory containing the data file (defaults to the mounted Drive root).
    filename : str, optional
        Comma-separated data file whose last column is the class label.
    """
    X, y = readData(folder, filename)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                        random_state=5)

    # Capture the training statistics before scaling, so the test split can be
    # transformed with the same parameters. Scaling the test split with its own
    # mean/std would put train and test on different scales.
    feat_mean = X_train[:, 1:].mean(axis=0)
    feat_std = X_train[:, 1:].std(axis=0)
    feat_std[feat_std == 0] = 1.0  # constant feature: avoid division by zero

    X_train, y_train = standardScaling(X_train, y_train)

    X_test = X_test.astype(float)  # astype returns a copy; column 0 (ones) is kept
    X_test[:, 1:] = (X_test[:, 1:] - feat_mean) / feat_std

    # Other algorithms:
    # TNC, BFGS, L-BFGS-B, Nelder-Mead, Powell, CG, Newton-CG, COBYLA, SLSQP, ...
    comparison = compareAlgorithms(X_train, y_train, X_test, y_test, ['TNC', 'BFGS', 'L-BFGS-B'])
    print(comparison)