In [20]:
import numpy as np
import util

class GDA:
    """Gaussian Discriminant Analysis.

    Example usage:
        > clf = GDA()
        > clf.fit(x_train, y_train)
        > clf.predict(x_eval)
    """
    def __init__(self, step_size=0.01, max_iter=10000, eps=1e-5,
                 theta_0=None, verbose=True):
        """
        Args:
            step_size: Step size for iterative solvers only.
            max_iter: Maximum number of iterations for the solver.
            eps: Threshold for determining convergence.
            theta_0: Initial guess for theta. If None, use the zero vector.
            verbose: Print loss values during training.
        """
        self.theta = theta_0
        self.step_size = step_size
        self.max_iter = max_iter
        self.eps = eps
        self.verbose = verbose

    def calc_phi(self, y, n):
        count = 0
        for yi in y:
            if yi == 1: 
                count += 1
        
        return count / n
    
    def calc_mu_0(self, x, y, n):
        x_sum = np.zeros(1, len(x[0])
        y_count = 0
        for i in range(n):
            if y[i] == 0:
                x_sum = np.add(x_sum, np.array(x[i]).reshape(1,len(x[0]))
                y_count += 1
        
        return np.divide(x_sum, y_count)
    
    def calc_mu_1(self, x, y, n):
        x_sum = np.zeros(1, len(x[0]))
        y_count = 0
        for i in range(n):
            if y[i] == 1:
                x_sum = np.add(x_sum, np.array(x[i]).reshape(1, len(x[0]))
                y_count += 1
        
        return np.divide(x_sum, y_count)

    def calc_sigma(self, x, y, n, mu_0, mu_1):
        d = len(x[0])
        sigma = np.zeros(d, d) #  d x d
        for i in range(n):
            if y[i] == 0:
                xi = np.array(x[i]).reshape(1, len(x[0]))
                sigma = np.add(np.dot(np.substract(xi, mu_0), np.subract(xi, mu_0).T))
            elif y[i] == 1:
                xi = np.array(x[i]).reshape(1, len(x[0]))
                sigma = np.add(np.dot(np.substract(xi, mu_1), np.subract(xi, mu_1).T))
            
        return np.divide(sigma, n)
            
    def fit(self, x, y):
        """Fit a GDA model to training set given by x and y by updating
        self.theta.

        Args:
            x: Training example inputs. Shape (n_examples, dim).
            y: Training example labels. Shape (n_examples,).
        """
        # *** START CODE HERE ***
        # Find phi, mu_0, mu_1, and sigma
        n = len(y)
        phi = calc_phi(y, n)
        mu_0 = calc_mu_0(x, y, n)       # average of all features for y = 0 label
        mu_1 = calc_mu_1(x, y, n)       # average of all features for y = 1 label
        sigma = calc_sigma(x, y, n, mu_0, mu_1)
        # Write theta in terms of the parameters
        
        # *** END CODE HERE ***

    def predict(self, x):
        """Make a prediction given new inputs x.

        Args:
            x: Inputs of shape (n_examples, dim).

        Returns:
            Outputs of shape (n_examples,).
        """
        # *** START CODE HERE ***
        # *** END CODE HERE



IndentationError: expected an indented block (<ipython-input-20-5f71c4456d6b>, line 61)

In [21]:
def main(train_path, valid_path, save_path):
    """Problem: Gaussian discriminant analysis (GDA)

    Args:
        train_path: Path to CSV file containing dataset for training.
        valid_path: Path to CSV file containing dataset for validation.
        save_path: Path to save predicted probabilities using np.savetxt().
    """
    # Load dataset
    x_train, y_train = util.load_dataset(train_path, add_intercept=False)
    
    print(len(x_train[0]))
#     print(x_train[0].shape[0], y_train[0])
    x = np.zeros((1, 2))
    print(np.array(x_train[0]).reshape(1, 2))
    x = np.add(x, x_train[0])
    x = np.add(x, x_train[0])
    print(x)
    # *** START CODE HERE ***
    # Train a GDA classifier
    # Plot decision boundary on validation set
    # Use np.savetxt to save outputs from validation set to save_path
    # *** END CODE HERE ***


if __name__ == '__main__':
    main(train_path='../../cs229/ps1/src/linearclass/ds1_train.csv',
         valid_path='../../cs229/ps1/src/linearclass/ds1_valid.csv',
         save_path='../../cs229/ps1/src/linearclass/gda_pred_1.txt')

#     main(train_path='../../cs229/ps1/src/linearclass/ds2_train.csv',
#          valid_path='../../cs229/ps1/src/linearclass/ds2_valid.csv',
#          save_path='../../cs229/ps1/src/linearclass/gda_pred_2.txt')


2
[[0.41180854 1.10552487]]
[[0.82361707 2.21104974]]
