In [1]:
import numpy as np
import util

In [3]:
# Read the training split; add_intercept=True is forwarded to util.load_dataset.
train_path = 'train.csv'
x_train, y_train = util.load_dataset(train_path, add_intercept=True)

In [4]:
# Echo the loaded inputs as a sanity check (loaded with add_intercept=True above).
x_train

array([[1.        , 1.        , 0.        , 2.97614241, 0.65148205],
       [1.        , 0.        , 1.        , 1.4113903 , 0.74373156],
       [1.        , 0.        , 1.        , 1.03989184, 1.2905879 ],
       ...,
       [1.        , 0.        , 1.        , 1.49124324, 0.84115559],
       [1.        , 0.        , 1.        , 2.8631773 , 1.13793409],
       [1.        , 0.        , 1.        , 1.82561719, 0.05930945]])

In [24]:
class PoissonRegression:
    """Poisson regression trained with batch gradient ascent.

    The model predicts exp(x @ theta); `fit` repeatedly moves theta along the
    gradient (y - exp(x @ theta)) @ x, scaled by `step_size`.

    Example usage:
        > clf = PoissonRegression(step_size=lr)
        > clf.fit(x_train, y_train)
        > clf.predict(x_eval)
    """

    def __init__(self, step_size=1e-5, max_iter=10000000, eps=1e-5,
                 theta_0=None, verbose=True):
        """
        Args:
            step_size: Step size for iterative solvers only.
            max_iter: Maximum number of iterations for the solver.
            eps: Threshold for determining convergence.
            theta_0: Initial guess for theta. If None, use the zero vector.
            verbose: Stored on the instance; `fit` does not currently read it.
        """
        self.theta = theta_0
        self.step_size = step_size
        self.max_iter = max_iter
        self.eps = eps
        self.verbose = verbose

    def fit(self, x, y):
        """Run gradient ascent to maximize likelihood for Poisson regression.

        Stops as soon as the norm of a parameter update drops below
        `self.eps`, or after `self.max_iter` updates, whichever comes first.
        The result is stored in `self.theta`; nothing is returned.

        Args:
            x: Training example inputs. Shape (n_examples, dim).
            y: Training example labels. Shape (n_examples,).
        """
        # *** START CODE HERE ***
        n_features = x.shape[1]
        if self.theta is None:
            self.theta = np.zeros(n_features)
        else:
            # Take a private float copy: the in-place updates below must not
            # write into the caller's theta_0 array, and an integer-typed
            # initial guess must still be able to accumulate float updates.
            self.theta = np.array(self.theta, dtype=float)

        step = 0
        # Start at +inf so the first update always runs, whatever eps is.
        diff = np.inf
        while step < self.max_iter and diff >= self.eps:
            predicted_mean = np.exp(np.matmul(x, self.theta))
            delta = self.step_size * np.matmul(y - predicted_mean, x)
            self.theta += delta

            # Size of the update just applied; used as the convergence measure.
            diff = np.linalg.norm(delta)
            step += 1
        # *** END CODE HERE ***

    def predict(self, x):
        """Make a prediction given inputs x.

        Args:
            x: Inputs of shape (n_examples, dim).

        Returns:
            Floating-point prediction for each input, shape (n_examples,).
        """
        # *** START CODE HERE ***
        eta = np.matmul(x, self.theta)
        y_pred = np.exp(eta)
        return y_pred
        # *** END CODE HERE ***

In [25]:
# Build the model with the constructor defaults (step_size=1e-5, eps=1e-5).
clf = PoissonRegression()

In [26]:
# Train on the loaded data; fit() returns nothing and stores the result in clf.theta.
clf.fit(x_train,y_train)

In [27]:
# Inspect the fitted parameter vector.
clf.theta

array([0.94784589, 0.50502207, 0.44282382, 0.28191488, 0.40006766])