# JUPYTER NOTEBOOK TIPS

Each rectangular box is called a cell. 
* ctrl+ENTER evaluates the current cell: if it contains Python code, it runs the code; if it contains Markdown, it renders the text.
* alt+ENTER evaluates the current cell and adds a new cell below it.
* If you click to the left of a cell, you'll notice the frame changes color to blue. You can erase a cell by hitting 'dd' (that's two "d"s in a row) when the frame is blue.

# Supervised Learning Model Skeleton

We'll use this skeleton for implementing different supervised learning algorithms.

In [3]:
class Model:
    '''
    Skeleton shared by the supervised learning algorithms in this notebook.

    Both methods are placeholders that raise NotImplementedError; a concrete
    model is expected to override them.
    '''

    def fit(self):
        '''
        Train the model. Placeholder: always raises NotImplementedError.
        '''
        raise NotImplementedError()

    def predict(self, test_points):
        '''
        Args:
            test_points:
                points to predict on (unused by this placeholder)
        Raises:
            NotImplementedError: always, until overridden.
        '''
        raise NotImplementedError()

## General supervised learning performance related functions

The `conf_matrix` function takes as input an array of true labels (*true*), an array of predicted labels (*pred*), and the number of classes (*n_classes*), and returns the corresponding confusion matrix.

In [4]:
def conf_matrix(true, pred, n_classes):
    '''
    Build a confusion matrix whose entry [i][j] counts the samples with
    true class i that were predicted as class j.

    Args:
        true:  ndarray
            true classes for the test set, given either as integer labels
            (shape (n,) or (n, 1)) or as one row per sample of one-hot
            vectors / class scores (shape (n, c) with c > 1), in which case
            the class of a row is its argmax
        pred: ndarray
            predicted classes for the test set, in either of the layouts
            accepted for `true`
        n_classes: int
            number of classes; if None it is inferred from the width of a
            2-D `true`
    Returns:
        result: ndarray
            n_classes x n_classes confusion matrix (float entries)
    Raises:
        ValueError: if `true` and `pred` hold a different number of samples,
            if a label lies outside [0, n_classes), or if n_classes is None
            and cannot be inferred.
    '''
    def _as_labels(values):
        # Rows of width > 1 are one-hot / score vectors: reduce them to the
        # index of their largest entry. Anything else is already labels.
        values = np.asarray(values)
        if values.ndim == 2 and values.shape[1] > 1:
            return np.argmax(values, axis=1)
        return values.reshape(-1).astype(int)

    if n_classes is None:
        true_arr = np.asarray(true)
        if true_arr.ndim != 2:
            raise ValueError("n_classes is required when labels are not one-hot encoded")
        n_classes = true_arr.shape[1]

    true_labels = _as_labels(true)
    pred_labels = _as_labels(pred)

    if true_labels.shape[0] != pred_labels.shape[0]:
        raise ValueError("true and pred must contain the same number of samples")
    for labels in (true_labels, pred_labels):
        # Negative indices would silently wrap around in the scatter-add below.
        if np.any((labels < 0) | (labels >= n_classes)):
            raise ValueError("labels must lie in the range [0, n_classes)")

    result = np.zeros(shape=(n_classes, n_classes))
    # np.add.at accumulates repeated (i, j) pairs, unlike plain fancy-index +=.
    np.add.at(result, (true_labels, pred_labels), 1)

    # returns the confusion matrix as numpy.ndarray
    return result

def kfold(size, k):
    '''
    Randomly partition the indices 0..size-1 into k folds for cross-validation.

    Every index appears in the validation partition of exactly one fold and
    in the training partition of the other k-1 folds.

    Args:
        size: int
            number of examples in the dataset that you want to split into k
        k: int
            Number of desired splits in data.(Assume test set is already separated.)
    Returns:
        fold_dict: dict
            A dictionary with integer keys corresponding to folds. Values are (training_indices, val_indices).

        val_indices: ndarray
            roughly 1/k of the shuffled indices, held out as the validation
            partition of that fold (the first size % k folds get one extra).
        train_indices: ndarray
            Remaining 1-(1/k) of the indices.

            e.g. fold_dict = {0: (train_0_indices, val_0_indices),
            1: (train_1_indices, val_1_indices), 2: (train_2_indices, val_2_indices)} for k = 3
    Raises:
        ValueError: if k is smaller than 1.
    '''
    if k < 1:
        raise ValueError("k must be a positive integer")

    shuffled = np.random.permutation(size)
    # Spread the remainder over the first folds so sizes differ by at most 1.
    base, extra = divmod(size, k)

    fold_dict = {}
    start = 0
    for i in range(k):
        stop = start + base + (1 if i < extra else 0)
        val_indices = shuffled[start:stop]
        # Everything outside the validation window is training data.
        train_indices = np.concatenate((shuffled[:start], shuffled[stop:]))
        fold_dict[i] = (train_indices, val_indices)
        start = stop

    return fold_dict

def mse(y_hat, y_true):
    '''
    Mean squared error between predictions and targets.

    Args:
        y_hat: ndarray
            predicted values
        y_true: ndarray
            true y values
    Returns:
        cost:
            mean of the squared element-wise differences y_true - y_hat,
            taken over all elements.
    '''
    residual = y_true - y_hat
    return np.mean(np.square(residual))

def regularization(weights, method):
    '''
    Compute the regularization penalty for a weight vector.

    Args:
        weights: ndarray
            1D array with `float` entries
        method: str
            "l1" for the sum of absolute values, "l2" for the sum of squares
    Returns:
        value: float
            A single value. Regularization term that will be used in cost function in fit.
    Raises:
        NotImplementedError: if `method` is neither "l1" nor "l2".
    '''
    if method == "l1":
        # L1 penalty is the sum of absolute values; np.linalg.norm with its
        # default order would give the Euclidean (L2) norm instead.
        value = np.sum(np.abs(weights))
    elif method == "l2":
        value = np.sum(np.square(weights))
    else:
        raise NotImplementedError("Regularizer method " + str(method) + " is invalid!")
    return value

def one_hot(data, n_classes=10):
    '''
    One-hot encode an array of class labels.

    Args:
        data: ndarray
            1D array of class labels; entries are cast to int and used as
            column indices, so they must lie in [0, n_classes)
        n_classes: int
            number of columns in the encoding (defaults to 10)
    Returns:
        data_final: ndarray
            len(data) x n_classes float array with a single 1 per row, in the
            column given by that row's label.
    '''
    n_samples = data.shape[0]
    data_final = np.zeros((n_samples, n_classes))
    # Pair each row index with its label to set exactly one entry per row.
    data_final[np.arange(n_samples), data.astype(int)] = 1
    return data_final

def sigmoid(z):
    '''
    Logistic sigmoid 1 / (1 + exp(-z)).

    Args:
        z:
            scalar or ndarray; np.exp is applied element-wise
    Returns:
        value of the sigmoid for each entry of z.
    '''
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def softmax(x):
    '''
    Softmax along the last axis.

    Args:
        x: ndarray
            1D array of scores, or 2D array with one row of scores per sample
    Returns:
        sm: ndarray
            array of the same shape as x; each 1D input / each row of a 2D
            input is mapped to non-negative values that sum to 1.
    '''
    x = np.asarray(x, dtype=float)
    if x.ndim == 0:
        # A single score normalizes to 1.
        return np.ones_like(x)
    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    # Normalize each row by its own sum rather than by the sum of the whole array.
    sm = exps / np.sum(exps, axis=-1, keepdims=True)
    return sm