In [328]:
import numpy as np
import scipy as sp
from scipy import io
from scipy import stats
import pandas as pd
from sklearn.metrics import accuracy_score

In [329]:
wine = io.loadmat("data_wine.mat")

In [330]:
# separate into training data, training labels, and testing data
wine_raw_training_data = wine["X"] 
wine_training_labels = wine["y"]
wine_test_data = wine["X_test"]
wine_descriptions = wine["description"]

wine_tuples = np.append(wine_raw_training_data, wine_training_labels, axis=1)
wine_tuples_shuffled = np.random.permutation(wine_tuples)

# In order to partition our data, we will use pandas; we will convert our numpy array into a temporary 
# dataframe and set aside the first 10,000 shuffled tuples, which will then be separated back into
# training data and training labels, to the validation set. Then we will assign the remainder, again 
# separating the training data and training labels to the training set.

wine_validation_data = wine_tuples_shuffled[0:500, 0:-1]
wine_validation_labels = wine_tuples_shuffled[0:500, -1]
wine_training_data = wine_tuples_shuffled[500:, 0:-1]
wine_training_labels = wine_tuples_shuffled[500:, -1]

# Standardize Data By Feature and Add Fictitious Dimension
training_features_means = np.mean(wine_training_data, axis=0)
training_features_sd = np.std(wine_training_data, axis=0)
wine_standardized_training_data = (wine_training_data - training_features_means) / training_features_sd
wine_cleaned_training_data = np.hstack((wine_standardized_training_data, np.ones((wine_standardized_training_data.shape[0],1))))

In [327]:
def calculate_cost(w):
    sigmoid = sp.special.expit(wine_cleaned_training_data@w) # 1 / (1 + e^(-Xw))
    # print(wine_training_labels.shape, sigmoid.shape)

    # - y . ln(s)
    first_component = -1 * np.dot(wine_training_labels, np.log(sigmoid)) # -y . ln(s)
    # print(first_component.shape)

    # - (1-y) . ln(1-s)
    second_component_sub_a = np.ones((wine_cleaned_training_data.shape[0], 1)).reshape(-1) - wine_training_labels # 1-y
    second_component_sub_b = np.ones((wine_cleaned_training_data.shape[0], 1)).reshape(-1) - sigmoid #  1-s
    second_component = -1 * np.dot(second_component_sub_a, np.log(second_component_sub_b)) # -(1-y) . ln(1-s)
    
    # - y . ln(s) - (1-y) . ln(1-s)
    return (first_component + second_component)

In [312]:
starting_weights = np.ones((wine_cleaned_training_data.shape[1],1)).reshape(-1)
calculate_cost(starting_weights)

inf

In [313]:
wine_training_labels.shape

(5500,)

In [314]:
def descend(alpha, num_iters, l2, w_0):
    # 1. Solve d(cost)/dw = X.T @ (s(X@w) - y) + 2 \lambda w
    # 2. w_i = w_i - alpha * d(cost)/dw_i for all i in w
    # 3. Repeat until converges

    w_i = w_0
    i = 0
    gradient = 3
    while i < num_iters:
        residual = sp.special.expit(wine_cleaned_training_data @ w_i) - wine_training_labels
        gradient = (wine_cleaned_training_data.T @ (residual)) + (2 * l2 * w_i).reshape(-1)
        w_i = (w_i - alpha * gradient).reshape(-1)
        i += 1
    return gradient, w_i


In [315]:
wine_training_labels.shape

(5500,)

In [316]:
results = descend(0.25, 100, 0, starting_weights)

In [317]:
starting_weights

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [318]:
results[0]

array([  6.57855633,   9.24352281,   0.33123347,   5.49422308,
         5.18912646,  -3.21147819,  -9.57187467,   7.95172059,
         2.78328558,   3.39736926,  -0.68461642,   2.76838111,
       -15.54349074])

In [319]:
results[1]

array([  28.77860516,   96.86774856,   22.04909833, -220.32460115,
         60.47998537,   34.30610675, -149.19463644,  176.89491911,
         53.32440791,   37.08852471,   67.36845936,   23.07408291,
       -305.30347542])

In [320]:
calculate_cost(results[1])

nan

In [326]:
sum(np.round(sp.special.expit(wine_cleaned_training_data@results[1])) != wine_training_labels)/len(wine_training_labels)

0.008545454545454545

In [323]:
t1 = np.array([1,2,3,4,5])
print(np.log(t1))
print(t1.shape)
t2 = np.array([6,7,8,9,10])
print(t2.shape)
np.dot(t1,t2)

[0.         0.69314718 1.09861229 1.38629436 1.60943791]
(5,)
(5,)


130

In [324]:
wine_training_labels.reshape(-1, 1).shap

AttributeError: 'numpy.ndarray' object has no attribute 'shap'

In [None]:
trial = np.array([1,2,3]).reshape(-1,1)
trial2 = np.array([4,5,6]).reshape(-1,1)
trial.T @ trial2

array([[32]])

In [None]:
np.dot??

[0;31mDocstring:[0m
dot(a, b, out=None)

Dot product of two arrays. Specifically,

- If both `a` and `b` are 1-D arrays, it is inner product of vectors
  (without complex conjugation).

- If both `a` and `b` are 2-D arrays, it is matrix multiplication,
  but using :func:`matmul` or ``a @ b`` is preferred.

- If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply`
  and using ``numpy.multiply(a, b)`` or ``a * b`` is preferred.

- If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
  the last axis of `a` and `b`.

- If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
  sum product over the last axis of `a` and the second-to-last axis of `b`::

    dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])

Parameters
----------
a : array_like
    First argument.
b : array_like
    Second argument.
out : ndarray, optional
    Output argument. This must have the exact kind that would be returned
    if it was not used. In particular, it must

In [None]:
trial.reshape(-1,1)

array([[1],
       [2],
       [3]])