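## As a first proof of concept, we optimize the linear model y=mx+b subject to m>0. (The code below parameterizes the slope as m = exp(u) + 1, so the fitted slope in fact satisfies m > 1.)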

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

## Here are the functions "from the math"

In [None]:
def MSELoss(p, X, y):
    """Return the mean squared error of the line ``p[0]*x + p[1]`` on ``(X, y)``.

    This is the 1-dimensional MSE loss in the original (slope, intercept)
    coordinates.

    Args:
        p: Two-element sequence ``(slope, intercept)``.
        X: Sequence of inputs (list, NumPy array, ...).
        y: Sequence of targets with the same shape as ``X``.

    Returns:
        The mean of the squared residuals ``(y - p[0]*X - p[1])**2``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in shape or are empty.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    # Fail loudly instead of silently pairing up misaligned data.
    if X.shape != y.shape:
        raise ValueError("X and y must have the same shape")
    if y.size == 0:
        raise ValueError("X and y must not be empty")

    residuals = y - p[0] * X - p[1]
    return np.mean(residuals ** 2)

In [None]:
def myGrad1(p, X, y):
    """Return the gradient components of the MSE in the ``(u, b)`` coordinates.

    The fitted line is ``y = (exp(u) + 1) * x + b`` with ``u = p[0]`` and
    ``b = p[1]``; the original loss is assumed to be the MSE.  With residuals
    ``r = (exp(u) + 1) * X + b - y`` the returned vector is::

        [2 * exp(-u) * mean(X * r),  2 * mean(r)]

    NOTE(review): the first component carries ``exp(-u)`` rather than the
    ``exp(u)`` that the plain chain rule produces.  Presumably this is the
    gradient with respect to a non-Euclidean metric on ``u`` -- confirm
    against the derivation.

    Args:
        p: Two-element sequence ``(u, b)``.
        X: Sequence of inputs (list, NumPy array, ...).
        y: Sequence of targets with the same shape as ``X``.

    Returns:
        NumPy array of shape ``(2,)`` holding the two gradient components.

    Raises:
        ValueError: If ``X`` and ``y`` differ in shape or are empty.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    if X.shape != y.shape:
        raise ValueError("X and y must have the same shape")
    if y.size == 0:
        raise ValueError("X and y must not be empty")

    u, b = p[0], p[1]
    # Residual of the model (exp(u) + 1) * x + b; both components reuse it.
    residuals = (np.exp(u) + 1) * X + b - y
    grad_u = 2 * np.exp(-u) * np.mean(X * residuals)
    grad_b = 2 * np.mean(residuals)
    return np.array([grad_u, grad_b])

In [None]:
def myUpdate1(p, v):
    """Advance the point ``p`` by one unit of time in the direction ``-v``.

    The second coordinate moves on a straight line, ``p[1] - v[1]*t``.  The
    first coordinate follows ``log(c1*t + c2)`` with ``c2 = exp(p[0])`` and
    ``c1 = -exp(p[0]) * v[0]``, i.e. the curve that starts at ``p[0]`` with
    initial velocity ``c1/c2 = -v[0]``.  Both are evaluated at ``t = 1``.

    Args:
        p: Two-element sequence, the current point.
        v: Two-element sequence, the (already scaled) step; it is negated here.

    Returns:
        NumPy array ``[new_first_coordinate, new_second_coordinate]``.

    Note:
        When ``v[0] >= 1`` the argument of the logarithm is not positive, so
        the first coordinate of the result is not a finite real number.
    """
    step = 1.0  # both coordinates are evaluated at t = 1
    u_start, b_start = p[0], p[1]
    u_vel, b_vel = v[0], v[1]

    # First coordinate: log(c1*t + c2).
    log_slope = np.exp(u_start) * (-u_vel)
    log_offset = np.exp(u_start)
    u_next = np.log(log_slope * step + log_offset)

    # Second coordinate: plain straight-line motion.
    b_next = (-b_vel) * step + b_start

    return np.array([u_next, b_next])

In [None]:
def gradUpdate1(p, X, y, eta):
    """Take one descent step from ``p`` on the data ``(X, y)``.

    The gradient from ``myGrad1`` is scaled by the learning rate ``eta`` and
    handed to ``myUpdate1``, which moves ``p`` against that scaled gradient.

    Args:
        p: Current point (two-element sequence).
        X: Inputs used to evaluate the gradient.
        y: Targets used to evaluate the gradient.
        eta: Learning rate multiplying the gradient.

    Returns:
        The updated point, as returned by ``myUpdate1``.
    """
    scaled_step = eta * myGrad1(p, X, y)
    return myUpdate1(p, scaled_step)

## Now we need to implement gradient descent (in mini-batches) here.

In [None]:
"""
X - X data points
y - y data points
pt - Initial point to start at
Update - Update function to get a new point
epochs - Number of iterations to narrow down the best point
batchsize - The size of the sliced dataset to use in the update function
lr - The learning rate
"""
def SGD(X, y, pt, Update, epochs, batchsize=64, lr=0.01):
    """Apply ``Update`` repeatedly on random batches and record every point.

    Each of the ``epochs`` iterations draws ``batchsize`` distinct indices at
    random (without replacement) and passes the matching ``X``/``y`` entries
    to ``Update``.  When the data set has at most ``batchsize`` points, the
    full ``X`` and ``y`` are passed unchanged on every iteration.

    Args:
        X: x data points; must support ``len`` and integer indexing.
        y: y data points, aligned index-by-index with ``X``.
        pt: Initial point to start at.
        Update: Callable ``Update(pt, xBatch, yBatch, lr)`` returning the new
            point (for example ``gradUpdate1``).
        epochs: Number of update steps to perform.
        batchsize: Number of data points sampled for each update step.
        lr: The learning rate, forwarded to ``Update``.

    Returns:
        List of ``epochs + 1`` points: the initial ``pt`` followed by the
        result of every update, in order.

    Raises:
        ValueError: If ``batchsize`` is smaller than 1.
    """
    if batchsize < 1:
        raise ValueError("batchsize must be at least 1")

    pts = [pt]
    use_full_data = len(X) <= batchsize
    for _ in range(epochs):
        if use_full_data:
            xBatch, yBatch = X, y
        else:
            # Sample indices (not values) so X and y stay paired.
            picks = random.sample(range(len(X)), batchsize)
            xBatch = [X[i] for i in picks]
            yBatch = [y[i] for i in picks]
        # TODO: every update function must share gradUpdate1's
        # (pt, X, y, lr) signature; find a way to support other signatures.
        pt = Update(pt, xBatch, yBatch, lr)
        pts.append(pt)  # Track the path of visited points.
    return pts

## Sample with dummy data: make sure the "math checks out."

In [None]:
# Noise-free test data lying exactly on the line y = 1*x + 1.
Xtest = np.linspace(start=0, stop=10, num=100)
ytest = list(Xtest + 1)
print(Xtest.size)

In [None]:
"""
pt = [2,0] # Initialize (new coordinates). This is (exp(2)+1, 0) in the original coordinates.
pts = [pt]
epochs = 1200 # How long to train for?
myeta = 0.01 # Careful with the learning rate: if it's too big, you can run into a "bad point!"
for i in range(epochs):
    pt = gradUpdate1(pt,Xtest,ytest,myeta)
    pts.extend([pt]) # Track the path in the original coordinates.
"""
# Run 1200 update steps from [2, 0] (new coordinates) with SGD's default
# batch size and learning rate.
pts = SGD(X=Xtest, y=ytest, pt=[2, 0], Update=gradUpdate1, epochs=1200)
# Convert every visited point to the original coordinates: (exp(u) + 1, b).
opts = [np.array([np.exp(u) + 1, b]) for u, b in pts]

In [None]:
# MSE on the test data at every point of the path (original coordinates).
losses = [MSELoss(point, Xtest, ytest) for point in opts]

In [None]:
# Watch the loss drop over the first ten recorded points.
plt.plot(losses[:10])

In [None]:
# The loss continues to drop.
# Only the slice from index 10 onward is plotted, so the x-axis counts
# positions within that slice (starting again at 0).
plt.plot(losses[10:])

In [None]:
# Here is our approximate solution: the last point of the path, taken from
# opts, i.e. already expressed in the original coordinates.
print(opts[-1])

# Dataset Exploration

In [None]:
# Load the avocado data and keep only the price and volume columns.
df = pd.read_csv('data/avocado.csv')[['AveragePrice', 'Total Volume']].copy()

# Scatter plot: total volume on the x-axis, average price on the y-axis.
plt.scatter(df['Total Volume'], df['AveragePrice'])
plt.title("Avocados Sold vs Price")
plt.xlabel("Volume Sold")
plt.ylabel("Average Price")
plt.show()

In [None]:
# Reload the full CSV (all columns).  Both names are bound to the same
# DataFrame object, so `df` no longer refers to the two-column subset.
# NOTE(review): overwriting `df` here may be unintentional -- confirm.
df = pd.read_csv('data/avocado.csv')
df2 = df

In [None]:
# Bare expression: as the last statement of a notebook cell it displays df2.
df2