In [None]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

In [None]:
def step_gradient(learning_rate, theta, x_sample, y_sample):
    """Compute one learning-rate-scaled update for a linear least-squares model.

    Parameters
    ----------
    learning_rate : float
        Factor applied to the averaged gradient.
    theta : numpy.ndarray
        Current coefficient vector.
    x_sample, y_sample : objects exposing ``.values``
        Feature matrix (one row per sample) and the matching targets.

    Returns
    -------
    tuple
        ``(update, update_norm, cost)``: ``update`` is the negated, scaled
        gradient (add it to ``theta``), ``update_norm`` is its Euclidean norm
        and ``cost`` is the un-averaged sum of squared residuals.
    """
    n_rows = float(x_sample.values[:, 0].size)
    features = x_sample.values
    targets = y_sample.values

    # Prediction error of the current coefficients on every row.
    residuals = theta.dot(features.T) - targets
    squared_error = np.sum(residuals ** 2)

    # Scale each feature row by its residual, average over the batch size,
    # then collapse the rows into a single gradient vector.
    per_sample = (features.T * residuals).T / n_rows
    step = learning_rate * per_sample.sum(axis=0)

    return -step, np.linalg.norm(step), squared_error

In [None]:
def descent(learning_rate, initial_theta, iterations, x_sample, y_sample, batch_size='full'):
    """Fit linear-regression coefficients by gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient.
    initial_theta : array-like
        Starting coefficients. The input is copied and never modified.
    iterations : int
        Number of passes over ``x_sample``.
    x_sample, y_sample : pandas objects
        Training features (one row per sample) and targets.
    batch_size : int, 'full' or 'stochastic'
        ``'full'`` uses every row per update, ``'stochastic'`` (or ``1``)
        updates after every single row, any other positive integer selects
        mini-batches of that many consecutive rows.

    Returns
    -------
    numpy.ndarray
        The coefficients after the last update.

    Raises
    ------
    ValueError
        If ``batch_size`` is not one of the supported values.
    """
    # Work on a private float copy: the in-place ``+=`` updates below would
    # otherwise overwrite the caller's ``initial_theta``.
    theta = np.array(initial_theta, dtype=float)

    # ``iterations // 50`` is 0 for fewer than 50 iterations, which made the
    # progress check divide by zero; report on every pass in that case.
    report_every = max(1, iterations // 50)

    if batch_size == 'stochastic' or batch_size == 1:
        print('Stochastic')
        cost = float('nan')  # stays NaN only when there are no rows to visit
        for i in range(iterations):
            for item, price in zip(x_sample.values, y_sample.values):
                loss = np.dot(item, theta) - price
                cost = np.sum(loss ** 2)
                gradient = np.dot(item.transpose(), loss)
                theta = theta - learning_rate * gradient
            if i % report_every == 0:
                # The reported cost is the squared error of the last row only.
                print('Iteration {}: {}'.format(i, cost))
        return theta

    if batch_size == 'full':
        for i in range(iterations):
            step, step_norm, cost = step_gradient(learning_rate, theta, x_sample, y_sample)
            theta += step
            if i % report_every == 0:
                print('Iteration {}: {} \t cost: {}'.format(i, step_norm, cost))
        return theta

    if isinstance(batch_size, (int, np.integer)) and not isinstance(batch_size, bool):
        if batch_size <= 0:
            # A non-positive stride would never advance through the data.
            raise ValueError('batch_size must be a positive integer, got {!r}'.format(batch_size))
        n_samples = len(x_sample)
        step_norm = cost = float('nan')
        for i in range(iterations):
            for start in range(0, n_samples, batch_size):
                stop = start + batch_size
                step, step_norm, cost = step_gradient(
                    learning_rate, theta, x_sample.iloc[start:stop], y_sample.iloc[start:stop])
                theta += step
            if i % report_every == 0:
                # Norm and cost refer to the last mini-batch of this pass.
                print('Iteration {}: {} \t cost: {}'.format(i, step_norm, cost))
        return theta

    raise ValueError(
        "batch_size must be 'full', 'stochastic' or a positive integer, got {!r}".format(batch_size))

### one_hot = False, not using one-hot encoding

In [None]:
# NOTE(review): this flag is not read by any of the cells shown here.
one_hot = False
# Read the dataset, using the first CSV column as the row index.
data = pd.read_csv(filepath_or_buffer='diamonds.csv', index_col=0)

In [None]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

In [None]:
# Hold out 15% of the rows for evaluation. The seed is fixed so that the split,
# and therefore every score computed below, is the same after a kernel restart.
train, test = train_test_split(data, test_size=0.15, random_state=42)

In [None]:
# Feature frames: everything except the target column.
train_x = train.drop(columns='price')
test_x = test.drop(columns='price')

In [None]:
# Preview the first five rows of the training features.
train_x.head(n=5)

In [None]:
# Target series for each split.
train_y = train['price']
test_y = test['price']

In [None]:
# One-hot encode the three categorical columns. ``dtype=float`` keeps the
# design matrix numeric: pandas 2.x returns boolean dummies by default, which
# turns ``train_x.values`` into an object array once they are joined with the
# scaled float columns.
train_one_hot_cut = pd.get_dummies(train['cut'], dtype=float)
train_one_hot_color = pd.get_dummies(train['color'], dtype=float)
train_one_hot_clarity = pd.get_dummies(train['clarity'], dtype=float)

# Build the test dummies on the training columns so both design matrices have
# the same features in the same order, even if a category is absent from one
# of the splits.
test_one_hot_cut = pd.get_dummies(test['cut'], dtype=float).reindex(
    columns=train_one_hot_cut.columns, fill_value=0.0)
test_one_hot_color = pd.get_dummies(test['color'], dtype=float).reindex(
    columns=train_one_hot_color.columns, fill_value=0.0)
test_one_hot_clarity = pd.get_dummies(test['clarity'], dtype=float).reindex(
    columns=train_one_hot_clarity.columns, fill_value=0.0)

# The raw categorical columns are replaced by their dummies later on.
train_x = train_x.drop(['cut', 'color', 'clarity'], axis=1)
test_x = test_x.drop(['cut', 'color', 'clarity'], axis=1)

In [None]:
# Fit the scaler on the training features only, then apply it to both splits.
x_scaler = StandardScaler()
x_scaler.fit(train_x)

# ``transform`` returns a bare array, so the labels are restored explicitly.
train_x = pd.DataFrame(
    x_scaler.transform(train_x),
    index=train_x.index,
    columns=train_x.columns,
)
test_x = pd.DataFrame(
    x_scaler.transform(test_x),
    index=test_x.index,
    columns=test_x.columns,
)

In [None]:
# Append the dummy columns (cut, then color, then clarity) to the scaled features.
train_x = (
    train_x
    .join(train_one_hot_cut)
    .join(train_one_hot_color)
    .join(train_one_hot_clarity)
)

test_x = (
    test_x
    .join(test_one_hot_cut)
    .join(test_one_hot_color)
    .join(test_one_hot_clarity)
)

In [None]:
# Preview the first five rows of the final training design matrix.
train_x.head(n=5)

#### descent(learning_rate, initial_theta, iterations, x_sample, y_sample, batch_size='full')

**Parameters:**
* **learning_rate: float**  
    The descent step size.
    

* **initial_theta: array-like object**  
    The initial coefficients (also known as $\theta$) from which the descent starts.
    

* **iterations: int**  
    Number of iterations to be executed.
    

* **x_sample: array-like object**  
    Features of the training data.
    
    
* **y_sample: array-like object**  
    Target of the training data.
    
    
* **batch_size: int, or the string 'full', or the string 'stochastic'**  
    Batch size of each step. Setting to 'stochastic' is the same as 1.
    
**Returns:**
* **theta** 
    The model extracted from the training.

### Examples:
descent(0.1, initial_theta, 10000, x_sample, y_sample, 1000)

Will run 10 thousand iterations with batch size of 1000.

In [None]:
# Start from an all-ones coefficient vector, one entry per feature column.
initial_theta = np.ones_like(train_x.values[0], dtype=float)
new_theta = descent(
    learning_rate=0.001,
    initial_theta=initial_theta,
    iterations=100,
    x_sample=train_x,
    y_sample=train_y,
    batch_size='stochastic',
)
print(new_theta)

In [None]:
# Predictions on the training split (linear model: theta . x for every row).
y_train_pred = new_theta.dot(train_x.T)

In [None]:
# Predictions on the held-out split.
y_test_pred = test_x @ new_theta

In [None]:
# R^2 on the training split.
metrics.r2_score(y_true=train_y, y_pred=y_train_pred)

In [None]:
# R^2 on the held-out split.
metrics.r2_score(y_true=test_y, y_pred=y_test_pred)

In [None]:
#np.savetxt('/home/furusato/tests/jupyter/mo444a/batch_model.txt', new_theta)