In [1]:
import pickle
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
import numpy as np 
import pandas as pd
import re

from sklearn.metrics import mean_absolute_error

In [2]:
from word2number import w2n

In [None]:
pip install word2number

In [3]:
with open('./processed_data.pkl', 'rb') as f:
    data = pickle.load(f)

In [4]:
data.shape

(205, 45)

In [5]:
data.columns

Index(['symboling', 'doornumber', 'wheelbase', 'carlength', 'carwidth',
       'carheight', 'curbweight', 'cylindernumber', 'enginesize', 'boreratio',
       'stroke', 'compressionratio', 'horsepower', 'peakrpm', 'citympg',
       'highwaympg', 'price', 'carbody_convertible', 'carbody_hardtop',
       'carbody_hatchback', 'carbody_sedan', 'carbody_wagon', 'drivewheel_4wd',
       'drivewheel_fwd', 'drivewheel_rwd', 'enginetype_dohc',
       'enginetype_dohcv', 'enginetype_l', 'enginetype_ohc', 'enginetype_ohcf',
       'enginetype_ohcv', 'enginetype_rotor', 'fuelsystem_1bbl',
       'fuelsystem_2bbl', 'fuelsystem_4bbl', 'fuelsystem_idi',
       'fuelsystem_mfi', 'fuelsystem_mpfi', 'fuelsystem_spdi',
       'fuelsystem_spfi', 'enginelocation_front', 'enginelocation_rear',
       'fueltype_diesel', 'fueltype_gas', 'CarName_first'],
      dtype='object')

In [6]:
data

Unnamed: 0,symboling,doornumber,wheelbase,carlength,carwidth,carheight,curbweight,cylindernumber,enginesize,boreratio,...,fuelsystem_idi,fuelsystem_mfi,fuelsystem_mpfi,fuelsystem_spdi,fuelsystem_spfi,enginelocation_front,enginelocation_rear,fueltype_diesel,fueltype_gas,CarName_first
0,3,2,0.058309,0.413433,0.316667,0.083333,0.411171,4,0.260377,0.664286,...,0,0,1,0,0,1,0,0,1,alfa
1,3,2,0.058309,0.413433,0.316667,0.083333,0.411171,4,0.260377,0.664286,...,0,0,1,0,0,1,0,0,1,alfa
2,1,2,0.230321,0.449254,0.433333,0.383333,0.517843,6,0.343396,0.100000,...,0,0,1,0,0,1,0,0,1,alfa
3,2,4,0.384840,0.529851,0.491667,0.541667,0.329325,4,0.181132,0.464286,...,0,0,1,0,0,1,0,0,1,audi
4,2,4,0.373178,0.529851,0.508333,0.541667,0.518231,5,0.283019,0.464286,...,0,0,1,0,0,1,0,0,1,audi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-1,4,0.655977,0.711940,0.716667,0.641667,0.567882,4,0.301887,0.885714,...,0,0,1,0,0,1,0,0,1,volvo
201,-1,4,0.655977,0.711940,0.708333,0.641667,0.605508,4,0.301887,0.885714,...,0,0,1,0,0,1,0,0,1,volvo
202,-1,4,0.655977,0.711940,0.716667,0.641667,0.591156,6,0.422642,0.742857,...,0,0,1,0,0,1,0,0,1,volvo
203,-1,4,0.655977,0.711940,0.716667,0.641667,0.670675,6,0.316981,0.335714,...,1,0,0,0,0,1,0,1,0,volvo


In [7]:
test_data = data.groupby('CarName_first').head(6)

indexes = ~data.index.isin(test_data.index)
train_data = data.loc[indexes]

In [8]:
y_train = train_data['price']
y_test = test_data['price']

In [9]:
y_train

9      0.316299
16     0.898590
17     0.788491
27     0.085398
28     0.094409
         ...   
200    0.291123
201    0.345738
202    0.406311
203    0.430763
204    0.434611
Name: price, Length: 87, dtype: float64

In [10]:
type(y_train)

pandas.core.series.Series

In [11]:
X_train = train_data.loc[:, ~train_data.columns.isin(['CarName_first', 'price' ])]

X_test = test_data.loc[:, ~test_data.columns.isin(['CarName_first', 'price'])]

In [12]:
m, n = X_train.shape
print(m, n)

87 43


In [13]:
def initialize_weights(n):
    W = np.zeros((1,n))
    return W

In [14]:
def predict(X, W):
    y_pred = np.dot(X, W.T)
    return y_pred


In [15]:
def compute_cost_rms(y_true, y_pred, m):
    err = y_true - y_pred
    squared_err = np.square(err)
    cost = 1/(2* m)  * (np.sum(squared_err))
    return cost


In [58]:
def compute_grad(X, y_true, y_pred):
    err = y_true-y_pred
    print("err shape", err.shape)
    print("X shape", X.shape)
    #print(X.shape)
    #print(err.shape)
    grad = np.sum(np.multiply(err, X), axis=0)
    print("grad shape", grad.shape)
    return grad



In [72]:
def train_model(X, y, m, n, batch_size, num_iter, alpha, lambda_var):
    s = X.shape[0]/batch_size
    if type(s)== "float":
        num_batch = int(s)+1
    else:
        num_batch = int(s)
    costs= list()
    k = num_batch+1
    
    for i in range(0, num_iter):
        W = initialize_weights(n+1)
        start = 0
        #print(W.shape)
        for j in range(1,k):
            #print(y.shape)
           
            if j==num_batch:
                X_batch = X.iloc[start: , : ]
                y_batch = y[start: ]
                y_batch = np.expand_dims(y_batch, axis= 1)
                #print(y_batch.shape)
            else:
                end = start+ batch_size
                X_batch = X.iloc[start: end, : ]
                y_batch = y[start: end]
                y_batch = np.expand_dims(y_batch, axis=1)
                #print(y_batch.shape)
            start = end
            m= X_batch.shape[0]
            # Adding X_o for W_o
            X_batch = np.append(np.ones((m,1)), X_batch, axis=1 )
            y_pred = predict(X_batch, W)
            #print(y_pred.shape)
            cost = compute_cost_rms(y_batch, y_pred, m)
            costs.append(cost)
            grad = compute_grad(X_batch, y_batch, y_pred)
            grad = np.expand_dims(grad, axis=0)
#             print(grad.shape)
#             print(W.shape)
            print("w shape", W.shape)
            reg_term = 1- ((alpha/m)*lambda_var)
            W = (np.multiply(reg_term, W)) - (np.multiply((alpha/m), grad))
    return W, costs

In [73]:
m, n  = X_train.shape
trained_W, costs = train_model(X_train, y_train,m, n,  int(10), 1, 0.01, -3)

err shape (10, 1)
X shape (10, 44)
grad shape (44,)
w shape (1, 44)
err shape (10, 1)
X shape (10, 44)
grad shape (44,)
w shape (1, 44)
err shape (10, 1)
X shape (10, 44)
grad shape (44,)
w shape (1, 44)
err shape (10, 1)
X shape (10, 44)
grad shape (44,)
w shape (1, 44)
err shape (10, 1)
X shape (10, 44)
grad shape (44,)
w shape (1, 44)
err shape (10, 1)
X shape (10, 44)
grad shape (44,)
w shape (1, 44)
err shape (10, 1)
X shape (10, 44)
grad shape (44,)
w shape (1, 44)
err shape (17, 1)
X shape (17, 44)
grad shape (44,)
w shape (1, 44)


In [66]:
trained_W.shape

(1, 44)

In [74]:
costs

[0.08046762672477246,
 0.03058419777869445,
 0.21060335467842944,
 0.1942353103087126,
 0.4570825670192668,
 0.6042177352085977,
 0.9254045880181201,
 2.565387639597304]

In [37]:
a = np.array([2, 4, 6])
a 

array([2, 4, 6])

In [34]:
b = np.array(
[[1, 3, 5],
[7, 9 , 11]]

)

In [39]:
c = np.multiply(a, b)
c

array([[ 2, 12, 30],
       [14, 36, 66]])

In [40]:
np.sum(c, axis =0)

array([16, 48, 96])