In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection  import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
def _report_split(name, array):
    """Print the shape and type of one data split, followed by a separator line."""
    print(name + ' - ', array.shape, ' - ', type(array))
    print('-' * 50)


# Load the California housing data and pull out the feature matrix and target vector.
california = fetch_california_housing()
X, y = california['data'], california['target']

# Hold out 25% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=13
)

_report_split('X_train', X_train)
_report_split('y_train', y_train)
_report_split('X_test', X_test)
_report_split('y_test', y_test)

X_train -  (15480, 8)  -  <class 'numpy.ndarray'>
--------------------------------------------------
y_train -  (15480,)  -  <class 'numpy.ndarray'>
--------------------------------------------------
X_test -  (5160, 8)  -  <class 'numpy.ndarray'>
--------------------------------------------------
y_test -  (5160,)  -  <class 'numpy.ndarray'>
--------------------------------------------------


In [None]:
class MyCalifornia(torch.utils.data.Dataset):
  """Map-style dataset over an indexable collection of (features, target) pairs.

  Args:
    init_dataset: any object supporting ``len()`` and integer indexing, where
      each item unpacks into exactly two values: the features and the target.
    transform: optional callable applied to the features of every sample
      before they are returned. The target is passed through unchanged.
  """

  def __init__(self, init_dataset, transform=None):
    self._base_dataset = init_dataset
    self.transform = transform

  def __len__(self):
    """Return the number of samples in the wrapped dataset."""
    return len(self._base_dataset)

  def __getitem__(self, idx):
    """Return the ``(features, target)`` pair stored at position ``idx``."""
    data, target = self._base_dataset[idx]

    if self.transform is not None:
      data = self.transform(data)

    # The sample must be returned explicitly; otherwise every lookup
    # (and therefore every DataLoader batch) would be None.
    return data, target
      

In [None]:
img_size = 32


class MyOwnCifar(torch.utils.data.Dataset):
    """Dataset wrapper that returns each image flattened to a 1-D tensor.

    The wrapped dataset must yield ``(image, label)`` pairs. After the optional
    transform, the image is viewed as a vector of ``img_size * img_size * 3``
    elements; the label is returned untouched.
    """

    def __init__(self, init_dataset, transform=None):
        self.transform = transform
        self._base_dataset = init_dataset

    def __len__(self):
        """Number of samples in the wrapped dataset."""
        return len(self._base_dataset)

    def __getitem__(self, idx):
        """Return ``(flat_image, label)`` for the sample at ``idx``."""
        image, label = self._base_dataset[idx]

        if self.transform is not None:
            image = self.transform(image)

        # Three channels of img_size x img_size pixels collapsed into one axis.
        flat_length = img_size * img_size * 3
        return image.view(flat_length), label
    

# NOTE(review): `transforms` and `train_dataset` are not defined in any cell
# visible here -- confirm they are imported/created elsewhere, otherwise this
# cell raises NameError on a fresh kernel.

# Augmentation pipeline: resize slightly larger than the target, take a padded
# random crop back to img_size, then convert to a tensor.
trans_actions = transforms.Compose(
    [
        transforms.Resize(img_size + 2),
        transforms.RandomCrop(img_size, padding=2),
        transforms.ToTensor(),
    ]
)

dataset = MyOwnCifar(train_dataset, trans_actions)

# Shuffled mini-batches of 128 samples, loaded by two worker processes.
train_loader = torch.utils.data.DataLoader(
    dataset, batch_size=128, shuffle=True, num_workers=2
)

In [None]:
# Training hyperparameters.
batch_size = 100      # samples per mini-batch
num_epochs = 200      # full passes over the training set
learning_rate = 0.1   # optimizer step size
size_hidden = 500     # units in the hidden layer

# Number of mini-batches per epoch. Ceiling division keeps the final partial
# batch; using len(X_train) directly would count samples, not batches.
batch_no = (len(X_train) + batch_size - 1) // batch_size
cols = X_train.shape[1]   # number of input features
n_output = 1              # single regression target

class Net(torch.nn.Module):
    """Feed-forward regressor with a single ReLU hidden layer.

    Args:
        n_feature: number of input features per sample.
        size_hidden: number of units in the hidden layer.
        n_output: number of values predicted per sample.
    """

    def __init__(self, n_feature, size_hidden, n_output):
        super().__init__()
        # Size the input layer from the constructor argument rather than a
        # module-level global, so the class works for any feature count.
        self.hidden = torch.nn.Linear(n_feature, size_hidden)   # hidden layer
        self.predict = torch.nn.Linear(size_hidden, n_output)   # output layer

    def forward(self, x):
        """Map a batch of inputs to predictions; the output layer is linear."""
        x = F.relu(self.hidden(x))      # activation function for hidden layer
        return self.predict(x)          # linear output
# Build the regressor from the dimensions configured above.
net = Net(n_feature=cols, size_hidden=size_hidden, n_output=n_output)

In [None]:
# Adam optimizer over all trainable parameters of the network.
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
# Alternative optimizers to try:
# optimizer = torch.optim.RMSprop(net.parameters(), lr=0.1, alpha=0.99)
# optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)

# Summed (not averaged) squared error per batch. reduction='sum' is the
# supported spelling of the deprecated size_average=False.
criterion = torch.nn.MSELoss(reduction='sum')



In [None]:
from sklearn.utils import shuffle
from torch.autograd import Variable  # not used in this cell; the evaluation cells below still reference it

# Mini-batch training. Each epoch prints the summed loss over all its batches.
running_loss = 0.0
for epoch in range(num_epochs):
    # Shuffle just mixes up the dataset between epochs
    X_train, y_train = shuffle(X_train, y_train)
    # Mini batch learning
    for i in range(batch_no):
        start = i * batch_size
        if start >= len(X_train):
            # Past the end of the data: an empty slice would still trigger an
            # optimizer step (Adam keeps moving on momentum), so stop here.
            break
        end = start + batch_size
        inputs = torch.as_tensor(X_train[start:end], dtype=torch.float32)
        labels = torch.as_tensor(y_train[start:end], dtype=torch.float32)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        # Targets get a trailing axis so they line up with the (batch, 1)
        # network output instead of broadcasting against it.
        loss = criterion(outputs, labels.unsqueeze(1))
        loss.backward()
        optimizer.step()

        # accumulate the epoch's loss
        running_loss += loss.item()

    print('Epoch {}'.format(epoch+1), "loss: ",running_loss)
    running_loss = 0.0

Epoch 1 loss:  6995004714.445328
Epoch 2 loss:  11125605.031852722
Epoch 3 loss:  20721.350799560547
Epoch 4 loss:  20701.86823272705
Epoch 5 loss:  20788.438034057617
Epoch 6 loss:  20717.37660217285
Epoch 7 loss:  20857.778884887695
Epoch 8 loss:  20842.279586791992
Epoch 9 loss:  20846.639602661133
Epoch 10 loss:  20890.887413024902
Epoch 11 loss:  20784.564163208008
Epoch 12 loss:  20835.31150817871
Epoch 13 loss:  20795.01293182373
Epoch 14 loss:  20890.455558776855
Epoch 15 loss:  20708.360107421875
Epoch 16 loss:  20883.59659576416
Epoch 17 loss:  20687.91025543213
Epoch 18 loss:  20845.747764587402
Epoch 19 loss:  20950.32216644287
Epoch 20 loss:  20930.514694213867
Epoch 21 loss:  20789.423706054688
Epoch 23 loss:  20717.247451782227
Epoch 24 loss:  20788.134315490723
Epoch 25 loss:  20976.7885055542
Epoch 26 loss:  20820.363136291504
Epoch 27 loss:  20918.427543640137
Epoch 28 loss:  20732.784324645996
Epoch 29 loss:  20758.401947021484
Epoch 30 loss:  20852.65464782715
Epoch

In [None]:
import pandas as pd
from sklearn.metrics import r2_score

# Predict on the training set. Inference only, so gradient tracking is disabled.
X = torch.as_tensor(X_train, dtype=torch.float32)
with torch.no_grad():
    result = net(X)
pred = result[:, 0].numpy()   # column 0 of the network output as a 1-D array
print(len(pred), len(y_train))

# r2_score expects (y_true, y_pred); the score is not symmetric, so swapping
# the arguments reports a meaningless value.
r2_score(y_train, pred)

15480 15480


-9556951238085.01

In [None]:
from sklearn.metrics import r2_score
#This is a little bit tricky to get the resulting prediction.  
def calculate_r2(x, y=None):
    """Predict with the module-level ``net`` and optionally score against ``y``.

    Args:
        x: array-like of input features, converted to a float32 tensor.
        y: optional array-like of true targets. When omitted (or empty), only
            the predictions are returned.

    Returns:
        If ``y`` is given: a DataFrame with columns ``actual`` and
        ``predicted``; the R-squared score is printed as a side effect.
        Otherwise: a 1-D numpy array of predictions.
    """
    features = torch.as_tensor(x, dtype=torch.float32)
    # Inference only: no gradient tracking needed.
    with torch.no_grad():
        result = net(features)[:, 0].numpy()   # column 0 of the regression output

    if y is None or len(y) == 0:
        print("returning predictions")
        return result

    # r2_score expects (y_true, y_pred); the score is not symmetric in its arguments.
    r2 = r2_score(y, result)
    print("R-Squared", r2)
    return pd.DataFrame(data={'actual': y, 'predicted': result})

In [None]:
# Score the network on the training split, then on the held-out test split.
result1 = calculate_r2(x=X_train, y=y_train)
result2 = calculate_r2(x=X_test, y=y_test)

R-Squared -9556951238085.01
R-Squared -38435112538528.65


In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline fitted on the same training split as the network.
lm = LinearRegression()
lm.fit(X=X_train, y=y_train)

LinearRegression()

In [None]:
# Score the linear baseline on the training split and on the held-out test split.
train_r2 = lm.score(X_train, y_train)
test_r2 = lm.score(X_test, y_test)
print('R2 for Train)', train_r2)
print('R2 for Test (cross validation)', test_r2)

R2 for Train) 0.6071811535067086
R2 for Test (cross validation) 0.6032263670167703


## Adam
- R2 for Train: 0.6071811535067086
- R2 for Test (cross validation): 0.6032263670167717