In [37]:
import torch
import torch.optim as optim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn import metrics

In [5]:
t_c = [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
t_c = torch.tensor(t_c)
t_u = torch.tensor(t_u)

def model(t_u, w1, w2, b):
  return w2 * t_u ** 2 + w1 * t_u + b;

def mseloss(t_p, t_c):
  squared_diffs = (t_p - t_c) ** 2
  return squared_diffs.mean();

In [38]:
def trainingloop (epochs, lr, params, t_u, t_c):
  optimizer = optim.SGD([params], lr=lr)
  #optimizer = optim.Adam([params], lr=lr)

  t_p = model(t_u, *params)
  print('Starting Loss %d' % (mseloss(t_p, t_c)))

  for epoch in range(1, epochs+1):
    if params.grad is not None:
      params.grad.zero_()

    t_p = model(t_u, *params)
    loss = mseloss(t_p, t_c)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    with torch.no_grad():
      params -= lr * params.grad

    if epoch % 500 == 0:
      print('Epoch %d, Loss %f' % (epoch, float(loss)))

trainingloop(
    epochs = 5000, #100000
    lr = 0.000000042, #  lr = 0.0000000085, # lr = 0.0000000042, # lr = 0.00000000042, # lr = 0.0000000-42,
    params = torch.tensor([1.0, 1.0, 0.0], requires_grad=True),
    t_u = t_u,
    t_c = t_c
)



Starting Loss 11709471
Epoch 500, Loss 199.108978
Epoch 1000, Loss 194.201599
Epoch 1500, Loss 189.417542
Epoch 2000, Loss 184.753677
Epoch 2500, Loss 180.206985
Epoch 3000, Loss 175.774475
Epoch 3500, Loss 171.453278
Epoch 4000, Loss 167.240631
Epoch 4500, Loss 163.134537
Epoch 5000, Loss 159.131485


In [39]:
from google.colab import drive
drive.mount('/content/drive')

# Read Dataset
hsdb = pd.read_csv('/content/drive/MyDrive/ColabNotebooks/4105a4/Housing.csv')
cndb = pd.read_csv('/content/drive/MyDrive/ColabNotebooks/4105a4/cancer.csv')

np.random.seed(0)

nmlz = StandardScaler()

Mounted at /content/drive


In [56]:
#Problem 2
df_train, df_test = train_test_split(hsdb, train_size = 0.8, test_size = 0.2, random_state = 100)

def binary_map(x):
    return x.map({'yes': 1, 'no': 0})

def trinary_map(x):
    return x.map({'unfurnished': 0, 'semi-furnished': 0.5, 'furnished' : 1})

binarymap =  ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
trinarymap = ['furnishingstatus']

df_train[binarymap] = df_train[binarymap].apply(binary_map)
df_test[binarymap] = df_test[binarymap].apply(binary_map)

df_train[trinarymap] = df_train[trinarymap].apply(trinary_map)
df_test[trinarymap] = df_test[trinarymap].apply(trinary_map)

truth_train = df_train.values[:, 0]
records_train = len(truth_train)
bias_train = np.ones((records_train, 1))

truth_val = df_test.values[:, 0]
records_val = len(truth_val)
bias_val = np.ones((records_val, 1))

num_vars = ['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'parking', 'price']

df_stdt = df_train
Standard = StandardScaler()
df_stdt[num_vars] = Standard.fit_transform(df_train[num_vars])

df_stdv = df_test
df_stdv[num_vars] = Standard.fit_transform(df_test[num_vars])

area_train = df_stdt.values[:, 1].reshape(records_train, 1)
beds_train = df_stdt.values[:, 2].reshape(records_train, 1)
baths_train = df_stdt.values[:, 3].reshape(records_train, 1)
stories_train = df_stdt.values[:, 4].reshape(records_train, 1)
parking_train = df_stdt.values[:, 10].reshape(records_train, 1)
mainrd_train = df_stdt.values[:, 5].reshape(records_train, 1)
guest_train = df_stdt.values[:, 6].reshape(records_train, 1)
basement_train = df_stdt.values[:, 7].reshape(records_train, 1)
hotwater_train = df_stdt.values[:, 8].reshape(records_train, 1)
hvac_train = df_stdt.values[:, 9].reshape(records_train, 1)
prefarea_train = df_stdt.values[:, 11].reshape(records_train, 1)

area_val = df_stdv.values[:, 1].reshape(records_val, 1)
beds_val = df_stdv.values[:, 2].reshape(records_val, 1)
baths_val = df_stdv.values[:, 3].reshape(records_val, 1)
stories_val = df_stdv.values[:, 4].reshape(records_val, 1)
parking_val = df_stdv.values[:, 10].reshape(records_val, 1)
mainrd_val = df_stdv.values[:, 5].reshape(records_val, 1)
guest_val = df_stdv.values[:, 6].reshape(records_val, 1)
basement_val = df_stdv.values[:, 7].reshape(records_val, 1)
hotwater_val = df_stdv.values[:, 8].reshape(records_val, 1)
hvac_val = df_stdv.values[:, 9].reshape(records_val, 1)
prefarea_val = df_stdv.values[:, 11].reshape(records_val, 1)

fulltrainexplanatory = torch.tensor(np.hstack((bias_train, area_train, beds_train, baths_train, stories_train, parking_train, mainrd_train, guest_train, basement_train, hotwater_train, hvac_train, prefarea_train)))
fulltestexplanatory = torch.tensor(np.hstack((bias_val, area_val, beds_val, baths_val, stories_val, parking_val, mainrd_val, guest_val, basement_val, hotwater_val, hvac_val, prefarea_val)))

partialtrainexplanatory = torch.tensor(np.hstack((bias_train, area_train, beds_train, baths_train, stories_train, parking_train)))
partialtestexplanatory = torch.tensor(np.hstack((bias_val, area_val, beds_val, baths_val, stories_val, parking_val)))

traintruth = torch.tensor(truth_train)
testtruth = torch.tensor(truth_val)

fulltrainexplanatory = fulltrainexplanatory.t()
fulltestexplanatory = fulltestexplanatory.t()
partialtrainexplanatory = partialtrainexplanatory.t()
partialtestexplanatory = partialtestexplanatory.t()

traintruth = traintruth.t()
testtruth = testtruth.t()


In [57]:
def partialmodel(t_u, b, x1, x2, x3, x4, x5):
  return b + x1*t_u + x2*t_u + x3*t_u + x4*t_u + x5*t_u;

def fullmodel(t_u, b, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11):
  return b + x1*t_u + x2*t_u + x3*t_u + x4*t_u + x5*t_u + x6*t_u + x7*t_u + x8*t_u + x9*t_u + x10*t_u + x11*t_u;

def mseloss(t_p, t_c):
  squared_diffs = (t_p - t_c) ** 2
  return squared_diffs.mean();

In [67]:
#5 Inputs
epochs = 5000 #100000
lr = 0.001 #  lr = 0.1, # lr = 0.01, # lr = 0.0001,
params = torch.tensor([0.0, 1.0, 1.0, 1.0, 1.0, 1.0], requires_grad=True)


#optimizer = optim.SGD([params], lr=lr)
optimizer = optim.Adam([params], lr=lr)

for epoch in range(1, epochs+1):
  if params.grad is not None:
    params.grad.zero_()

  t_p = partialmodel(partialtrainexplanatory, *params)
  loss = mseloss(t_p, traintruth)
  t_v = partialmodel(partialtestexplanatory, *params)
  valloss = mseloss(t_v, testtruth)

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  with torch.no_grad():
    params -= lr * params.grad

  if epoch % 500 == 0:
    print('Epoch %d, Loss %f, Validation Loss %f' % (epoch, float(loss), float(valloss)))


Epoch 500, Loss 6018205745742.316406, Validation Loss 5410330584617.195312
Epoch 1000, Loss 3444604505890.724609, Validation Loss 3180612894052.271484
Epoch 1500, Loss 3072193026913.602051, Validation Loss 2939480973457.093262
Epoch 2000, Loss 3018299780136.859375, Validation Loss 2935531045533.281250
Epoch 2500, Loss 3010500428527.332520, Validation Loss 2946731265375.321289
Epoch 3000, Loss 3009371799620.195312, Validation Loss 2952830325818.605957
Epoch 3500, Loss 3009208493021.770508, Validation Loss 2955416316937.838379
Epoch 4000, Loss 3009184873489.636230, Validation Loss 2956437656709.596191
Epoch 4500, Loss 3009181454701.430664, Validation Loss 2956830522146.016602
Epoch 5000, Loss 3009180959016.550293, Validation Loss 2956978086424.639160


In [69]:
#5 Inputs
epochs = 5000 #100000
lr = 0.001 #  lr = 0.0001, # lr = 0.1, # lr = 0.01,
params = torch.tensor([0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], requires_grad=True)


#optimizer = optim.SGD([params], lr=lr)
optimizer = optim.Adam([params], lr=lr)

for epoch in range(1, epochs+1):
  if params.grad is not None:
    params.grad.zero_()

  t_p = fullmodel(fulltrainexplanatory, *params)
  loss = mseloss(t_p, traintruth)
  t_v = fullmodel(fulltestexplanatory, *params)
  valloss = mseloss(t_v, testtruth)

  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  with torch.no_grad():
    params -= lr * params.grad

  if epoch % 500 == 0:
    print('Epoch %d, Loss %f, Validation Loss %f' % (epoch, float(loss), float(valloss)))


Epoch 500, Loss 6262994992080.244141, Validation Loss 5659523391881.043945
Epoch 1000, Loss 3678746474151.362305, Validation Loss 3425563450587.689453
Epoch 1500, Loss 3247658613606.986328, Validation Loss 3135899212735.298828
Epoch 2000, Loss 3175746320164.909180, Validation Loss 3121469881686.574219
Epoch 2500, Loss 3163749735938.838379, Validation Loss 3132905420597.503418
Epoch 3000, Loss 3161748608868.707520, Validation Loss 3140466469381.775391
Epoch 3500, Loss 3161414753261.788086, Validation Loss 3144037080527.580078
Epoch 4000, Loss 3161359049850.500977, Validation Loss 3145576332533.517578
Epoch 4500, Loss 3161349778990.086426, Validation Loss 3146216621395.637695
Epoch 5000, Loss 3161348230571.668945, Validation Loss 3146478851412.097656
