In [1]:
import pandas as pd
import numpy as np
import matplotlib as mlt
import matplotlib.pyplot as plt

In [50]:
train = pd.read_csv("../data/train.csv")
test = pd.read_csv("../data/test.csv")

In [51]:
y = train['SalePrice'] - train['MiscVal']

In [4]:
train = train.drop(['SalePrice'], axis=1)

In [5]:
data = pd.concat([train, test])

In [6]:
data['LifeYr'] = data['YrSold'] - data['YearBuilt']
data['RemodYr'] = data['YrSold'] - data['YearRemodAdd']
data['GarageYr'] = data['YrSold'] - data['GarageYrBlt']

In [7]:
data = data.drop(['YrSold', 'YearBuilt','YearRemodAdd','GarageYrBlt'], axis=1)
data = data.drop(['MiscVal'], axis=1)
data['Misc'] = data.MiscFeature.apply(lambda x: 0 if pd.isnull(x) else 1)
data = data.drop(['MiscFeature'], axis=1)

In [8]:
def get_qual(q):
    if q=='Ex':
        return 5
    elif q=='Gd':
        return 4
    elif q=='TA':
        return 3
    elif q=='Fa':
        return 2
    elif q=='Po':
        return 1
    else:
        return 0

In [9]:
def get_month(m):
    if m in [1, 2, 3]:
        return 1
    elif m in [4, 5, 6]:
        return 2
    elif m in [7, 8, 9]:
        return 3
    else:
         return 4

In [10]:
data['Mo'] = data.MoSold.apply(lambda x: get_month(x))
data['ExterQ'] = data.ExterQual.apply(lambda x: get_qual(x))
data['ExterC'] = data.ExterCond.apply(lambda x: get_qual(x))
data['BsmtQ'] = data.BsmtQual.apply(lambda x: get_qual(x))
data['BsmtC'] = data.BsmtCond.apply(lambda x: get_qual(x))
data['HeatQ'] = data.HeatingQC.apply(lambda x: get_qual(x))
data['KitchenQ'] = data.KitchenQual.apply(lambda x: get_qual(x))
data['FireplaceQ'] = data.FireplaceQu.apply(lambda x: get_qual(x))
data['GarageQ'] = data.GarageQual.apply(lambda x: get_qual(x))
data['GarageC'] = data.GarageCond.apply(lambda x: get_qual(x))
data['poolQ'] = data.PoolQC.apply(lambda x: get_qual(x))

In [11]:
data = data.drop(['MoSold', 'ExterQual', 'ExterCond', 'BsmtQual', 'BsmtCond', 'HeatingQC', 'KitchenQual', 'FireplaceQu', 'GarageQual', 'GarageCond', 'PoolQC'], axis=1)

In [12]:
data.eval('Pool = poolQ * PoolArea', inplace=True)

In [13]:
data['MSSubClass'] = data['MSSubClass'].astype(str)

In [14]:
mean_cols = data.mean()

In [15]:
data = data.fillna(mean_cols)

In [16]:
data = pd.get_dummies(data)

In [17]:
x_train = data.iloc[np.r_[0:1460]]

In [21]:
x_train

Unnamed: 0,Id,LotFrontage,LotArea,OverallQual,OverallCond,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,1,65.0,8450,7,5,196.0,706.0,0.0,150.0,856.0,...,0,0,0,1,0,0,0,0,1,0
1,2,80.0,9600,6,8,0.0,978.0,0.0,284.0,1262.0,...,0,0,0,1,0,0,0,0,1,0
2,3,68.0,11250,7,5,162.0,486.0,0.0,434.0,920.0,...,0,0,0,1,0,0,0,0,1,0
3,4,60.0,9550,7,5,0.0,216.0,0.0,540.0,756.0,...,0,0,0,1,1,0,0,0,0,0
4,5,84.0,14260,8,5,350.0,655.0,0.0,490.0,1145.0,...,0,0,0,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,62.0,7917,6,5,0.0,0.0,0.0,953.0,953.0,...,0,0,0,1,0,0,0,0,1,0
1456,1457,85.0,13175,6,6,119.0,790.0,163.0,589.0,1542.0,...,0,0,0,1,0,0,0,0,1,0
1457,1458,66.0,9042,7,9,0.0,275.0,0.0,877.0,1152.0,...,0,0,0,1,0,0,0,0,1,0
1458,1459,68.0,9717,5,6,0.0,49.0,1029.0,0.0,1078.0,...,0,0,0,1,0,0,0,0,1,0


In [20]:
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F

In [22]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.dense = nn.Sequential()
        self.dense.add_module("dense1", nn.Linear(266, 150))
        self.dense.add_module("relu1", nn.ReLU())
        self.dense.add_module("dense2", nn.Linear(150, 50))
        self.dense.add_module("relu2", nn.ReLU())
        self.dense.add_module("regression", nn.Linear(50, 1))
    
    def forward(self, x):
        y_pred = self.dense(x)
        return y_pred

In [25]:
net = Net()

In [26]:
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
loss = nn.MSELoss()

In [53]:
for epoch in range(10000):
    num = np.random.randint(1460)
    x = torch.from_numpy(x_train.values[num])
    x = torch.tensor(x, dtype=torch.float32)
    y_ = net(x)
    t = torch.Tensor(y.values[num])
    loss_ = loss(y_, t)
    if epoch % 500 == 0:
        print(epoch, loss_[0])
    optimizer.zero_grad()
    loss_.backward()
    optimizer.step()

  after removing the cwd from sys.path.


IndexError: invalid index of a 0-dim tensor. Use tensor.item() to convert a 0-dim tensor to a Python number

In [48]:
y

tensor(137800)