In [148]:
import torch

In [149]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [150]:
device

device(type='cpu')

In [151]:
from torch.utils.data import DataLoader,Dataset,TensorDataset

In [152]:
import pandas as pd
import numpy as np

In [153]:
data = pd.read_csv("/content/sample_data/california_housing_test.csv")
data.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.3,34.26,43.0,1510.0,310.0,809.0,277.0,3.599,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0


In [154]:
train_data = data.drop(["median_house_value"],axis=1)
train_data.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085
1,-118.3,34.26,43.0,1510.0,310.0,809.0,277.0,3.599
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375


In [155]:
# train_data 归一化处理
train_data = (train_data - train_data.mean()) / train_data.std()
train_data.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
0,-1.233523,0.814497,-0.146975,0.596319,0.315284,0.130224,0.317681,1.510494
1,0.646236,-0.645823,1.127377,-0.505466,-0.529167,-0.5762,-0.582646,-0.112305
2,0.891858,-0.87121,-0.146975,0.459002,-0.055216,0.078795,0.013924,1.070971
3,0.61616,-0.852428,-0.067328,-1.174887,-1.238892,-1.313675,-1.31057,1.255656
4,-0.040503,0.326159,-0.784152,-0.630257,-0.687953,-0.536415,-0.692108,-0.469003


In [156]:
label_data = data['median_house_value']
label_data.head()

0    344700.0
1    176500.0
2    270500.0
3    330000.0
4     81700.0
Name: median_house_value, dtype: float64

In [157]:
# 转换成numpy格式
label_data = label_data.values
label_data

array([344700., 176500., 270500., ...,  62000., 162500., 500001.])

In [158]:
train_data = train_data.values
train_data

array([[-1.2335231 ,  0.81449726, -0.14697532, ...,  0.1302239 ,
         0.31768141,  1.51049365],
       [ 0.64623618, -0.64582321,  1.1273772 , ..., -0.57619979,
        -0.58264578, -0.11230546],
       [ 0.89185805, -0.87121029, -0.14697532, ...,  0.07879471,
         0.0139236 ,  1.07097096],
       ...,
       [-0.05554062,  0.31207189, -1.50097488, ..., -0.68876181,
        -0.73862952, -0.81842125],
       [ 1.23773376, -0.72095223,  0.8884361 , ..., -1.31658616,
        -1.30236022, -0.28927927],
       [-0.02045178, -0.57069418,  1.04773017, ..., -0.63054007,
        -0.62916725,  2.56322358]])

In [159]:
label_data = np.array(label_data,dtype=np.float32)

In [160]:
train_data = np.array(train_data,dtype=np.float32)

In [161]:
class DealDataset(Dataset):
  def __init__(self,train_data,label_data):
    self.train_data = train_data
    self.label_data = label_data
    self.len = train_data.shape[0]
  
  def __getitem__(self,index):
    return self.train_data[index],self.label_data[index]

  def __len__(self):
    return self.len

In [162]:
# 提取完成train和label标签,然后通过pytorch相应的工具进行测试集和训练集的数据划分
dataloader = DataLoader(dataset=DealDataset(train_data,label_data),batch_size=16,shuffle=True)

In [163]:
# 通过逻辑回归预测房价
class LogitRegression(torch.nn.Module):
  def __init__(self,in_width,out_width=1):
    super().__init__()
    self.in_width = in_width
    self.out_width = out_width
    # self.n1 = torch.nn.Linear(in_width,128)
    # self.relu = torch.nn.ReLU()
    # self.n2 = torch.nn.Linear(128,64)
    # self.n3 = torch.nn.Linear(64,out_width)
    self.nn = torch.nn.Sequential(
        torch.nn.Linear(self.in_width,1024),
        torch.nn.ReLU(),
        torch.nn.Linear(1024,self.out_width)
    )
  
  def forward(self,x):
    # x = self.n1(x)
    # x = self.relu(x)
    # x = self.n2(x)
    # x = self.relu(x)
    # x = self.n3(x)
    return self.nn(x)

In [164]:
label_data.shape,train_data.shape

((3000,), (3000, 8))

In [165]:
model = LogitRegression(8,1) # 创建一个神经网络
model

LogitRegression(
  (nn): Sequential(
    (0): Linear(in_features=8, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=1, bias=True)
  )
)

In [166]:
for epoch in range(1):
  for i,data in enumerate(dataloader):
    inputs,lables = data
    print(epoch,i,inputs.data.size(),lables.data.size())

0 0 torch.Size([16, 8]) torch.Size([16])
0 1 torch.Size([16, 8]) torch.Size([16])
0 2 torch.Size([16, 8]) torch.Size([16])
0 3 torch.Size([16, 8]) torch.Size([16])
0 4 torch.Size([16, 8]) torch.Size([16])
0 5 torch.Size([16, 8]) torch.Size([16])
0 6 torch.Size([16, 8]) torch.Size([16])
0 7 torch.Size([16, 8]) torch.Size([16])
0 8 torch.Size([16, 8]) torch.Size([16])
0 9 torch.Size([16, 8]) torch.Size([16])
0 10 torch.Size([16, 8]) torch.Size([16])
0 11 torch.Size([16, 8]) torch.Size([16])
0 12 torch.Size([16, 8]) torch.Size([16])
0 13 torch.Size([16, 8]) torch.Size([16])
0 14 torch.Size([16, 8]) torch.Size([16])
0 15 torch.Size([16, 8]) torch.Size([16])
0 16 torch.Size([16, 8]) torch.Size([16])
0 17 torch.Size([16, 8]) torch.Size([16])
0 18 torch.Size([16, 8]) torch.Size([16])
0 19 torch.Size([16, 8]) torch.Size([16])
0 20 torch.Size([16, 8]) torch.Size([16])
0 21 torch.Size([16, 8]) torch.Size([16])
0 22 torch.Size([16, 8]) torch.Size([16])
0 23 torch.Size([16, 8]) torch.Size([16])
0 

In [167]:
# 上面的迭代测试通过,下面定义loss函数和optimizer
optimizer = torch.optim.Adam(model.parameters(),lr=0.1) # 其他保持默认参数

In [168]:
loss_fn = torch.nn.MSELoss()

In [169]:
for epoch in range(10):
  for i,data in enumerate(dataloader):
    x_data,x_label = data
    optimizer.zero_grad()
    y_label = model(x_data)
    # print(x_label.reshape(-1,1))
    # print(y_label)
    # print(x_label.shape,y_label.shape)
    # print(y_label - x_label)
    # print(type(y_label),type(x_label))
    # break;
    loss = loss_fn(y_label,x_label.reshape(-1,1))
    loss.backward()
    optimizer.step()
    if i% 1000 == 0:
      print(epoch,i,loss.item())

0 0 37970817024.0
1 0 16417343488.0
2 0 3258121984.0
3 0 9106311168.0
4 0 3530854912.0
5 0 6031241728.0
6 0 1882214400.0
7 0 2321033984.0
8 0 3433014016.0
9 0 2535810816.0
