In [3]:
import torch

In [5]:
x = torch.arange(12)
x

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

### 通过张量的shape属性访问张良的形状和张两种元素的总数

In [7]:
x.shape
x.numel()

12

In [9]:
X = x.reshape(3,4)
X

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [11]:
torch.zeros((3,4))

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [13]:
X = torch.arange(12,dtype = torch.float32).reshape((3, 4))
Y = torch.tensor([[2.0, 1, 4,3],[1,2,3,4],[4,3,2,1]])
torch.cat((X, Y),dim = 0),torch.cat((X,Y), dim = 1)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [ 2.,  1.,  4.,  3.],
         [ 1.,  2.,  3.,  4.],
         [ 4.,  3.,  2.,  1.]]),
 tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
         [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
         [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]]))

### 广播机制

In [15]:
a = torch.arange(3).reshape((3,1))
b = torch.arange(2).reshape((1,2))
a + b

tensor([[0, 1],
        [1, 2],
        [2, 3]])

### 数据预处理

In [17]:
import os
os.makedirs(os.path.join('..','data'),exist_ok = True)
data_file = os.path.join('..','data','house_tiny.csv')
with open(data_file, 'w') as f:
    f.write('NumRooms,Alley,Test,Price\n') # 列名
    f.write('NA,Pave,NA,127500\n') # 每⾏表⽰⼀个数据样本
    f.write('2,NA,1,106000\n')
    f.write('4,NA,NA,178100\n')
    f.write('NA,NA,1,140000\n')

In [19]:
import pandas as pd
data = pd.read_csv(data_file)
print(data)

   NumRooms Alley  Test   Price
0       NaN  Pave   NaN  127500
1       2.0   NaN   1.0  106000
2       4.0   NaN   NaN  178100
3       NaN   NaN   1.0  140000


### 处理缺失值

In [21]:
inputs, outputs = data.iloc[:,0:2],data.iloc[:,2]
inputs = inputs.fillna(inputs.mean())
print(inputs)
print(data.iloc[:])

   NumRooms Alley
0       3.0  Pave
1       2.0   NaN
2       4.0   NaN
3       3.0   NaN
   NumRooms Alley  Test   Price
0       NaN  Pave   NaN  127500
1       2.0   NaN   1.0  106000
2       4.0   NaN   NaN  178100
3       NaN   NaN   1.0  140000


In [23]:
inputs = pd.get_dummies(inputs,dummy_na = True)
print(inputs)

   NumRooms  Alley_Pave  Alley_nan
0       3.0           1          0
1       2.0           0          1
2       4.0           0          1
3       3.0           0          1


In [25]:
X,y = torch.tensor(inputs.values),torch.tensor(outputs.values)
X,y

(tensor([[3., 1., 0.],
         [2., 0., 1.],
         [4., 0., 1.],
         [3., 0., 1.]], dtype=torch.float64),
 tensor([nan, 1., nan, 1.], dtype=torch.float64))

### 删除缺失值最多的列
### 将预处理后的数据集转化为张量模式

In [27]:
data = pd.read_csv(data_file)
print(data)

   NumRooms Alley  Test   Price
0       NaN  Pave   NaN  127500
1       2.0   NaN   1.0  106000
2       4.0   NaN   NaN  178100
3       NaN   NaN   1.0  140000


In [36]:
# 删除缺失值最多的列
count = data.isna().sum()
print(count.idxmax())
data = data.drop(count.idxmax(),1)
print(data)

NumRooms
   Test   Price
0   NaN  127500
1   1.0  106000
2   NaN  178100
3   1.0  140000


In [39]:
# 将预处理后的数据集转化为张量模式
torch.tensor(data.values)

tensor([[       nan, 1.2750e+05],
        [1.0000e+00, 1.0600e+05],
        [       nan, 1.7810e+05],
        [1.0000e+00, 1.4000e+05]], dtype=torch.float64)