In [3]:
import torch
import torch.utils.data as Data
from sklearn.datasets import load_iris, load_boston
import numpy as np
import pandas as pd
import sklearn

In [4]:
# Load the Boston housing features/targets as NumPy arrays and report their
# dtype and shape.
# NOTE(review): load_boston is deprecated (this cell emits the scikit-learn
# warning) and was removed in scikit-learn 1.2 — this cell and the matching
# import only run on older releases. Consider migrating to one of the
# alternatives the warning suggests.
boston_x, boston_y = load_boston(return_X_y=True)
for array_name, array in (('boston_x', boston_x), ('boston_y', boston_y)):
    print('{}.dtype:{}, shape:{}'.format(array_name, array.dtype, array.shape))

boston_x.dtype:float64, shape:(506, 13)
boston_y.dtype:float64, shape:(506,)



    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_h

In [7]:
# Convert the float64 NumPy arrays into float32 tensors: the cast happens on
# the NumPy side, then torch.from_numpy wraps the resulting array.
train_xt, train_yt = (
    torch.from_numpy(source.astype(np.float32))
    for source in (boston_x, boston_y)
)
for tensor_name, tensor in (('train_xt', train_xt), ('train_yt', train_yt)):
    print('{}.dtype:{}'.format(tensor_name, tensor.dtype))

train_xt.dtype:torch.float32
train_yt.dtype:torch.float32


In [8]:
# Pair each feature row in train_xt with its target in train_yt so the two
# tensors are indexed together as a single dataset.
train_data = Data.TensorDataset(train_xt, train_yt)
# Data loader that serves the training set in shuffled mini-batches of 64,
# loading through one worker subprocess.
train_loader = Data.DataLoader(
    dataset=train_data,
    batch_size=64,
    shuffle=True,
    num_workers=1,
)

# Inspect only the first mini-batch. next(iter(...)) pulls exactly one batch;
# looping with enumerate() and breaking at step 1 would fetch a second batch
# just to discard it, and would leave b_x/b_y bound to that unprinted batch.
b_x, b_y = next(iter(train_loader))
print('b_x.shape:{}, dtype:{}'.format(b_x.shape, b_x.dtype))
print('b_y.shape:{}, dtype:{}'.format(b_y.shape, b_y.dtype))

b_x.shape:torch.Size([64, 13]), dtype:torch.float32
b_y.shape:torch.Size([64]), dtype:torch.float32


## 高维数组的分类数据准备

In [9]:
# Load the iris features and integer class labels as NumPy arrays and report
# their dtype and shape.
iris_x, iris_y = load_iris(return_X_y=True)
for array_name, array in (('iris_x', iris_x), ('iris_y', iris_y)):
    print('{}.dtype:{}, shape:{}'.format(array_name, array.dtype, array.shape))

iris_x.dtype:float64, shape:(150, 4)
iris_y.dtype:int64, shape:(150,)


In [12]:
# Features become float32 tensors; class labels are converted as int64.
train_xt = torch.from_numpy(iris_x.astype(np.float32))
train_yt = torch.from_numpy(iris_y.astype(np.int64))

# Pair features with labels, then serve them in shuffled mini-batches of 32
# through one worker subprocess.
train_data = Data.TensorDataset(train_xt, train_yt)
train_loader = Data.DataLoader(
    dataset=train_data,
    batch_size=32,
    shuffle=True,
    num_workers=1,
)

# Inspect only the first mini-batch. next(iter(...)) pulls exactly one batch;
# looping with enumerate() and breaking at step 1 would fetch a second batch
# just to discard it, and would leave b_x/b_y bound to that unprinted batch.
b_x, b_y = next(iter(train_loader))
print('b_x.shape:{}, dtype:{}'.format(b_x.shape, b_x.dtype))
print('b_y.shape:{}, dtype:{}'.format(b_y.shape, b_y.dtype))

b_x.shape:torch.Size([32, 4]), dtype:torch.float32
b_y.shape:torch.Size([32]), dtype:torch.int64


# 图像数据

In [13]:
import torch
import torch.utils.data as Data
from torchvision.datasets import FashionMNIST
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

In [15]:
# FashionMNIST training split, stored under ./data/FashionMNIST (downloaded
# when requested via download=True); ToTensor() is applied to each image as
# it is read through the dataset.
train_data = FashionMNIST(
    './data/FashionMNIST',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Shuffled mini-batches of 64, loaded through two worker subprocesses.
train_loader = Data.DataLoader(train_data, batch_size=64, shuffle=True, num_workers=2)

# len() of a DataLoader is the number of batches per pass over the data.
print('the number of batch in dataloader is :{}'.format(len(train_loader)))

the number of batch in dataloader is :938


In [17]:
# Work on the raw image tensor directly (bypassing the dataset's transform):
# cast to float32, divide by 255 (assumes raw pixel values span 0-255 —
# confirm), and insert a channel axis at dim 1.
train_data_x = train_data.data.float().div(255.0).unsqueeze(1)
train_data_y = train_data.targets
for tensor_name, tensor in (('train_data_x', train_data_x), ('train_data_y', train_data_y)):
    print('{}.shape:{}'.format(tensor_name, tensor.shape))

train_data_x.shape:torch.Size([60000, 1, 28, 28])
train_data_y.shape:torch.Size([60000])
