## by： AI蜗牛车
## 公众号： AI蜗牛车
## wx：Che_Hongshu

# Demo1-TensorDataset

In [4]:
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np
import torch

In [5]:
# Read the PRSA CSV file into a pandas DataFrame.
data_df = pd.read_csv('PRSA_data_2010.1.1-2014.12.31.csv')

In [6]:
# Preview the first rows of the loaded table.
data_df.head()

Unnamed: 0,No,year,month,day,hour,pm2.5,DEWP,TEMP,PRES,cbwd,Iws,Is,Ir
0,1,2010,1,1,0,,-21,-11.0,1021.0,NW,1.79,0,0
1,2,2010,1,1,1,,-21,-12.0,1020.0,NW,4.92,0,0
2,3,2010,1,1,2,,-21,-11.0,1019.0,NW,6.71,0,0
3,4,2010,1,1,3,,-21,-14.0,1019.0,NW,9.84,0,0
4,5,2010,1,1,4,,-20,-12.0,1018.0,NW,12.97,0,0


In [7]:
# Column names used as inputs and as the prediction target.
feature_col = [
    'No', 'year', 'month', 'day', 'hour',
    'DEWP', 'TEMP', 'PRES', 'Iws', 'Is', 'Ir',
]
label_col = ['pm2.5']

# .loc slices by index label and includes the end label, so ":127" keeps
# every row up to and including label 127.
data_df_x = data_df.loc[:127, feature_col]
data_df_y = data_df.loc[:127, label_col]

In [8]:
# Pull the underlying NumPy arrays out of the feature and label DataFrames.
data_numpy_x, data_numpy_y = data_df_x.values, data_df_y.values

In [9]:
# Convert both NumPy arrays (features first, then labels) to torch tensors.
X, Y = map(torch.from_numpy, (data_numpy_x, data_numpy_y))

In [44]:
# Bundle the feature and label tensors into a single dataset object.
dataset = TensorDataset(X, Y)

In [45]:
# Shuffled mini-batches of 64 samples, loaded with 2 workers.
data_loader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=2)

In [46]:
# Walk over the loader once and report each batch's index, types and shapes.
for i, (x, y) in enumerate(data_loader):
    print(i)
    print(type(x), type(y))
    # Tensor.size() can be called directly; the legacy `.data` hop is unnecessary.
    print(x.size(), y.size())

0
<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([64, 11]) torch.Size([64, 1])
1
<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([64, 11]) torch.Size([64, 1])


# Demo2-自定义Dataset(结构化数据)

In [49]:
from torch.utils.data import Dataset,DataLoader
import pandas as pd
import numpy as np
import torch

In [50]:
class MyDataset(Dataset):
    """Dataset of (features, label) tensor pairs read from a PRSA-style CSV file.

    The feature columns are ``FEATURE_COLUMNS`` and the label column is
    ``pm2.5``.  Values are taken from the file as parsed by pandas; missing
    values are neither filtered nor imputed here.

    Args:
        csv_path: Path of the CSV file to read.  Defaults to the file used
            throughout this notebook.
        n_rows: Number of leading rows to keep.  Defaults to 128, matching the
            rows that the previous hard-coded ``.loc[:127]`` slice selected.
            ``None`` keeps every row.
    """

    FEATURE_COLUMNS = ['No', 'year', 'month', 'day', 'hour',
                       'DEWP', 'TEMP', 'PRES', 'Iws', 'Is', 'Ir']
    LABEL_COLUMNS = ['pm2.5']

    def __init__(self, csv_path='PRSA_data_2010.1.1-2014.12.31.csv', n_rows=128):
        data_df = pd.read_csv(csv_path)
        # Positional slice of the first n_rows rows (the end is exclusive here).
        head_df = data_df.iloc[:n_rows]
        self.X = torch.from_numpy(head_df[self.FEATURE_COLUMNS].values)
        self.Y = torch.from_numpy(head_df[self.LABEL_COLUMNS].values)
        # Cached sample count, returned by __len__.
        self.len = self.X.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` tensors stored at *index*."""
        return self.X[index], self.Y[index]

    def __len__(self):
        """Return the number of samples kept from the CSV file."""
        return self.len

# Instantiate the custom dataset; it is used exactly like the official
# TensorDataset from Demo 1.
dataset2 = MyDataset()

data_loader2 = DataLoader(dataset=dataset2,
                          batch_size=64,
                          shuffle=True)

# Report each batch's index, types and shapes.
for i, (x, y) in enumerate(data_loader2):
    print(i)
    print(type(x), type(y))
    # Tensor.size() can be called directly; the legacy `.data` hop is unnecessary.
    print(x.size(), y.size())

0
<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([64, 11]) torch.Size([64, 1])
1
<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([64, 11]) torch.Size([64, 1])


# Demo3-自定义Dataset(图片数据)

In [97]:
from torch.utils.data import DataLoader,Dataset
from PIL import Image
import matplotlib.pyplot as plt
import os
import torch
from torchvision import transforms
import numpy as np

In [98]:
class MyData_Animal(Dataset):
    """Image dataset whose labels are encoded in the file names.

    Every entry of ``root_dir`` is opened as an image and resized to 64x64.
    The label is the part of the file name before the first dot, e.g. a name
    starting with ``dog.`` yields ``'dog'``.

    Args:
        root_dir: Directory holding the image files.
        transform: Optional callable applied to the resized PIL image.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Bare file names (not full paths) of everything inside root_dir.
        self.images_path_list = os.listdir(self.root_dir)

    def __len__(self):
        """Return the number of entries found in ``root_dir``."""
        return len(self.images_path_list)

    @staticmethod
    def _label_from_filename(filename):
        """Return the text before the first dot of *filename*'s base name."""
        # basename() strips any directory part using the running OS's
        # separator, so this works on Windows and POSIX alike. The previous
        # split on a literal backslash returned an empty label for POSIX
        # paths such as './train/<name>'.
        return os.path.basename(filename).split('.')[0]

    def __getitem__(self, index):
        """Return ``(image, label)`` for the file at position *index*."""
        image_name = self.images_path_list[index]
        img_path = os.path.join(self.root_dir, image_name)
        img = Image.open(img_path)
        # Fixed 64x64 size so the default collation can stack samples.
        img = img.resize((64, 64))
        label = self._label_from_filename(image_name)

        if self.transform:
            img = self.transform(img)

        return img, label

In [103]:
transform = transforms.Compose([
    # transforms.Resize(64),  # disabled: MyData_Animal.__getitem__ resizes to 64x64 itself
    transforms.ToTensor(),
    # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # optional normalization
])

mydataset_animal = MyData_Animal('./train', transform)

data_loader3 = DataLoader(dataset=mydataset_animal,
                          batch_size=4,
                          shuffle=True)

# Report each batch's index, types, image-batch shape and labels.
for i, (img, label) in enumerate(data_loader3):
    print(i)
    print(type(img), type(label))
    # Tensor.size() can be called directly; the legacy `.data` hop is unnecessary.
    print(img.size(), label)

0
<class 'torch.Tensor'> <class 'tuple'>
torch.Size([4, 3, 64, 64]) ('dog', 'cat', 'cat', 'dog')
1
<class 'torch.Tensor'> <class 'tuple'>
torch.Size([4, 3, 64, 64]) ('cat', 'dog', 'cat', 'dog')
2
<class 'torch.Tensor'> <class 'tuple'>
torch.Size([4, 3, 64, 64]) ('dog', 'dog', 'dog', 'cat')
3
<class 'torch.Tensor'> <class 'tuple'>
torch.Size([4, 3, 64, 64]) ('cat', 'cat', 'dog', 'dog')
4
<class 'torch.Tensor'> <class 'tuple'>
torch.Size([4, 3, 64, 64]) ('dog', 'dog', 'dog', 'dog')
5
<class 'torch.Tensor'> <class 'tuple'>
torch.Size([2, 3, 64, 64]) ('cat', 'dog')


In [100]:
# Number of samples in the image dataset.
len(mydataset_animal)

22