In [2]:
# Iris is a genus of hundreds of species of flowering plants with showy flowers. 
# The Iris data set consists of 150 samples from three species of Iris which are hard to distinguish (Iris setosa, Iris virginica and Iris versicolor). 
# There are four features from each sample: the length and the width of the sepals and petals, in centimeters. 
# Based on these features, the goal is to predict which species of Iris the sample belongs to.

In [2]:
import torch
import torch.nn as nn

In [5]:
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-win_amd64.whl.metadata (15 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.1-cp312-cp312-win_amd64.whl (11.1 MB)
   ---------------------------------------- 0.0/11.1 MB ? eta -:--:--
   ---------------------------------------- 0.1/11.1 MB 1.7 MB/s eta 0:00:07
   ---------------------------------------- 0.1/11.1 MB 1.7 MB/s eta 0:00:07
    --------------------------------------- 0.2/11.1 MB 1.8 MB/s eta 0:00:07
    --------------------------------------- 0.3/11.1 MB 1.9 MB/s eta 0:00:06
   - -------------------------------------- 0.3/11.1 MB 1.6 MB/s eta 0:00:07
   - -------------------------------------- 0.4/11.1 MB 1.8 MB/s eta 0:00:07
   - -------------------------------------- 0.5/11.1 MB 1.7 MB/s eta 0:00:

In [6]:
# Choose where tensors and the model will live: the current CUDA device index
# when a GPU is visible, otherwise the string 'cpu'.
if not torch.cuda.is_available():
    print("Fail to find GPU, Will use CPU.")
    device = 'cpu'
else:
    device = torch.cuda.current_device()
    print("Current device:", torch.cuda.get_device_name(device))

Current device: NVIDIA GeForce RTX 4060


In [35]:
### Loading dataset

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the Iris dataset bundled with scikit-learn.
iris = load_iris()
X, y = iris.data, iris.target  # feature matrix and integer class labels
# Derive the number of classes from the dataset instead of hard-coding 3.
num_classes = len(iris.target_names)

In [51]:
### Split the dataset into training dataset and test dataset

# Hold out 30% of the samples for testing; random_state=0 keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

### Process the data
# Convert both feature splits to float tensors (torch.float is 32-bit).
X_train, X_test = (
    torch.tensor(features, dtype=torch.float) for features in (X_train, X_test)
)

# y_train = torch.tensor(y_train, dtype=torch.long).unsqueeze(1)
# y_test = torch.tensor(y_test, dtype=torch.long).unsqueeze(1)
# y_train_one_hot = torch.zeros(len(y_train), num_classes, dtype=torch.float).scatter_(dim=1, index=y_train, value=1)
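# # scatter_ sets the entry at the chosen dimension and index to 1, turning the matrix into one-hot
# # torch.tensor(y_train).unsqueeze(1) reshapes the class labels, e.g. from shape [3] to [3, 1]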
# y_test_one_hot = torch.zeros(len(y_test), num_classes, dtype=torch.float).scatter_(dim=1, index=y_test, value=1)

In [52]:
# Keep the labels as integer class indices (torch.long), one per sample -> shape [N].
y_train, y_test = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

In [53]:
### Initialize a model with 4 hidden layers and a hidden layer size of 768.
### Define the model
# Four (Linear -> ReLU) hidden stages, each 768 units wide, followed by a
# 3-unit linear output layer. The first stage takes the 4 input features.
xor_mlp_pytorch = nn.Sequential(
    *(
        layer
        for fan_in in (4, 768, 768, 768)
        for layer in (nn.Linear(fan_in, 768), nn.ReLU())
    ),
    nn.Linear(768, 3),
)

### initialize the weights
def init_weights(m):
    """Re-initialise a single module in place; written to be passed to ``Module.apply``.

    ``nn.Linear`` modules get Xavier-uniform weights and, when a bias exists,
    a bias filled with zeros. Any other module type is left untouched.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)
    if m.bias is None:
        return
    nn.init.constant_(m.bias, 0)
### apply the init weights (Module.apply calls init_weights on the model's submodules)
xor_mlp_pytorch.apply(init_weights)

### define loss fn
# Cross-entropy between the model's outputs and the target labels.
loss_fn = nn.CrossEntropyLoss()

### Define optimizer
# Plain SGD over every model parameter with a learning rate of 0.05.
optimizer = torch.optim.SGD(params=xor_mlp_pytorch.parameters(), lr=0.05)

In [54]:
# ### Training

# import time

# start = time.time()

# # Training loop
# epochs = 500
# for epoch in range(epochs):
#     optimizer.zero_grad()
#     output = xor_mlp_pytorch(X_train)
#     loss = loss_fn(output, y_train_one_hot)
#     loss.backward()
#     optimizer.step()

# end = time.time()
# print(end - start)


In [57]:
import time

from torch.utils.data import DataLoader, TensorDataset

# Number of full passes over the training set. Defined here, together with the
# `time` import, so this cell runs on a fresh kernel without a NameError.
epochs = 500

# Move the model and the whole training set to the selected device once, up front.
xor_mlp_pytorch = xor_mlp_pytorch.to(device)
X_train = X_train.to(device)
y_train = y_train.to(device)

# Create a DataLoader for batching
dataset = TensorDataset(X_train, y_train)
batch_size = 64
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

start = time.time()
for epoch in range(epochs):
    for batch_input, batch_target in dataloader:
        # Batches are drawn from tensors that already live on `device`,
        # so no per-batch transfer is needed.
        optimizer.zero_grad()
        output = xor_mlp_pytorch(batch_input)
        loss = loss_fn(output, batch_target)  # targets are class indices, not one-hot
        loss.backward()
        optimizer.step()

# CUDA kernels are launched asynchronously; wait for them to finish so the
# measured wall-clock time covers the actual training work.
if torch.cuda.is_available():
    torch.cuda.synchronize()
end = time.time()
print(end - start)

1.8478806018829346


In [45]:
# ### Training using DataLoader, can be faster
# from torch.utils.data import DataLoader, TensorDataset

# xor_mlp_pytorch = xor_mlp_pytorch.to(device)
# X_train = X_train.to(device)
# y_train_one_hot = y_train_one_hot.to(device)

# # Create a DataLoader for batching
# dataset = TensorDataset(X_train, y_train_one_hot)
# batch_size = 64
# dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# start = time.time()

# #Training loop
# epochs = 500
# for epoch in range(epochs):
#     for batch_input, batch_target, in dataloader:
#         batch_input, batch_target = batch_input.to(device), batch_target.to(device)

#         optimizer.zero_grad()

#         output = xor_mlp_pytorch(batch_input)
#         loss = loss_fn(output, batch_target)
#         loss.backward()
#         optimizer.step()

# end = time.time()

# print(end - start)

1.9780936241149902


In [58]:
### Evaluation
import numpy as np

def accuracy(predictions, targets):
    """Return the fraction of positions where ``predictions`` equals ``targets``.

    The two inputs are compared with ``==`` and the mean of the resulting
    boolean values is returned via ``np.mean``.
    """
    matches = predictions == targets
    return np.mean(matches)

# Inference only: disable autograd so no computation graph is built for the
# test-set forward pass. The predicted class is the index of the largest output.
with torch.no_grad():
    predictions = xor_mlp_pytorch(X_test.to(device)).argmax(dim=1)

targets = y_test
# Bring both tensors to host memory before converting them to NumPy arrays.
accuracy_value = accuracy(predictions.cpu().numpy(), targets.cpu().numpy())
print(accuracy_value)

0.9777777777777777


In [None]:
# Note: when computing the loss with CrossEntropy, one-hot encoding is not needed; the x and y tensors can be used directly (y holding class indices).