#

## Device

In [7]:
import torch
device = (
    "mps"
    if getattr(torch, "has_mps", False)
    else "cuda"
    if torch.cuda.is_available()
    else "cpu"
)
print(f"Using device: {device}")

Using device: cpu


## Datová sada Auta

### Raw (surová data)

In [1]:
import aiohttp
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"

async with aiohttp.ClientSession() as session:
    async with session.get(url) as resp:
        # print(resp.status)
        textresponse = await resp.text()
print(textresponse[:1000])

18.0   8   307.0      130.0      3504.      12.0   70  1	"chevrolet chevelle malibu"
15.0   8   350.0      165.0      3693.      11.5   70  1	"buick skylark 320"
18.0   8   318.0      150.0      3436.      11.0   70  1	"plymouth satellite"
16.0   8   304.0      150.0      3433.      12.0   70  1	"amc rebel sst"
17.0   8   302.0      140.0      3449.      10.5   70  1	"ford torino"
15.0   8   429.0      198.0      4341.      10.0   70  1	"ford galaxie 500"
14.0   8   454.0      220.0      4354.       9.0   70  1	"chevrolet impala"
14.0   8   440.0      215.0      4312.       8.5   70  1	"plymouth fury iii"
14.0   8   455.0      225.0      4425.      10.0   70  1	"pontiac catalina"
15.0   8   390.0      190.0      3850.       8.5   70  1	"amc ambassador dpl"
15.0   8   383.0      170.0      3563.      10.0   70  1	"dodge challenger se"
14.0   8   340.0      160.0      3609.       8.0   70  1	"plymouth 'cuda 340"
15.0   8   400.0      150.0      3761.       9.5   70  1	"chevrolet monte ca

### Reformat

In [2]:
import re
textresponse = re.sub(' +', ' ', textresponse)
textresponse = re.sub('\t', ' ', textresponse)
print(textresponse[:1000])

18.0 8 307.0 130.0 3504. 12.0 70 1 "chevrolet chevelle malibu"
15.0 8 350.0 165.0 3693. 11.5 70 1 "buick skylark 320"
18.0 8 318.0 150.0 3436. 11.0 70 1 "plymouth satellite"
16.0 8 304.0 150.0 3433. 12.0 70 1 "amc rebel sst"
17.0 8 302.0 140.0 3449. 10.5 70 1 "ford torino"
15.0 8 429.0 198.0 4341. 10.0 70 1 "ford galaxie 500"
14.0 8 454.0 220.0 4354. 9.0 70 1 "chevrolet impala"
14.0 8 440.0 215.0 4312. 8.5 70 1 "plymouth fury iii"
14.0 8 455.0 225.0 4425. 10.0 70 1 "pontiac catalina"
15.0 8 390.0 190.0 3850. 8.5 70 1 "amc ambassador dpl"
15.0 8 383.0 170.0 3563. 10.0 70 1 "dodge challenger se"
14.0 8 340.0 160.0 3609. 8.0 70 1 "plymouth 'cuda 340"
15.0 8 400.0 150.0 3761. 9.5 70 1 "chevrolet monte carlo"
14.0 8 455.0 225.0 3086. 10.0 70 1 "buick estate wagon (sw)"
24.0 4 113.0 95.00 2372. 15.0 70 3 "toyota corona mark ii"
22.0 6 198.0 95.00 2833. 15.5 70 1 "plymouth duster"
18.0 6 199.0 97.00 2774. 15.5 70 1 "amc hornet"
21.0 6 200.0 85.00 2587. 16.0 70 1 "ford maverick"
27.0 4 97.00 8

### Načtení do Pandas

In [3]:
import pandas as pd
from io import StringIO 


def getAutoDataFrame():
    df = pd.read_csv(StringIO(textresponse), sep=" ", names=["mpg", "cylinders", "displacement", "horsepower", "weight", "acceleration", "year", "origin", "name"], na_values=["?"])
    return df

pd.set_option("display.max_columns", 7)

df = getAutoDataFrame()
df

Unnamed: 0,mpg,cylinders,displacement,...,year,origin,name
0,18.0,8,307.0,...,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,...,70,1,buick skylark 320
2,18.0,8,318.0,...,70,1,plymouth satellite
3,16.0,8,304.0,...,70,1,amc rebel sst
4,17.0,8,302.0,...,70,1,ford torino
...,...,...,...,...,...,...,...
393,27.0,4,140.0,...,82,1,ford mustang gl
394,44.0,4,97.0,...,82,2,vw pickup
395,32.0,4,135.0,...,82,1,dodge rampage
396,28.0,4,120.0,...,82,1,ford ranger


### Transformace dat pro učení sítí

In [14]:
import time

import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import tqdm
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset

cars = df["name"]

# Handle missing value
df["horsepower"] = df["horsepower"].fillna(df["horsepower"].median())

# Pandas to Numpy
x = df[
    [
        "cylinders",
        "displacement",
        "horsepower",
        "weight",
        "acceleration",
        "year",
        "origin",
    ]
].values
y = df["mpg"].values  # regression

# Split into validation and training sets
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)

x_train = torch.tensor(x_train, device=device, dtype=torch.float32)
y_train = torch.tensor(y_train, device=device, dtype=torch.float32)

x_test = torch.tensor(x_test, device=device, dtype=torch.float32)
y_test = torch.tensor(y_test, device=device, dtype=torch.float32)

In [15]:
x_train

tensor([[  4.0000,  98.0000,  66.0000,  ...,  14.4000,  78.0000,   1.0000],
        [  4.0000, 108.0000,  94.0000,  ...,  16.5000,  73.0000,   3.0000],
        [  6.0000, 199.0000,  97.0000,  ...,  15.5000,  70.0000,   1.0000],
        ...,
        [  4.0000, 134.0000,  95.0000,  ...,  14.8000,  78.0000,   3.0000],
        [  4.0000,  89.0000,  62.0000,  ...,  17.3000,  81.0000,   3.0000],
        [  4.0000,  97.0000,  46.0000,  ...,  21.0000,  73.0000,   2.0000]])

## Neuronové sítě reprezentované sekvencemi

In [17]:
import time

import numpy as np
import pandas as pd
import torch
import tqdm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset

# Create datasets
BATCH_SIZE = 16

dataset_train = TensorDataset(x_train, y_train)
dataloader_train = DataLoader(
    dataset_train, batch_size=BATCH_SIZE, shuffle=True)

dataset_test = TensorDataset(x_test, y_test)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=True)

# Create model using nn.Sequential
model = nn.Sequential(
    nn.Linear(x_train.shape[1], 50), 
    nn.ReLU(), 
    nn.Linear(50, 25), 
    nn.ReLU(), 
    nn.Linear(25, 1)
)

## Neuronové sítě reprezentované třídami

In [18]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch.autograd import Variable


class Net(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Net, self).__init__()

        # Define each of the layers
        self.layer1 = nn.Linear(input_dim, 50)
        self.layer2 = nn.Linear(50, 25)
        self.layer3 = nn.Linear(25, output_dim)

    def forward(self, x):
        # Pass the input through each of the layers
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)


# Replace missing horsepower values with median
df["horsepower"] = df["horsepower"].fillna(df["horsepower"].median())

# Convert pandas DataFrame to PyTorch tensors
x = torch.tensor(
    df[
        [
            "cylinders",
            "displacement",
            "horsepower",
            "weight",
            "acceleration",
            "year",
            "origin",
        ]
    ].values,
    device=device,
    dtype=torch.float32,
)
y = torch.tensor(df["mpg"].values, device=device,
                 dtype=torch.float32)  # regression

# Initialize the model, loss function, and optimizer
model = Net(x.shape[1], 1).to(device)
model = torch.compile(model,backend="aot_eager").to(device)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training loop
for epoch in range(1000):
    # Zero gradients
    optimizer.zero_grad()

    # Forward pass
    outputs = model(x).flatten()

    # Compute loss
    loss = loss_fn(outputs, y)

    # Backward pass and optimize
    loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print(f"Epoch {epoch}, loss: {loss.item()}")

Epoch 0, loss: 3017.869384765625
Epoch 100, loss: 163.57090759277344
Epoch 200, loss: 107.23971557617188
Epoch 300, loss: 50.896141052246094
Epoch 400, loss: 36.833656311035156
Epoch 500, loss: 29.562450408935547
Epoch 600, loss: 23.77164649963379
Epoch 700, loss: 18.963064193725586
Epoch 800, loss: 15.511846542358398
Epoch 900, loss: 219.20545959472656


## Testování

In [19]:
from sklearn import metrics

# Measure RMSE error.  RMSE is common for regression.
pred = model(x_test)
score = torch.sqrt(torch.nn.functional.mse_loss(pred.flatten(), y_test))
print(f"Final score (RMSE): {score}")

Final score (RMSE): 3.7692348957061768
