In [556]:
import typing as t
import nltk
from pathlib import Path
import torch

import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split

from torch.utils.data import DataLoader, Dataset, Subset, random_split

In [557]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\super\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\super\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\super\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\super\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\super\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [558]:
DATA_DIR = Path("data/")

In [559]:
def torch_train_test_split(dataset: t.Union[Dataset, t.Sized], train_part: float) -> t.Tuple[Subset, Subset]:
    train_size = round(train_part * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, lengths=(train_size, test_size))
    return train_dataset, test_dataset


def na_stat(df: pd.DataFrame) -> pd.Series:
    stat = df.isna().sum()
    return stat[stat > 0]


def duplicates_stat(df: pd.DataFrame) -> pd.Series:
    return df[df.duplicated()].value_counts()


def get_categorical_columns(df: pd.DataFrame) -> pd.Index:
    return df.select_dtypes(object).columns


def get_numerical_columns(df: pd.DataFrame) -> pd.Index:
    return df.select_dtypes(np.number).columns


def count_categories(df: pd.DataFrame) -> pd.Series:
    return df[get_categorical_columns(df)].nunique()

# Regression

## concrete.csv

In [560]:
concrete_df: pd.DataFrame = pd.read_csv(DATA_DIR / "regression/concrete.csv")
print(concrete_df.shape)
concrete_df.head()

(1030, 9)


Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [561]:
na_stat(concrete_df)

Series([], dtype: int64)

In [562]:
duplicates_stat(concrete_df)

Cement (component 1)(kg in a m^3 mixture)  Blast Furnace Slag (component 2)(kg in a m^3 mixture)  Fly Ash (component 3)(kg in a m^3 mixture)  Water  (component 4)(kg in a m^3 mixture)  Superplasticizer (component 5)(kg in a m^3 mixture)  Coarse Aggregate  (component 6)(kg in a m^3 mixture)  Fine Aggregate (component 7)(kg in a m^3 mixture)  Age (day)  strength
362.6                                      189.0                                                  0.0                                         164.9                                      11.6                                                 944.7                                                 755.8                                              3          35.30       3
                                                                                                                                                                                                                                                                             

In [563]:
concrete_df = concrete_df.drop_duplicates()
duplicates_stat(concrete_df)

Series([], dtype: int64)

In [564]:
print(concrete_df.shape)
concrete_df.head()

(1005, 9)


Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [565]:
class ConcreteDataset(Dataset):
    scaler: StandardScaler
    n_features: int
    data: torch.Tensor
    targets: torch.Tensor

    def __init__(self, df: pd.DataFrame, scaler: StandardScaler = None):
        if scaler:
            self.scaler = scaler
            scale = self.scaler.transform
        else:
            self.scaler = StandardScaler()
            scale = self.scaler.fit_transform

        target_col = "strength"
        data, targets = df.drop(columns=[target_col]), df[target_col]

        self.data = torch.tensor(scale(data), dtype=torch.float)
        self.targets = torch.tensor(targets.to_numpy(), dtype=torch.float).unsqueeze(1)
        self.n_features = self.data.size(1)

    def __len__(self):
        return self.targets.size(0)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

In [566]:
concrete_train_df, concrete_test_df = train_test_split(concrete_df, test_size=0.2, random_state=0)

concrete_train_dataset = ConcreteDataset(concrete_train_df)
concrete_test_dataset = ConcreteDataset(concrete_test_df, scaler=concrete_train_dataset.scaler)
len(concrete_train_dataset), len(concrete_test_dataset), concrete_train_dataset.n_features

(804, 201, 8)

In [567]:
concrete_train_dataloader = DataLoader(concrete_train_dataset, batch_size=4, shuffle=True)
concrete_test_dataloader = DataLoader(concrete_test_dataset, batch_size=len(concrete_test_dataset))

In [568]:
next(iter(concrete_train_dataloader))

[tensor([[-1.0964, -0.8320,  1.7125, -0.2451, -0.2666,  1.0806,  0.0913,  0.1563],
         [ 2.5074, -0.8320, -0.8459, -0.4085, -1.0051,  1.9459, -1.9638, -0.5089],
         [-0.4998,  3.0783, -0.8459,  0.1843, -1.0051, -0.2590, -1.2099, -0.2872],
         [ 0.3195,  1.0083, -0.8459, -0.1751,  0.6360, -0.7646, -0.1682, -0.2872]]),
 tensor([[28.6300],
         [59.7600],
         [39.7000],
         [52.4400]])]

## gold.csv

## house.csv

In [569]:
house_df: pd.DataFrame = pd.read_csv(DATA_DIR / "regression/house.csv")
print(house_df.shape)
house_df.head()

(1460, 81)


Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [570]:
house_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          1460 non-null   object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [571]:
# https://www.kaggle.com/code/emmanueldjegou/house-prices-advanced-regression-techniques/notebook
house_df = house_df[[
    "SalePrice",
    "OverallQual",
    "GrLivArea",
    "TotalBsmtSF",
    "GarageCars",
    "BsmtFinSF1",
    "Fireplaces",
    "Foundation",
    "BsmtQual",
    "KitchenQual",
    "WoodDeckSF",
    "LotShape",
    "Neighborhood",
    "HouseStyle",
    "SaleCondition",
]]

In [572]:
na_stat(house_df)

Series([], dtype: int64)

In [573]:
duplicates_stat(house_df)

SalePrice  OverallQual  GrLivArea  TotalBsmtSF  GarageCars  BsmtFinSF1  Fireplaces  Foundation  BsmtQual  KitchenQual  WoodDeckSF  LotShape  Neighborhood  HouseStyle  SaleCondition
151000     7            1200       600          2           0           0           PConc       Gd        Gd           0           Reg       Somerst       2Story      Normal           1
dtype: int64

In [574]:
house_df = house_df.drop_duplicates()
duplicates_stat(house_df)

Series([], dtype: int64)

In [575]:
count_categories(house_df)

Foundation        6
BsmtQual          5
KitchenQual       4
LotShape          4
Neighborhood     25
HouseStyle        8
SaleCondition     6
dtype: int64

In [576]:
print(house_df.shape)
house_df.head()

(1459, 15)


Unnamed: 0,SalePrice,OverallQual,GrLivArea,TotalBsmtSF,GarageCars,BsmtFinSF1,Fireplaces,Foundation,BsmtQual,KitchenQual,WoodDeckSF,LotShape,Neighborhood,HouseStyle,SaleCondition
0,208500,7,1710,856,2,706,0,PConc,Gd,Gd,0,Reg,CollgCr,2Story,Normal
1,181500,6,1262,1262,2,978,1,CBlock,Gd,TA,298,Reg,Veenker,1Story,Normal
2,223500,7,1786,920,2,486,1,PConc,Gd,Gd,0,IR1,CollgCr,2Story,Normal
3,140000,7,1717,756,3,216,1,BrkTil,TA,Gd,0,IR1,Crawfor,2Story,Abnorml
4,250000,8,2198,1145,3,655,1,PConc,Gd,Gd,192,IR1,NoRidge,2Story,Normal


In [577]:
class HouseDataset(Dataset):
    encoder: OrdinalEncoder
    scaler: StandardScaler
    n_features: int
    data: torch.Tensor
    targets: torch.Tensor

    def __init__(self, df: pd.DataFrame, encoder: OrdinalEncoder = None, scaler: StandardScaler = None):
        if encoder:
            self.encoder = encoder
            encode = self.encoder.transform
        else:
            self.encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
            encode = self.encoder.fit_transform

        if scaler:
            self.scaler = scaler
            scale = self.scaler.transform
        else:
            self.scaler = StandardScaler()
            scale = self.scaler.fit_transform

        target_col = "SalePrice"
        data, targets = df.drop(columns=[target_col]), df[target_col]

        encode_cols, scale_cols = get_categorical_columns(data), get_numerical_columns(data)
        data[encode_cols] = encode(data[encode_cols])
        data[scale_cols] = scale(data[scale_cols])

        self.data = torch.tensor(data.to_numpy(), dtype=torch.float)
        self.targets = torch.tensor(targets.to_numpy(), dtype=torch.float).unsqueeze(1)
        self.n_features = self.data.size(1)

    def __len__(self):
        return self.targets.size(0)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

In [578]:
house_train_df, house_test_df = train_test_split(house_df, test_size=0.2, random_state=0)

house_train_dataset = HouseDataset(house_train_df)
house_test_dataset = HouseDataset(
    house_test_df,
    encoder=house_train_dataset.encoder,
    scaler=house_train_dataset.scaler,
)
len(house_train_dataset), len(house_test_dataset), house_train_dataset.n_features

(1167, 292, 14)

In [579]:
house_train_dataloader = DataLoader(house_train_dataset, batch_size=4, shuffle=True)
house_test_dataloader = DataLoader(house_test_dataset, batch_size=len(house_test_dataset))

In [580]:
next(iter(house_train_dataloader))

[tensor([[-0.8035, -1.4647, -0.9271,  0.3007, -1.0246, -0.9618,  0.0000,  1.0000,
           3.0000, -0.7441,  2.0000, 17.0000,  2.0000,  4.0000],
         [ 1.4012,  0.2388,  1.3683,  1.6558, -1.0246,  0.5949,  2.0000,  2.0000,
           2.0000, -0.7441,  0.0000, 16.0000,  2.0000,  4.0000],
         [-0.8035, -0.9242, -2.5771,  0.3007, -1.0246, -0.9618,  1.0000,  3.0000,
           3.0000, -0.7441,  3.0000, 12.0000,  2.0000,  4.0000],
         [ 0.6663, -0.5170,  0.4511,  0.3007, -0.9881, -0.9618,  2.0000,  2.0000,
           2.0000,  0.1205,  3.0000,  0.0000,  2.0000,  5.0000]]),
 tensor([[ 73000.],
         [250000.],
         [109500.],
         [167240.]])]

## insurance.csv

In [581]:
insurance_df: pd.DataFrame = pd.read_csv(DATA_DIR / "regression/insurance.csv")
print(insurance_df.shape)
insurance_df.head()

(1338, 7)


Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [582]:
na_stat(insurance_df)

Series([], dtype: int64)

In [583]:
duplicates_stat(insurance_df)

age  sex   bmi    children  smoker  region     charges  
19   male  30.59  0         no      northwest  1639.5631    1
dtype: int64

In [584]:
insurance_df = insurance_df.drop_duplicates()
duplicates_stat(insurance_df)

Series([], dtype: int64)

In [585]:
count_categories(insurance_df)

sex       2
smoker    2
region    4
dtype: int64

In [586]:
print(insurance_df.shape)
insurance_df.head()

(1337, 7)


Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [587]:
class InsuranceDataset(Dataset):
    encoder: OrdinalEncoder
    scaler: StandardScaler
    n_features: int
    data: torch.Tensor
    targets: torch.Tensor

    def __init__(self, df: pd.DataFrame, encoder: OrdinalEncoder = None, scaler: StandardScaler = None):
        if encoder:
            self.encoder = encoder
            encode = self.encoder.transform
        else:
            self.encoder = OrdinalEncoder()
            encode = self.encoder.fit_transform

        if scaler:
            self.scaler = scaler
            scale = self.scaler.transform
        else:
            self.scaler = StandardScaler()
            scale = self.scaler.fit_transform

        target_col = "charges"
        data, targets = df.drop(columns=[target_col]), df[target_col]

        encode_cols, scale_cols = get_categorical_columns(data), get_numerical_columns(data)
        data[encode_cols] = encode(data[encode_cols])
        data[scale_cols] = scale(data[scale_cols])

        self.data = torch.tensor(data.to_numpy(), dtype=torch.float)
        self.targets = torch.tensor(targets.to_numpy(), dtype=torch.float).unsqueeze(1)
        self.n_features = self.data.size(1)

    def __len__(self):
        return self.targets.size(0)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

In [588]:
insurance_train_df, insurance_test_df = train_test_split(insurance_df, test_size=0.2, random_state=0)

insurance_train_dataset = InsuranceDataset(insurance_train_df)
insurance_test_dataset = InsuranceDataset(
    insurance_test_df,
    encoder=insurance_train_dataset.encoder,
    scaler=insurance_train_dataset.scaler,
)
len(insurance_train_dataset), len(insurance_test_dataset), insurance_train_dataset.n_features

(1069, 268, 6)

In [589]:
insurance_train_dataloader = DataLoader(insurance_train_dataset, batch_size=4, shuffle=True)
insurance_test_dataloader = DataLoader(insurance_test_dataset, batch_size=len(insurance_test_dataset))

In [590]:
next(iter(insurance_train_dataloader))

[tensor([[ 0.7586,  1.0000,  2.3077, -0.0654,  0.0000,  2.0000],
         [ 1.1132,  0.0000, -0.1763,  0.7668,  0.0000,  3.0000],
         [ 1.7513,  1.0000, -1.0005, -0.0654,  0.0000,  1.0000],
         [-0.0214,  1.0000, -0.1392, -0.0654,  1.0000,  0.0000]]),
 tensor([[ 9058.7305],
         [11881.3584],
         [30166.6191],
         [22462.0430]])]

## vehicle.csv

In [591]:
vehicle_df: pd.DataFrame = pd.read_csv(DATA_DIR / "regression/vehicle.csv")
print(vehicle_df.shape)
vehicle_df.head()

(8128, 13)


Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,mileage,engine,max_power,torque,seats
0,Maruti Swift Dzire VDI,2014,450000,145500,Diesel,Individual,Manual,First Owner,23.4 kmpl,1248 CC,74 bhp,190Nm@ 2000rpm,5.0
1,Skoda Rapid 1.5 TDI Ambition,2014,370000,120000,Diesel,Individual,Manual,Second Owner,21.14 kmpl,1498 CC,103.52 bhp,250Nm@ 1500-2500rpm,5.0
2,Honda City 2017-2020 EXi,2006,158000,140000,Petrol,Individual,Manual,Third Owner,17.7 kmpl,1497 CC,78 bhp,"12.7@ 2,700(kgm@ rpm)",5.0
3,Hyundai i20 Sportz Diesel,2010,225000,127000,Diesel,Individual,Manual,First Owner,23.0 kmpl,1396 CC,90 bhp,22.4 kgm at 1750-2750rpm,5.0
4,Maruti Swift VXI BSIII,2007,130000,120000,Petrol,Individual,Manual,First Owner,16.1 kmpl,1298 CC,88.2 bhp,"11.5@ 4,500(kgm@ rpm)",5.0


In [592]:
na_stat(vehicle_df)

mileage      221
engine       221
max_power    215
torque       222
seats        221
dtype: int64

In [593]:
vehicle_df = vehicle_df.dropna()
na_stat(vehicle_df)

Series([], dtype: int64)

In [594]:
vehicle_df = vehicle_df.drop(columns=["name", "mileage", "torque"])
vehicle_df["max_power"] = vehicle_df["max_power"].str.replace(" bhp", "").astype(float)

In [595]:
duplicates_stat(vehicle_df)

year  selling_price  km_driven  fuel    seller_type       transmission  owner         engine   max_power  seats
2017  3200000        45000      Diesel  Dealer            Automatic     First Owner   1999 CC  177.00     5.0      33
2019  5150000        20000      Petrol  Dealer            Automatic     First Owner   2487 CC  214.56     5.0      33
2016  2000000        68089      Petrol  Trustmark Dealer  Automatic     First Owner   2494 CC  157.70     5.0      31
      550000         56494      Petrol  Trustmark Dealer  Manual        First Owner   1199 CC  88.70      5.0      31
2013  750000         79328      Diesel  Trustmark Dealer  Manual        Second Owner  2494 CC  100.60     7.0      30
                                                                                                                   ..
2015  245000         40000      Petrol  Individual        Manual        First Owner   796 CC   47.30      5.0       1
      250000         80000      Diesel  Individual        Manu

In [596]:
vehicle_df = vehicle_df.drop_duplicates()
duplicates_stat(vehicle_df)

Series([], dtype: int64)

In [597]:
count_categories(vehicle_df)

fuel              4
seller_type       3
transmission      2
owner             5
engine          121
dtype: int64

In [598]:
print(vehicle_df.shape)
vehicle_df.head()

(6690, 10)


Unnamed: 0,year,selling_price,km_driven,fuel,seller_type,transmission,owner,engine,max_power,seats
0,2014,450000,145500,Diesel,Individual,Manual,First Owner,1248 CC,74.0,5.0
1,2014,370000,120000,Diesel,Individual,Manual,Second Owner,1498 CC,103.52,5.0
2,2006,158000,140000,Petrol,Individual,Manual,Third Owner,1497 CC,78.0,5.0
3,2010,225000,127000,Diesel,Individual,Manual,First Owner,1396 CC,90.0,5.0
4,2007,130000,120000,Petrol,Individual,Manual,First Owner,1298 CC,88.2,5.0


In [599]:
class VehicleDataset(Dataset):
    encoder: OrdinalEncoder
    scaler: StandardScaler
    n_features: int
    data: torch.Tensor
    targets: torch.Tensor

    def __init__(self, df: pd.DataFrame, encoder: OrdinalEncoder = None, scaler: StandardScaler = None):
        if encoder:
            self.encoder = encoder
            encode = self.encoder.transform
        else:
            self.encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
            encode = self.encoder.fit_transform

        if scaler:
            self.scaler = scaler
            scale = self.scaler.transform
        else:
            self.scaler = StandardScaler()
            scale = self.scaler.fit_transform

        target_col = "selling_price"
        data, targets = df.drop(columns=[target_col]), df[target_col]

        encode_cols, scale_cols = get_categorical_columns(data), get_numerical_columns(data)
        data[encode_cols] = encode(data[encode_cols])
        data[scale_cols] = scale(data[scale_cols])

        self.data = torch.tensor(data.to_numpy(), dtype=torch.float)
        self.targets = torch.tensor(targets.to_numpy(), dtype=torch.float).unsqueeze(1)
        self.n_features = self.data.size(1)

    def __len__(self):
        return self.targets.size(0)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

In [600]:
vehicle_train_df, vehicle_test_df = train_test_split(vehicle_df, test_size=0.2, random_state=0)

vehicle_train_dataset = VehicleDataset(vehicle_train_df)
vehicle_test_dataset = VehicleDataset(
    vehicle_test_df,
    encoder=vehicle_train_dataset.encoder,
    scaler=vehicle_train_dataset.scaler,
)
len(vehicle_train_dataset), len(vehicle_test_dataset), vehicle_train_dataset.n_features

(5352, 1338, 9)

In [601]:
vehicle_train_dataloader = DataLoader(vehicle_train_dataset, batch_size=4, shuffle=True)
vehicle_test_dataloader = DataLoader(vehicle_test_dataset, batch_size=len(vehicle_test_dataset))

In [602]:
next(iter(vehicle_train_dataloader))

[tensor([[-1.5238e-01, -6.8938e-01,  3.0000e+00,  0.0000e+00,  1.0000e+00,
           0.0000e+00,  2.5000e+01,  3.4118e-01,  1.5876e+00],
         [ 6.1918e-01, -6.3869e-02,  1.0000e+00,  1.0000e+00,  1.0000e+00,
           0.0000e+00,  1.3000e+01,  1.8385e-02, -4.4539e-01],
         [-1.5238e-01,  2.6187e-01,  1.0000e+00,  1.0000e+00,  1.0000e+00,
           2.0000e+00,  3.8000e+01,  1.2042e+00, -4.4539e-01],
         [ 8.7636e-01, -8.7892e-01,  3.0000e+00,  1.0000e+00,  1.0000e+00,
           0.0000e+00,  1.1800e+02, -6.5450e-01, -4.4539e-01]]),
 tensor([[350000.],
         [800000.],
         [445000.],
         [350000.]])]

# Feature classification

# Image classification

# Text classification