In [1]:
import pandas as pd

In [2]:
import torch

In [3]:
import os

os.makedirs(os.path.join('.', 'data'), exist_ok=True)
data_file = os.path.join('.', 'data', 'house_tiny.csv')
with open(data_file, 'w') as f:
    f.write('''NumRooms,RoofType,Price
NA,NA,127500
2,NA,106000
4,Slate,178100
NA,NA,140000''')

In [5]:
data = pd.read_csv(data_file)
print(data)

   NumRooms RoofType   Price
0       NaN      NaN  127500
1       2.0      NaN  106000
2       4.0    Slate  178100
3       NaN      NaN  140000


### Data Preparation

In [6]:
inputs, targets = data.iloc[ :, 0:2], data.iloc[:, 2]
inputs = pd.get_dummies(inputs, dummy_na = True)
print(inputs)

   NumRooms  RoofType_Slate  RoofType_nan
0       NaN           False          True
1       2.0           False          True
2       4.0            True         False
3       NaN           False          True


In [7]:
inputs = inputs.fillna(inputs.mean())
print(inputs)

   NumRooms  RoofType_Slate  RoofType_nan
0       3.0           False          True
1       2.0           False          True
2       4.0            True         False
3       3.0           False          True


### Conversion to Tensor Format

In [20]:
import numpy as np
input_numpy  = inputs.to_numpy(dtype=np.float32)
numpy_id = id(input_numpy)

X_numpy = torch.from_numpy(input_numpy)
numpy_X_id = id(X_numpy)

X = torch.tensor(input_numpy)
X_id = id(X)

print(numpy_id, numpy_X_id, X_id)
y = torch.tensor(targets.to_numpy(dtype=float))
X,y

1251804032592 1251804350416 1251790092432


(tensor([[3., 0., 1.],
         [2., 0., 1.],
         [4., 1., 0.],
         [3., 0., 1.]]),
 tensor([127500., 106000., 178100., 140000.], dtype=torch.float64))

### Exercises

In [15]:
y = inputs.to_numpy(dtype = float)
torch.from_numpy(y)


tensor([[3., 0., 1.],
        [2., 0., 1.],
        [4., 1., 0.],
        [3., 0., 1.]], dtype=torch.float64)