In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.nn.functional import one_hot

In [None]:
# Location of the raw Auto MPG data file (UCI Machine Learning Repository).
url = ('http://archive.ics.uci.edu/ml/'
       'machine-learning-databases/auto-mpg/auto-mpg.data')

# The file is read without a header row, so the column names are supplied here.
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]

# Fields are space separated (runs of spaces collapsed by skipinitialspace),
# "?" is parsed as NaN, and everything after a tab on a line is ignored
# because the tab is declared as the comment character.
df = pd.read_csv(
    url,
    names=column_names,
    sep=' ',
    comment='\t',
    na_values="?",
    skipinitialspace=True,
)

# Discard rows with any missing value, then renumber the index from 0.
df = df.dropna().reset_index(drop=True)

# Standardize each listed column to zero mean and unit standard deviation.
numerical_columns = ['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration']
for col in numerical_columns:
    centered = df[col] - df[col].mean()
    df[col] = centered / df[col].std()
    
# Map the value of the origin column to 0, 1, and 2
def reset_fn(x):
    """Return ``x`` shifted down by one.

    Used to turn a 1-based category code into a 0-based index.
    """
    return x - 1
# Shift every Origin code down by one so the codes start at 0.
df['Origin'] = df['Origin'].map(reset_fn)

# Deal with the year column: replace each model year by the index of its bucket.
# With right=True a value equal to a boundary goes to the upper bucket:
#   year < 73 -> 0, 73 <= year < 76 -> 1, 76 <= year < 79 -> 2, year >= 79 -> 3
boundries = torch.tensor([73, 76, 79])
v = torch.tensor(df['Model Year'].values)
df['Model Year'] = torch.bucketize(v, boundries, right=True)

# Encode the Origin column: one-hot it (one class per distinct Origin value),
# drop the first indicator column, and append the rest to all other columns.
data = torch.cat(
    [
        torch.tensor(df.drop(columns=['Origin']).to_numpy()),
        one_hot(
            torch.tensor(df['Origin'].to_numpy()),
            num_classes=df['Origin'].nunique(),
        )[:, 1:],
    ],
    dim=1,
)
data[:10]

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
0,18.0,1.482053,1.075915,0.663285,0.619748,-1.283618,0,0
1,15.0,1.482053,1.486832,1.572585,0.842258,-1.464852,0,0
2,18.0,1.482053,1.181033,1.182885,0.539692,-1.646086,0,0
3,16.0,1.482053,1.047246,1.182885,0.53616,-1.283618,0,0
4,17.0,1.482053,1.028134,0.923085,0.554997,-1.82732,0,0


In [10]:
# Assemble the final tensor: every non-Origin column, followed by the one-hot
# encoding of Origin with its first indicator column dropped.
data = torch.cat(
    [
        torch.tensor(df.drop(columns=['Origin']).to_numpy()),
        one_hot(
            torch.tensor(df['Origin'].to_numpy()),
            num_classes=df['Origin'].nunique(),
        )[:, 1:],
    ],
    dim=1,
)
data[:10]

tensor([[18.0000,  1.4821,  1.0759,  0.6633,  0.6197, -1.2836,  0.0000,  0.0000,
          0.0000],
        [15.0000,  1.4821,  1.4868,  1.5726,  0.8423, -1.4649,  0.0000,  0.0000,
          0.0000],
        [18.0000,  1.4821,  1.1810,  1.1829,  0.5397, -1.6461,  0.0000,  0.0000,
          0.0000],
        [16.0000,  1.4821,  1.0472,  1.1829,  0.5362, -1.2836,  0.0000,  0.0000,
          0.0000],
        [17.0000,  1.4821,  1.0281,  0.9231,  0.5550, -1.8273,  0.0000,  0.0000,
          0.0000],
        [15.0000,  1.4821,  2.2418,  2.4299,  1.6051, -2.0086,  0.0000,  0.0000,
          0.0000],
        [14.0000,  1.4821,  2.4807,  3.0015,  1.6205, -2.3710,  0.0000,  0.0000,
          0.0000],
        [14.0000,  1.4821,  2.3469,  2.8716,  1.5710, -2.5523,  0.0000,  0.0000,
          0.0000],
        [14.0000,  1.4821,  2.4902,  3.1314,  1.7040, -2.0086,  0.0000,  0.0000,
          0.0000],
        [15.0000,  1.4821,  1.8691,  2.2221,  1.0271, -2.5523,  0.0000,  0.0000,
          0.0000]],