In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.nn.functional import one_hot

In [2]:
# Location of the raw Auto MPG data file.
url = ('http://archive.ics.uci.edu/ml/'
       'machine-learning-databases/auto-mpg/auto-mpg.data')

column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]

# Space-separated file: '?' is read as NaN and everything after a tab
# character on a line is ignored.
df = pd.read_csv(
    url,
    names=column_names,
    sep=' ',
    comment='\t',
    na_values="?",
    skipinitialspace=True,
)

# Discard rows with missing values, then renumber the index from 0.
df = df.dropna().reset_index(drop=True)

# Replace 'Model Year' with a bucket index based on the cut points 73/76/79.
# With right=True a value equal to a cut point falls into the higher bucket:
# <73 -> 0, 73..75 -> 1, 76..78 -> 2, >=79 -> 3.
boundries = torch.tensor([73, 76, 79])
v = torch.tensor(df['Model Year'].values)
df['Model Year'] = torch.bucketize(v, boundries, right=True)

# One-hot encode the 'Origin' column. Codes are shifted to start at 0 and the
# first indicator column is dropped; the remaining indicators are labelled
# 'Europe' and 'Japan' below.
origin_codes = torch.tensor((df['Origin'] - 1).values)
n_origins = df['Origin'].nunique()
origin_encoded = one_hot(origin_codes, num_classes=n_origins)[:, 1:]

# Join the remaining columns with the indicator columns into one tensor.
other_columns = torch.tensor(df.drop(columns='Origin').values)
data = torch.cat([other_columns, origin_encoded], dim=1)

# Same names as before, with 'Origin' swapped for the two indicator columns.
column_names_encoded = column_names[:-1] + ['Europe', 'Japan']
df = pd.DataFrame(data, columns=column_names_encoded)

df.head()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Europe,Japan
0,18.0,8.0,307.0,130.0,3504.0,12.0,0.0,0.0,0.0
1,15.0,8.0,350.0,165.0,3693.0,11.5,0.0,0.0,0.0
2,18.0,8.0,318.0,150.0,3436.0,11.0,0.0,0.0,0.0
3,16.0,8.0,304.0,150.0,3433.0,12.0,0.0,0.0,0.0
4,17.0,8.0,302.0,140.0,3449.0,10.5,0.0,0.0,0.0


In [3]:
# Sanity check: list the distinct values present in the 'Europe' indicator column.
pd.unique(df['Europe'])

array([0., 1.])