In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [1]:
from ucimlrepo import fetch_ucirepo

# Fetch the UCI repository entry with id 19 (the "Car Evaluation" dataset).
car_evaluation = fetch_ucirepo(id=19)

# Feature and target tables (pandas DataFrames).
X, y = car_evaluation.data.features, car_evaluation.data.targets

# Dataset-level metadata first, then the per-variable description.
print(car_evaluation.metadata)
print(car_evaluation.variables)


{'uci_id': 19, 'name': 'Car Evaluation', 'repository_url': 'https://archive.ics.uci.edu/dataset/19/car+evaluation', 'data_url': 'https://archive.ics.uci.edu/static/public/19/data.csv', 'abstract': 'Derived from simple hierarchical decision model, this database may be useful for testing constructive induction and structure discovery methods.', 'area': 'Other', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 1728, 'num_features': 6, 'feature_types': ['Categorical'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1988, 'last_updated': 'Thu Aug 10 2023', 'dataset_doi': '10.24432/C5JP48', 'creators': ['Marko Bohanec'], 'intro_paper': {'ID': 249, 'type': 'NATIVE', 'title': 'Knowledge acquisition and explanation for multi-attribute decision making', 'authors': 'M. Bohanec, V. Rajkovič', 'venue': '8th Intl Workshop on Expert Systems and their Applications, 

In [9]:
# Peek at the first feature row to see the column names and raw string values.
X.iloc[:1]

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,vhigh,vhigh,2,2,small,low


In [14]:
# Frequency of each level of the `safety` feature.
X.loc[:, "safety"].value_counts()

safety
low     576
med     576
high    576
Name: count, dtype: int64

In [17]:
# Place the target column(s) next to the features in one frame (column-wise concat).
df = pd.concat(objs=[X, y], axis="columns")

In [18]:
# First five rows of the combined features + target frame.
df.iloc[:5]

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [26]:
# Summarise the combined frame: row count, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   buying    1728 non-null   object
 1   maint     1728 non-null   object
 2   doors     1728 non-null   object
 3   persons   1728 non-null   object
 4   lug_boot  1728 non-null   object
 5   safety    1728 non-null   object
 6   class     1728 non-null   object
dtypes: object(7)
memory usage: 94.6+ KB


In [27]:
# Frequency of each level of the `buying` column.
df.loc[:, "buying"].value_counts()

buying
vhigh    432
high     432
med      432
low      432
Name: count, dtype: int64

In [29]:
from sklearn.preprocessing import OrdinalEncoder

In [40]:
# Ordered levels for each column; a level's position in its list is the
# integer code the encoder assigns to it.
ordinal_levels = {
    'buying':   ['low', 'med', 'high', 'vhigh'],
    'maint':    ['low', 'med', 'high', 'vhigh'],
    'doors':    ['2', '3', '4', '5more'],
    'persons':  ['2', '4', 'more'],
    'lug_boot': ['small', 'med', 'big'],
    'safety':   ['low', 'med', 'high'],
    'class':    ['unacc', 'acc', 'good', 'vgood'],
}

# `categories` is matched to the input columns by position, so the mapping's
# insertion order must equal the column order handed to fit/transform.
en = OrdinalEncoder(categories=list(ordinal_levels.values()))


In [44]:
# Fit the encoder on the six features plus the target and show the encoded array.
en.fit_transform(
    df.loc[:, ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']]
)

array([[3., 3., 0., ..., 0., 0., 0.],
       [3., 3., 0., ..., 0., 1., 0.],
       [3., 3., 0., ..., 0., 2., 0.],
       ...,
       [0., 0., 3., ..., 2., 0., 0.],
       [0., 0., 3., ..., 2., 1., 2.],
       [0., 0., 3., ..., 2., 2., 3.]], shape=(1728, 7))

In [63]:
# Encode the six features plus the target with the ordered categories
# configured on `en`, and wrap the result in a DataFrame labelled with the
# encoder's feature names.
encoded_columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
encoded_df = pd.DataFrame(
    en.fit_transform(df[encoded_columns]),
    columns=en.get_feature_names_out(),
    index=df.index,  # keep row labels tied to `df` rather than a fresh 0..n-1 index
)

# Fixed seed so the five previewed rows are the same on every run
# (an unseeded sample changes each time the cell is executed).
encoded_df.sample(5, random_state=42)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
1702,0.0,0.0,3.0,0.0,0.0,1.0,0.0
1650,0.0,0.0,1.0,0.0,1.0,0.0,0.0
690,2.0,1.0,1.0,1.0,2.0,0.0,0.0
1156,1.0,1.0,2.0,2.0,1.0,1.0,1.0
745,2.0,1.0,3.0,1.0,2.0,1.0,1.0


In [39]:
# First five rows of `df`.
# NOTE(review): the saved output for this cell shows a `buy_en` column that no
# visible cell creates, and its execution count is out of sequence — it looks
# like leftover kernel state. Restart the kernel and run all cells to refresh.
df.head(n=5)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class,buy_en
0,vhigh,vhigh,2,2,small,low,unacc,3.0
1,vhigh,vhigh,2,2,small,med,unacc,3.0
2,vhigh,vhigh,2,2,small,high,unacc,3.0
3,vhigh,vhigh,2,2,med,low,unacc,3.0
4,vhigh,vhigh,2,2,med,med,unacc,3.0
