# Title

#### Subtitle

In [19]:
# imports

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OrdinalEncoder

## Summary

## Introduction

## Methods

### Data

### Analysis

## Results & Discussion

In [16]:
# import raw data

# data located at https://archive.ics.uci.edu/dataset/19/car+evaluation
# The raw file has no header row, so column names are supplied explicitly.

colnames = ['buying','maint','doors','persons','lug_boot','safety','class']

# dtype=str keeps every column as text instead of relying on type inference:
# 'doors' and 'persons' mix digits with labels ('5more', 'more'), and the
# ordinal encoders in the preprocessing cell list their categories as strings
# such as '2' and '4'.
car_data = pd.read_csv('../data/raw/car.data', names=colnames, header=None, dtype=str)

# categorical features, no missing values -- checked here rather than assumed
assert not car_data.isnull().any().any(), 'unexpected missing values in car.data'

car_data

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good
1724,low,low,5more,more,med,high,vgood
1725,low,low,5more,more,big,low,unacc
1726,low,low,5more,more,big,med,good


In [24]:
# train test split, export to csv

SEED = 522

# Seed NumPy's global RNG as before, so any later cell that draws from the
# global state is still seeded.
np.random.seed(SEED)

# 80/20 split, stratified on the target so both splits keep the class mix.
# random_state is passed explicitly so the split no longer depends on whatever
# else may have consumed the global RNG between seeding and splitting.
car_train, car_test = train_test_split(
    car_data,
    train_size=0.8,
    stratify=car_data['class'],
    random_state=SEED,
)

# Every row must land in exactly one of the two splits.
assert len(car_train) + len(car_test) == len(car_data)

# Persist the splits; the original row index is written as the first column.
car_train.to_csv('../data/processed/car_train.csv')
car_test.to_csv('../data/processed/car_test.csv')

# Separate features from the target for modelling.
X_train, y_train = car_train.drop(columns=['class']), car_train['class']
X_test, y_test = car_test.drop(columns=['class']), car_test['class']

In [48]:
# preprocessing

# transform categorical features
# Every feature gets an ordinal encoding; the levels are listed in the order
# they are mapped to 0, 1, 2, ...
feature_orders = {
    'buying': ['low','med','high','vhigh'],
    'maint': ['low','med','high','vhigh'],
    'doors': ['2','3','4','5more'],
    'persons': ['2','4','more'],
    'lug_boot': ['small','med','big'],
    'safety': ['low','med','high'],
}

# One OrdinalEncoder per feature, built from the mapping above so the
# documented order and the order actually used cannot drift apart.
car_preprocessor = make_column_transformer(
    *[
        (OrdinalEncoder(categories=[levels]), [feature])
        for feature, levels in feature_orders.items()
    ],
    verbose_feature_names_out = False
)

# Fit on the training features only, then apply the fitted encoders to both
# splits. The target column is not handed to the preprocessor.
car_preprocessor.fit(X_train)
encoded_car_train = car_preprocessor.transform(X_train)
encoded_car_test = car_preprocessor.transform(X_test)

# TODO: export the encoded splits if needed. The transformed splits are only
# wrapped in a DataFrame for display below, so wrap each one the same way
# before calling .to_csv, and write train and test to different files.

# Show the encoded training features with their feature names as headers.
pd.DataFrame(encoded_car_train, columns=car_preprocessor.get_feature_names_out())

Unnamed: 0,0,1,2,3,4,5
0,1.0,0.0,1.0,0.0,1.0,2.0
1,3.0,0.0,2.0,2.0,1.0,1.0
2,3.0,3.0,1.0,2.0,1.0,2.0
3,3.0,3.0,2.0,2.0,2.0,2.0
4,0.0,1.0,2.0,2.0,1.0,0.0
...,...,...,...,...,...,...
1377,2.0,2.0,0.0,0.0,0.0,0.0
1378,2.0,3.0,0.0,1.0,2.0,0.0
1379,2.0,3.0,0.0,0.0,2.0,0.0
1380,3.0,0.0,1.0,2.0,2.0,2.0


## References