In [1]:
# Import dependencies
using ScikitLearn, CSV, DataFrames
@sk_import preprocessing: LabelEncoder
@sk_import model_selection: train_test_split
@sk_import naive_bayes: CategoricalNB
@sk_import metrics: accuracy_score
@sk_import tree: DecisionTreeClassifier
@sk_import svm: SVC
@sk_import neural_network: MLPClassifier

└ @ ScikitLearn.Skcore /Users/jackson/.julia/packages/ScikitLearn/NJwUf/src/Skcore.jl:179


PyObject <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>

# Car Evaluation Dataset
## ETL

In [2]:
# Load the car data file into a DataFrame.
# Column names are supplied explicitly through the `header` keyword.
car_columns = [
    "buying", "maint", "doors", "persons", "lug_boot", "safety", "class",
]
car_df = DataFrame(CSV.File("./data/car.data"; header=car_columns))
first(car_df, 5)

Unnamed: 0_level_0,buying,maint,doors,persons,lug_boot,safety,class
Unnamed: 0_level_1,String,String,String,String,String,String,String
1,vhigh,vhigh,2,2,small,low,unacc
2,vhigh,vhigh,2,2,small,med,unacc
3,vhigh,vhigh,2,2,small,high,unacc
4,vhigh,vhigh,2,2,med,low,unacc
5,vhigh,vhigh,2,2,med,med,unacc


In [3]:
# Replace the string categories of every column with integer codes.
# One encoder object is reused: `fit_transform` refits it on each column in turn,
# so after the loop `le` holds the fit for the last column processed.
le = LabelEncoder()
for column_name in car_columns
    encoded = le.fit_transform(car_df[!, column_name])
    car_df[!, column_name] = encoded
end
first(car_df, 5)

Unnamed: 0_level_0,buying,maint,doors,persons,lug_boot,safety,class
Unnamed: 0_level_1,Int64,Int64,Int64,Int64,Int64,Int64,Int64
1,3,3,0,0,2,1,2
2,3,3,0,0,2,2,2
3,3,3,0,0,2,0,2
4,3,3,0,0,1,1,2
5,3,3,0,0,1,2,2


## Data Preprocessing

In [4]:
# Feature and label extraction.
# Columns 1-6 are the encoded input attributes; column 7 ("class") is the target.
# `Matrix(...)` is the supported DataFrame-to-array conversion; `convert(Array, df)`
# is deprecated in DataFrames.jl.
X_features = Matrix(car_df[:, 1:6]);
# `car_df[:, 7]` already returns a freshly copied Vector, so no conversion is needed.
y_labels = car_df[:, 7];

In [5]:
# Partition the data into training and testing subsets.
# `test_size=0.3` and `random_state=100` are passed through to `train_test_split`.
X_train, X_test, y_train, y_test = train_test_split(
    X_features, y_labels; test_size=0.3, random_state=100
);

## Model Testing

In [9]:
# Naive Bayesian Classifier
# Fit a categorical naive Bayes model on the training split and report its
# accuracy on the held-out test split.
nb_car_model = CategoricalNB()
fit!(nb_car_model, X_train, y_train)
nb_car_predictions = predict(nb_car_model, X_test)
# scikit-learn metrics take (y_true, y_pred); keep that order so the call stays
# correct if the metric is ever swapped for an asymmetric one.
nb_car_accuracy = accuracy_score(y_test, nb_car_predictions)
println("Naive Bayesian Classifier Accuracy: $(round(nb_car_accuracy * 100, digits=1))%")

Naive Bayesian Classifier Accuracy: 85.5%


In [10]:
# Decision Tree Classifier
# Fit a decision tree on the training split and report its accuracy on the
# held-out test split.
tree_car_model = DecisionTreeClassifier()
fit!(tree_car_model, X_train, y_train)
tree_car_predictions = predict(tree_car_model, X_test)
# scikit-learn metrics take (y_true, y_pred); keep that order so the call stays
# correct if the metric is ever swapped for an asymmetric one.
tree_car_accuracy = accuracy_score(y_test, tree_car_predictions)
println("Decision Tree Classifier Accuracy: $(round(tree_car_accuracy * 100, digits=1))%")

Decision Tree Classifier Accuracy: 97.7%


In [11]:
# Support Vector Machine
# Fit a support vector classifier (default settings) on the training split and
# report its accuracy on the held-out test split.
svm_car_model = SVC()
fit!(svm_car_model, X_train, y_train)
svm_car_predictions = predict(svm_car_model, X_test)
# scikit-learn metrics take (y_true, y_pred); keep that order so the call stays
# correct if the metric is ever swapped for an asymmetric one.
svm_car_accuracy = accuracy_score(y_test, svm_car_predictions)
println("Support Vector Machine Accuracy: $(round(svm_car_accuracy * 100, digits=1))%")

Support Vector Machine Accuracy: 93.1%


In [12]:
# Neural Network
# Fit a multi-layer perceptron (iteration cap raised to 1000) on the training
# split and report its accuracy on the held-out test split.
# NOTE(review): no random_state is passed here, so the reported accuracy
# presumably varies between runs — confirm and seed if reproducibility matters.
nn_car_model = MLPClassifier(max_iter=1000)
fit!(nn_car_model, X_train, y_train)
nn_car_predictions = predict(nn_car_model, X_test)
# scikit-learn metrics take (y_true, y_pred); keep that order so the call stays
# correct if the metric is ever swapped for an asymmetric one.
nn_car_accuracy = accuracy_score(y_test, nn_car_predictions)
println("Neural Network Accuracy: $(round(nn_car_accuracy * 100, digits=1))%")

Neural Network Accuracy: 97.9%
