In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix


In [4]:
# Read the battle records from disk and discard every row that contains a
# missing value; the trailing expression previews the first rows of the result.
data = pd.read_csv('portuguese_armada_dataset.csv').dropna()
data.head()

Unnamed: 0,battle,year,portuguese_ships,dutch_ships,english_ships,ratio_of_portuguese_to_other_ships,spanish_involvement_1yes_0no,portuguese_outcome_-1defeat_0draw_1victory
0,Bantam,1601,6,3,0,2.0,0,0
1,Malacca Strait,1606,14,11,0,1.273,0,0
2,Ilha das Naus,1606,6,9,0,0.667,0,-1
3,Pulo Butum,1606,7,9,0,0.778,0,1
4,Surrat,1615,6,0,4,1.5,0,0


In [5]:
# Hold out 30% of the rows for evaluation (fixed seed so the split is repeatable).
# Features are the columns from position 2 up to, but not including, the last
# column; the last column is the label to predict.
X_train, X_test, y_train, y_test = train_test_split(
    data.iloc[:, 2:-1],
    data.iloc[:, -1],
    test_size=0.3,
    random_state=42,
)

# Standardize the features: the scaling statistics are learned from the
# training split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Convert the label columns to plain NumPy arrays
y_train, y_test = np.array(y_train), np.array(y_test)

In [6]:
# Fit a support vector classifier with an RBF kernel on the training split
svm = SVC(kernel='rbf', random_state=42).fit(X_train, y_train)

# Score the held-out split: confusion matrix and overall accuracy
y_pred_svm = svm.predict(X_test)
cm_svm = confusion_matrix(y_test, y_pred_svm)
acc_svm = accuracy_score(y_test, y_pred_svm)

print("SVM Classifier Accuracy: ", acc_svm)
print("SVM Classifier Confusion Matrix: \n", cm_svm)

SVM Classifier Accuracy:  0.3333333333333333
SVM Classifier Confusion Matrix: 
 [[0 2 0]
 [2 3 0]
 [0 2 0]]


In [7]:
# Fit a 100-tree random forest on the training split (seeded for repeatability)
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out split: confusion matrix and overall accuracy
y_pred_rf = rf.predict(X_test)
cm_rf = confusion_matrix(y_test, y_pred_rf)
acc_rf = accuracy_score(y_test, y_pred_rf)

print("Random Forest Classifier Accuracy: ", acc_rf)
print("Random Forest Classifier Confusion Matrix: \n", cm_rf)

Random Forest Classifier Accuracy:  0.4444444444444444
Random Forest Classifier Confusion Matrix: 
 [[1 1 0]
 [1 2 2]
 [0 1 1]]


In [8]:
# Fit a k-nearest-neighbours classifier (k = 5) on the training split
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Score the held-out split: confusion matrix and overall accuracy
y_pred_knn = knn.predict(X_test)
cm_knn = confusion_matrix(y_test, y_pred_knn)
acc_knn = accuracy_score(y_test, y_pred_knn)

print("KNN Classifier Accuracy: ", acc_knn)
print("KNN Classifier Confusion Matrix: \n", cm_knn)

KNN Classifier Accuracy:  0.5555555555555556
KNN Classifier Confusion Matrix: 
 [[1 1 0]
 [1 4 0]
 [1 1 0]]


In [9]:
# Side-by-side recap of the three classifiers: all accuracies first,
# followed by each model's confusion matrix.
for heading, score in (
    ("SVM Classifier Accuracy: ", acc_svm),
    ("Random Forest Classifier Accuracy: ", acc_rf),
    ("KNN Classifier Accuracy: ", acc_knn),
):
    print(heading, score)

# The leading "\n" on the first heading leaves a blank line between the two groups.
for heading, matrix in (
    ("\nSVM Classifier Confusion Matrix: \n", cm_svm),
    ("Random Forest Classifier Confusion Matrix: \n", cm_rf),
    ("KNN Classifier Confusion Matrix: \n", cm_knn),
):
    print(heading, matrix)

SVM Classifier Accuracy:  0.3333333333333333
Random Forest Classifier Accuracy:  0.4444444444444444
KNN Classifier Accuracy:  0.5555555555555556

SVM Classifier Confusion Matrix: 
 [[0 2 0]
 [2 3 0]
 [0 2 0]]
Random Forest Classifier Confusion Matrix: 
 [[1 1 0]
 [1 2 2]
 [0 1 1]]
KNN Classifier Confusion Matrix: 
 [[1 1 0]
 [1 4 0]
 [1 1 0]]
