# Diabetes Classification using Rotation Forests  
Authors: Daniel Fischer, Alhagie Boye, Vamsi Sudersanam

## Introduction

In [1]:
import numpy as np  
from sklearn.model_selection import train_test_split
from sktime.classification.sklearn import RotationForest 
from sklearn.metrics import accuracy_score


In [2]:
# Load the data using np.loadtxt to reduce the amount of float values
data = np.loadtxt('../Data/pima_indian_diabetes.csv', delimiter=',', skiprows=1)

# Split the data into training, validation and testing sets 
# https://datascience.stackexchange.com/questions/15135/train-test-validation-set-splitting-in-sklearn 
train, validate, test = np.split(data, [int(0.6 * len(data)), int(0.8 * len(data))])

# Separating the features and the target for each split
X_train, y_train = train[:, :-1], train[:, -1]
X_validate, y_validate = validate[:, :-1], validate[:, -1]
X_test, y_test = test[:, :-1], test[:, -1]

## Creating the model

In [3]:

# Rotation Forest Classifier
rotation_forest = RotationForest(random_state=42)
rotation_forest.fit(X_train, y_train)

# Validations 
y_validate_pred = rotation_forest.predict(X_validate)
val_accuracy = accuracy_score(y_validate, y_validate_pred)

# Model Testing
y_test_pred = rotation_forest.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)

# RESULTS!!!
print(f"Validation Accuracy: {val_accuracy:.2f}")
print(f"Test Accuracy: {test_accuracy:.2f}")

Validation Accuracy: 0.85
Test Accuracy: 0.76
