In [None]:
from scipy.io import arff
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict
from sklearn.neural_network import MLPClassifier
from sklearn import svm

#method for evaluating predictions
def evaluate_model(y_pred,y_test):
  """Print classification metrics and the confusion matrix for one model.

  Precision, recall, F1 and accuracy are computed with scikit-learn's metric
  functions (called with their default settings), converted to percentages
  rounded to two decimals and printed one per line. The confusion matrix is
  printed last.

  Note the argument order: predictions come first and the ground truth
  second, which is the reverse of scikit-learn's ``(y_true, y_pred)``
  convention. The arguments are swapped internally before being forwarded.

  Args:
    y_pred: labels predicted by the model.
    y_test: true labels for the same samples.

  Returns:
    None. Everything is written to stdout.
  """
  # (label, scorer) pairs, listed in the order they are reported.
  reported_metrics = (
      ("Precision:", precision_score),
      ("Recall:", recall_score),
      ("F1:", f1_score),
      ("Accuracy:", accuracy_score),
  )
  for label, scorer in reported_metrics:
    percentage = round(scorer(y_test, y_pred) * 100, 2)
    print(label, "%" + str(percentage))

  print(confusion_matrix(y_test, y_pred))


# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# The dataset lives on Google Drive, so the cell that calls
# drive.mount('/content/drive') has to be executed before this one.
DATA_PATH = '/content/drive/MyDrive/Rice_Cammeo_Osmancik.arff'
TARGET_COLUMN = 'Class'
SEED = 42  # single seed shared by the split, the CV folds and the stochastic models

rawdata = arff.loadarff(DATA_PATH)
df = pd.DataFrame(rawdata[0])

# Every column except the label is treated as a feature, so no column count
# or row count is hard-coded anywhere below.
feature_columns = [column for column in df.columns if column != TARGET_COLUMN]

#normalizing data
# NOTE(review): the scaler is fitted on ALL rows before the train/test split,
# so the min/max of the test rows leak into the scaling of the training rows.
# Kept as in the original experiment; fit the scaler on X_train only (or use a
# Pipeline) if a strictly unbiased holdout estimate is required.
scaler = MinMaxScaler()
values_without_classifications = df[feature_columns].to_numpy(dtype=float)
scaled_values = scaler.fit_transform(values_without_classifications)
scaled_df = pd.DataFrame(scaled_values, columns=feature_columns, index=df.index)

#transforming labels to int
le = preprocessing.LabelEncoder()
le.fit(df[TARGET_COLUMN])
scaled_df[TARGET_COLUMN] = le.transform(df[TARGET_COLUMN])

# X is a float matrix and y a 1-D integer vector: this is the shape
# scikit-learn estimators and metrics expect, so no astype()/ravel()/reshape()
# is needed at the call sites below.
X = scaled_df[feature_columns].to_numpy()
y = scaled_df[TARGET_COLUMN].to_numpy()

#splitting data using holdout method.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=SEED)

#constructing classifier for gini index.
gini_classifier = DecisionTreeClassifier(criterion="gini", random_state=SEED)
gini_model = gini_classifier.fit(X_train, y_train)
gini_predict = gini_model.predict(X_test)
print("Results for gini index using holdout method")
evaluate_model(gini_predict, y_test)

#constructing classifier for gain ratio using cross validation.
# NOTE(review): scikit-learn's "log_loss" criterion is (Shannon) information
# gain; scikit-learn does not implement gain ratio. The printed headers are
# left unchanged so they still match previously recorded output.
gain_classifier = DecisionTreeClassifier(criterion="log_loss", random_state=SEED)
# shuffle=True is essential: without it the folds are contiguous slices of the
# file, and the recorded run showed folds with precision/recall of exactly 0
# or precision of exactly 1, i.e. test folds that contained a single class.
cross_val = KFold(n_splits=10, shuffle=True, random_state=SEED)
# With a fixed seed on both the folds and the tree, every metric below is
# computed on the same folds with identically fitted trees.
cv_scores = {
    metric: cross_val_score(gain_classifier, X, y, scoring=metric, cv=cross_val, n_jobs=-1)
    for metric in ('precision', 'recall', 'f1', 'accuracy')
}
precision_scores = cv_scores['precision']
recall_scores = cv_scores['recall']
f1_scores = cv_scores['f1']
accuracy_scores = cv_scores['accuracy']
print("\nResults for gain ratio using 10K cross validation method.")
print("Precision:",precision_scores)
print("Recall:",recall_scores)
print("F1:",f1_scores)
print("Accuracy",accuracy_scores)

#constructing classifier for gain ratio using holdout method.
gain_classifier_holdout = DecisionTreeClassifier(criterion="log_loss", random_state=SEED)
gain_model = gain_classifier_holdout.fit(X_train, y_train)
gain_predict = gain_model.predict(X_test)
print("\nResults for gain ratio using holdout method")
evaluate_model(gain_predict, y_test)

#constructing classifier for naive bayes
bayes_classifier = GaussianNB()
bayes_model = bayes_classifier.fit(X_train, y_train)
bayes_predict = bayes_model.predict(X_test)
print("\nResults for bayes model using holdout method")
evaluate_model(bayes_predict, y_test)

#constructing classifier for neural network with 1 hidden layer
# (10,) is a one-element tuple; the previous (10) was just the integer 10.
NN_classifier = MLPClassifier(hidden_layer_sizes=(10,),max_iter=500,activation = 'relu',solver='adam',random_state=SEED)
NN_model = NN_classifier.fit(X_train, y_train)
NN_predict = NN_model.predict(X_test)
print("\nResult for neural network with 1 hidden layer using holdout method.")
evaluate_model(NN_predict, y_test)

#constructing classifier for neural network with 2 hidden layer
NN_classifier2 = MLPClassifier(hidden_layer_sizes=(10,10),max_iter=500,activation = 'relu',solver='adam',random_state=SEED)
NN_model2 = NN_classifier2.fit(X_train, y_train)
NN_predict2 = NN_model2.predict(X_test)
print("\nResult for neural network with 2 hidden layer using holdout method.")
evaluate_model(NN_predict2, y_test)

#constructing classifier for support vector machines
svm_classifier = svm.SVC(kernel='linear')
svm_model = svm_classifier.fit(X_train, y_train)
svm_predict = svm_model.predict(X_test)
print("\nResult for support vector machines using holdout method.")
evaluate_model(svm_predict, y_test)




Results for gini index using holdout method
Precision: %87.07
Recall: %91.74
F1: %89.34
Accuracy: %88.0
[[474  94]
 [ 57 633]]

Results for gain ratio using 10K cross validation method.
Precision: [0.         0.         0.         0.         0.92565056 1.
 1.         1.         1.         1.        ]
Recall: [0.         0.         0.         0.         0.90181818 0.88976378
 0.91076115 0.90026247 0.8976378  0.90026247]
F1: [0.         0.         0.         0.         0.91575092 0.93723849
 0.95185695 0.94459834 0.93426573 0.94313454]
Accuracy [0.8976378  0.85301837 0.85826772 0.77165354 0.87664042 0.8976378
 0.91076115 0.8976378  0.88188976 0.88188976]

Results for gain ratio using holdout method
Precision: %88.24
Recall: %90.29
F1: %89.26
Accuracy: %88.08
[[485  83]
 [ 67 623]]

Results for bayes model using holdout method
Precision: %92.25
Recall: %94.93
F1: %93.57
Accuracy: %92.85
[[513  55]
 [ 35 655]]

Result for neural network with 1 hidden layer using holdout method.
Precision: 

In [None]:
# Mount Google Drive at /content/drive. The data-loading cell reads its ARFF
# file from /content/drive/MyDrive/, so this cell must be executed before it.
from google.colab import drive
drive.mount('/content/drive')