In [44]:
#import the necessary libraries
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from scipy.io import arff

In [45]:
# Load data from .arff file
data, meta = arff.loadarff('C:\Program Files\Weka-3-8-6\data/iris.arff')
attributes = meta.names()[:-1] # Extract feature names from meta-data
n_features = len(attributes)

In [55]:
# Convert data to pandas dataframe format
df = pd.DataFrame(data)

# Display data as a table
df

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,class
0,5.1,3.5,1.4,0.2,b'Iris-setosa'
1,4.9,3.0,1.4,0.2,b'Iris-setosa'
2,4.7,3.2,1.3,0.2,b'Iris-setosa'
3,4.6,3.1,1.5,0.2,b'Iris-setosa'
4,5.0,3.6,1.4,0.2,b'Iris-setosa'
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,b'Iris-virginica'
146,6.3,2.5,5.0,1.9,b'Iris-virginica'
147,6.5,3.0,5.2,2.0,b'Iris-virginica'
148,6.2,3.4,5.4,2.3,b'Iris-virginica'


In [47]:
# Convert data to numpy array format
X = np.zeros((len(data), n_features))
y = []
for i, instance in enumerate(data):
    instance_data = []
    for attr in attributes:
        instance_data.append(instance[attr])
    X[i,:] = instance_data
    y.append(instance[-1])
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [48]:
# Encode labels as integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [49]:
# Split data into training and testing sets
n_train = int(0.8 * len(data))  # Calculate the number of training instances
X_train = X[:n_train,:]  # Take the first n_train instances for training
y_train = y[:n_train]  # Take the first n_train labels for training
X_test = X[n_train:,:]  # Take the remaining instances for testing
y_test = y[n_train:]  # Take the remaining labels for testing


In [50]:
# Create MLP classifier object and fit to training data
mlp = MLPClassifier(hidden_layer_sizes=(50,50), max_iter=1000)
mlp.fit(X_train, y_train)

In [51]:
# Evaluate classifier performance on testing data
accuracy = mlp.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.8


In [52]:
from sklearn.metrics import classification_report

# Get all possible classes
classes = list(set(y_train) | set(y_test))

# Generate classification report on testing data
y_pred = mlp.predict(X_test)
report = classification_report(y_test, y_pred, labels=classes)
print(report)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       1.00      0.80      0.89        30

   micro avg       0.80      0.80      0.80        30
   macro avg       0.33      0.27      0.30        30
weighted avg       1.00      0.80      0.89        30



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [53]:
# Print classification report
print("Classification report:\n")
print(report)

Classification report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           2       1.00      0.80      0.89        30

   micro avg       0.80      0.80      0.80        30
   macro avg       0.33      0.27      0.30        30
weighted avg       1.00      0.80      0.89        30



In [57]:
from sklearn.metrics import confusion_matrix

# Generate confusion matrix
y_pred = mlp.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

# Calculate the total number of instances and the number of correctly and incorrectly classified instances
total_instances = np.sum(cm)
correct_instances = np.sum(np.diag(cm))
incorrect_instances = total_instances - correct_instances

# Calculate the percentage of correctly and incorrectly classified instances
correct_percentage = correct_instances / total_instances * 100
incorrect_percentage = incorrect_instances / total_instances * 100

# Print the results
print("Correctly Classified Instances: {} ({:.2f}%)".format(correct_instances, correct_percentage))
print("Incorrectly Classified Instances: {} ({:.2f}%)".format(incorrect_instances, incorrect_percentage))


Correctly Classified Instances: 24 (80.00%)
Incorrectly Classified Instances: 6 (20.00%)
