In [1]:
import pandas as pd

# Location of the ionosphere dataset, relative to the notebook's working directory.
filename = './data/ionosphere.csv'

dataframe = pd.read_csv(filename)
# to_numpy() is the accessor pandas recommends over the older .values attribute;
# it yields the same 2-D array of the frame's contents.
array = dataframe.to_numpy()

# Features: the first 34 columns (indices 0-33), cast to float.
# The file has 35 columns in total; the 35th is the target, not a feature.
X = array[:, 0:34].astype(float)

# Target label: the last column (index 34).
Y = array[:, 34]


In [3]:
# info() prints its summary (index range, column dtypes, non-null counts)
# directly to stdout, so it can run anywhere in the cell.
dataframe.info()

# Jupyter only renders the value of a cell's final expression. describe() is
# therefore placed last; when it came first its result was silently discarded.
dataframe.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 351 entries, 0 to 350
Data columns (total 35 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   feature1   351 non-null    int64  
 1   feature2   351 non-null    int64  
 2   feature3   351 non-null    float64
 3   feature4   351 non-null    float64
 4   feature5   351 non-null    float64
 5   feature6   351 non-null    float64
 6   feature7   351 non-null    float64
 7   feature8   351 non-null    float64
 8   feature9   351 non-null    float64
 9   feature10  351 non-null    float64
 10  feature11  351 non-null    float64
 11  feature12  351 non-null    float64
 12  feature13  351 non-null    float64
 13  feature14  351 non-null    float64
 14  feature15  351 non-null    float64
 15  feature16  351 non-null    float64
 16  feature17  351 non-null    float64
 17  feature18  351 non-null    float64
 18  feature19  351 non-null    float64
 19  feature20  351 non-null    float64
 20  feature21 

In [4]:
from sklearn.ensemble import RandomForestClassifier

# Fixed seed so the forest, and with it the importance scores, is reproducible.
seed = 7

# Train on the complete dataset. fit() hands back the estimator itself, so
# construction and training are chained into a single statement.
model = RandomForestClassifier(random_state=seed).fit(X, Y)

# One importance score per column of X, listed in column order.
print(model.feature_importances_)

[0.03647519 0.         0.07015783 0.0389987  0.14636418 0.04820834
 0.08545061 0.04225811 0.01767687 0.0216788  0.00921579 0.01670527
 0.00477595 0.03257709 0.01394739 0.03045689 0.00989951 0.02473747
 0.0086112  0.01530396 0.01084755 0.02107679 0.00785155 0.0270605
 0.0088732  0.00980895 0.07504935 0.02706318 0.03740858 0.00829844
 0.03206037 0.01346907 0.03271485 0.01491844]


In [6]:
# Feature Importance with Extra Trees Classifier
from sklearn.ensemble import ExtraTreesClassifier

# Fixed seed so the ensemble, and with it the importance scores, is reproducible.
seed = 7

# Fit a 100-tree Extra Trees ensemble on the complete dataset. fit() returns
# the estimator, so building and training are chained.
model = ExtraTreesClassifier(
    n_estimators=100,
    random_state=seed,
).fit(X, Y)

# One importance score per column of X, listed in column order.
print(model.feature_importances_)

[0.0694837  0.         0.05573698 0.04405684 0.07768689 0.04577373
 0.06614819 0.05335822 0.0228045  0.02856548 0.01616335 0.02441515
 0.02121956 0.03661774 0.03457169 0.02571398 0.02142976 0.01678916
 0.01830527 0.02136676 0.02558404 0.02131649 0.02807828 0.02470733
 0.016082   0.01499639 0.02332734 0.02438427 0.02132864 0.01215566
 0.02786994 0.0206377  0.02091184 0.01841314]


In [7]:
# Hold-out (train/test split) classification: confusion matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression

# Split settings: hold back 33% of the rows for evaluation, with a fixed seed
# so the same rows land in the test set on every run.
seed = 7
test_size = 0.33

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=seed,
)

# Train logistic regression (liblinear solver) on the training portion only.
model = LogisticRegression(solver='liblinear').fit(X_train, Y_train)

# Score the held-out rows and tabulate true labels (rows) against
# predicted labels (columns).
predicted = model.predict(X_test)
matrix = confusion_matrix(Y_test, predicted)
print(matrix)

[[28  9]
 [ 2 77]]


In [8]:
# Hold-out (train/test split) classification: classification report
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Split settings: hold back 33% of the rows for evaluation, with a fixed seed
# so the same rows land in the test set on every run.
seed = 7
test_size = 0.33

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=seed,
)

# Train logistic regression (liblinear solver) on the training portion only.
model = LogisticRegression(solver='liblinear').fit(X_train, Y_train)

# Score the held-out rows and summarise precision, recall, F1 and support
# for each class.
predicted = model.predict(X_test)
report = classification_report(Y_test, predicted)
print(report)

              precision    recall  f1-score   support

         0.0       0.93      0.76      0.84        37
         1.0       0.90      0.97      0.93        79

    accuracy                           0.91       116
   macro avg       0.91      0.87      0.88       116
weighted avg       0.91      0.91      0.90       116



In [10]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Split settings: hold back 33% of the rows for evaluation, with a fixed seed
# so the same rows land in the test set on every run.
seed = 7
test_size = 0.33

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=seed,
)

# Build the support vector classifier with gamma='auto' and train it on the
# training portion; fit() returns the estimator, so the two steps are chained.
model = SVC(gamma='auto', random_state=seed).fit(X_train, Y_train)

# Score the held-out rows and summarise precision, recall, F1 and support
# for each class.
predicted = model.predict(X_test)
report = classification_report(Y_test, predicted)
print(report)



              precision    recall  f1-score   support

         0.0       0.97      0.84      0.90        37
         1.0       0.93      0.99      0.96        79

    accuracy                           0.94       116
   macro avg       0.95      0.91      0.93       116
weighted avg       0.94      0.94      0.94       116

