# Earth or Non-Earth Simple Classification using SVM

#### Imports

In [73]:
######## DATA PROCESSING IMPORTS ######## 

# Dataset Pre-processing
from skimage import color
from sklearn import preprocessing # for normalization

######## TRAINING IMPORTS ######## 

# Label (y) Processing
from sklearn.preprocessing import LabelEncoder # Convert labels (y) from string to integer

# Model 
from sklearn.model_selection import train_test_split # Split train and test data
from sklearn.svm import SVC # SVM model
from sklearn.metrics import classification_report # Scores the trained model using test set

# Cross Validate
from sklearn.model_selection import cross_val_score

######## OTHER IMPORTS ######## 

# Math Functions and Plotting
from numpy import *
from matplotlib import pyplot as plt

# General Library Imports
import numpy as np # Math and array functions
import cv2 # OpenCV
import os # OS interactions
import joblib # Lightweight save/load of large data (like models)


#### 1) Fetch Dataset

In [74]:
# True Images
positive_path = r"Positive images here" 
# False Images
negative_path = r"Negative images here"

# os.listdir returns a list containing the names of the entries in the directory given by path.
postivie_list = os.listdir(positive_path)
negative_list = os.listdir(negative_path)

# Total number of images in given path
pos_total_data = size(postivie_list)
neg_total_data = size(negative_list)
print("Number of Positive Images: ", pos_total_data)
print("Number of Negative Images: ", neg_total_data)

Number of Positive Images:  198
Number of Negative Images:  120


#### 2) Pre-process dataset

In [75]:
# Define pre-processing function
def Preprocess_dataset(img):
    img = cv2.resize(img,(320,140)) # Resize so all dataset will have a uniform size
    gray = color.rgb2gray(img)  # Convert to grayscale
    return gray

# Vector for data and target
features = [] #X (features of a certain image) aka data
labels = [] #Y (label) aka target

In [76]:
# Pre-process each file in dataset
for file in postivie_list: 
    img = cv2.imread(os.path.join(positive_path,file)) # Open file using OpenCV and OS
    gray = Preprocess_dataset(img)
    features.append(gray)
    labels.append(1) # Positive images are labeled as "1"
    
for file in negative_list: 
    img = cv2.imread(os.path.join(negative_path,file)) # Open file using OpenCV and OS
    gray = Preprocess_dataset(img)
    features.append(gray)
    labels.append(0) # Negative images are labeled as "0"    
    
print('Pre-process done!')

Pre-process done!


#### 3) Train the SVM model

In [84]:
# Process the labels 
le = LabelEncoder() # Convert labels from string to integers
labels = le.fit_transform(labels) #Scale training labels to standardized mean and variance

# Split training and testing data (80%-Train ; 20%-Test)
print("Splitting training and testing dataset...")
(trainFeatures, testFeatures, trainLabels, testLabels) = train_test_split(np.array(features), labels, test_size=0.20, random_state=2)

# Adjust dimensions of train and test features
nsamples, nx, ny = trainFeatures.shape
trainFeatures = trainFeatures.reshape((nsamples,nx*ny))
msamples, mx, my = testFeatures.shape
testFeatures = testFeatures.reshape((msamples,mx*my))

# Train the model
print("Training SVM Model...")
model = SVC(kernel= 'rbf', C=10) # Use SVM model with a Radial Basis Function Kernel and Regularizer C (try 4)
model.fit(trainFeatures, trainLabels) # Fit training data using SVM model
print("Training done!")

# Evaluate the model
print("Evaluating the model using test data ...")
predictedLabel = model.predict(testFeatures) # Evaluate model using test data (aka prediction)
print(classification_report(testLabels, predictedLabel)) # Print test evaluation report (true label, predicted label)

Splitting training and testing dataset...
Training SVM Model...
Training done!
Evaluating the model using test data ...
              precision    recall  f1-score   support

           0       0.86      1.00      0.92        18
           1       1.00      0.93      0.97        46

    accuracy                           0.95        64
   macro avg       0.93      0.97      0.94        64
weighted avg       0.96      0.95      0.95        64



#### 4) Cross Validate (Optional)

In [85]:
cv_results = cross_val_score(model, trainFeatures, trainLabels, cv=5) # K-fold = 5
print(cv_results.mean()) # Mean score

0.9646274509803922


#### 5) Save the Model

In [86]:
# Save the model:
joblib.dump(model, 'Earth or Not Earth Simple Classifier.npy')

['Earth or Not Earth Simple Classifier.npy']

#### 6) Load the Model

In [87]:
model = joblib.load(r'Model path here')

#### 7) Test the model

In [89]:
img_orig = cv2.imread("image to test here")
print("Processing Image...")
gray = Preprocess_dataset(img_orig)
#hog1 = feature_hog(gray)
#hog2 = preprocessing.normalize([hog1])
#hist = feature_lbp(gray)
#hist2 = preprocessing.normalize([hist])
#all_features = np.hstack([hog2, hist2]) 
window_feat = gray.reshape(1, -1)
prediction = model.predict(window_feat)
if prediction == 1:
    print('(1) Earth')
elif prediction == 0:
    print('(0) Non-Earth')
else:
    print(prediction)

Processing Image...
(1) Earth
