In [5]:
# Arturo Polanco Lozano Capstone Udacity

# import packages
import os

import cv2
import mahotas
import numpy as np
from imutils import paths
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

try:
	# scikit-learn >= 0.18 ships train_test_split in model_selection
	from sklearn.model_selection import train_test_split
except ImportError:
	# older scikit-learn releases only provide the cross_validation module
	from sklearn.cross_validation import train_test_split

def describe(image):
	"""Build a feature vector of color statistics and Haralick texture.

	The image is converted from BGR to HSV to obtain the mean and standard
	deviation of each channel, and from BGR to grayscale to obtain Haralick
	texture features averaged over axis 0 of the mahotas result. The two
	parts are joined with np.hstack and returned.
	"""
	# mean and standard deviation of every channel in the HSV color space
	hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
	channelMeans, channelStds = cv2.meanStdDev(hsv)
	colorStats = np.concatenate([channelMeans, channelStds]).flatten()

	# Haralick texture features computed on the grayscale version
	grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	texture = mahotas.features.haralick(grayscale).mean(axis=0)

	# color statistics first, texture features second
	return np.hstack([colorStats, texture])


# grab the sorted set of candidate image paths and initialize the list of
# labels and the matrix of features
print("[Please Wait ....] Extracting Features")
candidatePaths = sorted(paths.list_images('4scenes'))
imagePaths = []
labels = []
data = []

# loop over the images in the input directory
for imagePath in candidatePaths:
	# load the image from disk; cv2.imread returns None instead of raising
	# when a file cannot be read, so skip such files explicitly
	image = cv2.imread(imagePath)
	if image is None:
		print("[WARNING] could not read image, skipping: {}".format(imagePath))
		continue
	# the label is the part of the file name before the first underscore;
	# os.path.basename is used so the platform's path separator is handled
	label = os.path.basename(imagePath).split("_")[0]
	# extract features from the image, then update the lists of paths,
	# labels and features so that all three stay aligned
	features = describe(image)
	imagePaths.append(imagePath)
	labels.append(label)
	data.append(features)

# fail early with a clear message rather than deep inside the train/test split
if not data:
	raise SystemExit("[ERROR] no readable images found in '4scenes'")

# split the samples: 75% are used for training, the remaining 25% are held out for testing
featureMatrix = np.array(data)
labelVector = np.array(labels)
(trainData, testData, trainLabels, testLabels) = train_test_split(
	featureMatrix, labelVector, test_size=0.25, random_state=42)

# the classifier is a random forest of 20 estimators with a fixed random_state
model = RandomForestClassifier(random_state=42, n_estimators=20)

# fit the forest on the training portion
print("[Please Wait ....] Training ")
model.fit(trainData, trainLabels)

# predict the held-out portion and print the classification report
print("[Please Wait ....] Testing  ")
predictions = model.predict(testData)
print("Output using 75% data for Trainig and 25% data for Testing")
report = classification_report(testLabels, predictions)
print(report)

# loop over a few random images
# NOTE: the indices are drawn from every entry of imagePaths, so some of the
# previewed images were also part of the training split
for i in np.random.randint(0, high=len(imagePaths), size=(10,)):
	# grab the image and classify it
	imagePath = imagePaths[i]
	# os.path.basename handles the platform's path separator
	filename = os.path.basename(imagePath)
	image = cv2.imread(imagePath)
	# cv2.imread returns None instead of raising when a file cannot be read
	if image is None:
		print("[WARNING] could not read image, skipping: {}".format(imagePath))
		continue
	features = describe(image)
	# the model expects a 2-D input, hence the reshape to a single row
	prediction = model.predict(features.reshape(1, -1))[0]
	# show the prediction
	print("[PREDICTION] {}: {}".format(filename, prediction))
	# the prediction is an element of a NumPy array; hand cv2 a plain str
	cv2.putText(image, str(prediction), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
	cv2.imshow("4Scene Image Classification ", image)
	# block until a key is pressed before moving on to the next image
	cv2.waitKey(0)

# release the preview window once all images have been shown
cv2.destroyAllWindows()




[Please Wait ....] Extracting Features
[Please Wait ....] Training 
[Please Wait ....] Testing  
Output using 75% data for Trainig and 25% data for Testing
             precision    recall  f1-score   support

      coast       0.85      0.83      0.84        93
     forest       0.94      0.95      0.95        87
    highway       0.86      0.70      0.77        63
     street       0.76      0.91      0.83        67

avg / total       0.86      0.85      0.85       310

[PREDICTION] forest_for137.jpg: forest
[PREDICTION] street_hexp30.jpg: street
[PREDICTION] coast_n291057.jpg: coast
[PREDICTION] street_gre193.jpg: street
[PREDICTION] highway_bost180.jpg: highway
[PREDICTION] coast_land902.jpg: coast
[PREDICTION] forest_text29.jpg: forest
[PREDICTION] street_boston61.jpg: street
[PREDICTION] forest_nat361.jpg: forest
[PREDICTION] highway_gre402.jpg: highway
