good reading: https://blog.hyperiondev.com/index.php/2017/12/11/machine-learning/

In [2]:
from sklearn import datasets
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from os import walk
import cv2
import numpy as np
from sklearn.externals import joblib
import os
import csv

In [47]:
iris = datasets.load_iris()

In [48]:
print(iris.target_names)

['setosa' 'versicolor' 'virginica']


In [49]:
print(iris.feature_names)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [50]:
data = pd.DataFrame({
        'sepal length':iris.data[:,0],
        'sepal width':iris.data[:,1],
        'petal length':iris.data[:,2],
        'petal width':iris.data[:,3],
        'species':iris.target
    })
data.head()

Unnamed: 0,petal length,petal width,sepal length,sepal width,species
0,1.4,0.2,5.1,3.5,0
1,1.4,0.2,4.9,3.0,0
2,1.3,0.2,4.7,3.2,0
3,1.5,0.2,4.6,3.1,0
4,1.4,0.2,5.0,3.6,0


In [51]:
x=data[['sepal length','sepal width','petal length','petal width']]
y=data['species']

In [52]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3)

In [57]:
np.shape(x_train)
x_train.head()

Unnamed: 0,sepal length,sepal width,petal length,petal width
49,5.0,3.3,1.4,0.2
25,5.0,3.0,1.6,0.2
119,6.0,2.2,5.0,1.5
4,5.0,3.6,1.4,0.2
55,5.7,2.8,4.5,1.3


In [8]:
clf = RandomForestClassifier(n_estimators=100, max_depth = 3)
clf.fit(x_train,y_train)
y_pred=clf.predict(x_test)

In [9]:
print('Accuracy:',metrics.accuracy_score(y_test,y_pred))

('Accuracy:', 0.9777777777777777)


# Trying to load image data

In [3]:
def getFish(filepath):
    img = cv2.imread(filepath)
    # standardize size for prediction step
    img = cv2.resize(img, (150, 150))
    img = img/255.0
    img = img.reshape(img.shape[0]*img.shape[1]*img.shape[2])
    
    if "noeyespot" in filepath:
        classification = "noeyespot"
    else:
        classification = "eyespot"  
    
    _, _, species = filepath.partition('eyespot/')
    
    #res = model.predict(img)
    return(img, classification, species)

def create_x_y(image_filepaths):
    
    #create lists for image training data and label training data
    x_train = []
    y_train = []
    
    #iterate through list of image filepaths
    #get pixel, label data and store in x,y
    #append x and y to x_train and y_train lists
    #convert classification to binary
    for i in range(len(image_filepaths)):
        x, y, _ = getFish(image_filepaths[i])
        x_train.append(x)
        y_train.append(y)
        if (y_train[i] == 'eyespot'):
            y_train[i] = 0
        else:
            y_train[i] = 1
        
    #convert labels back to categorical from binary
    #y_train = to_categorical(y_train)
    
    return x_train, y_train

def predict(model, get_fish_output, eyespot_threshold):
    
    img, correct_class, fam = get_fish_output
    img = img.reshape((1,) + img.shape)
    
    predicted = clf.predict_proba(img)
    
    y_pred =[]
    prediction =''

    if predicted [:,0][0] > eyespot_threshold:
        y_pred = 0
    else: 
        y_pred = 1
    
    if (y_pred == 0):
        prediction = "eyespot"
    else:
        prediction = "noeyespot"

    #print "Model classification:",prediction,"\nCorrect classification:", correct_class,"\nPrediction wieghts:",prediction_weights,"\nType:",fam,"\n"
    return(prediction, correct_class, fam)

def get_img_paths(family_filepath):
    
    fish = []

    for (dirpath, dirnames, filenames) in walk(family_filepath):
        for i in range(len(filenames)):
            img = dirpath + filenames[i]
            if "Store" not in img and ".csv" not in img:  
                fish.append(img)
        break
    
    return fish, filenames

In [4]:
##CHANGE THESE ACCORDING TO YOUR PATH!!
eye_path = "/Users/leannwoo/Dropbox/machine_learning_fishes/image_classifiers/Eyespots_ocelli/eyespot/"
noeye_path = "/Users/leannwoo/Dropbox/machine_learning_fishes/image_classifiers/Eyespots_ocelli/noeyespot/"

fish_paths = []

for (dirpath, dirnames, filenames) in walk(eye_path):
    for i in range(len(filenames)):
        eye_img = dirpath + filenames[i]
        if "Store" not in eye_img:  
            fish_paths.append(eye_img)
    break

for (dirpath, dirnames, filenames) in walk(noeye_path):
    for i in range(len(filenames)):
        noeye_img = dirpath + filenames[i]
        if "Store" not in noeye_img:
            fish_paths.append(noeye_img)
    break
    
x, y = create_x_y(fish_paths)

In [5]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3)

# Train classifier and predict on testing data

In [6]:
clf = RandomForestClassifier(n_estimators=100, max_depth=10)
clf.fit(x_train,y_train)
#y_pred=clf.predict(x_test)

y_pred =[]

threshold = 0.4
predicted = clf.predict_proba(x_test)

for i in range(len(predicted)):
    if predicted [:,0][i] > threshold:
        y_pred.append(0)
    else: 
        y_pred.append(1)

In [7]:
print('Accuracy:',metrics.accuracy_score(y_test,y_pred))

('Accuracy:', 0.7435897435897436)


# Save and load model

In [9]:
joblib.dump(clf, 'rf1.pkl') 
os.getcwd()

'/Users/leannwoo/machine_learning_fishes'

In [116]:
clf = joblib.load('rf1.pkl') 

# Using the prediction function

In [117]:
predict(clf, getFish(fish_paths[92]))

('noeyespot', 'noeyespot', 'Neoniphon_Neoniphon argenteus_-1921272186.jpg')

# Predicting over the reef families

### Labridae

### Remaining: Acanthuridae, Pomacanthidae, Pomacentridae, Apogonidae

In [130]:
family = 'Labridae2'

family_path = "/Users/leannwoo/Dropbox/machine_learning_fishes/randall/Labridae2/"

fish_list_img, fish_img_names = get_img_paths(family_path)

In [131]:
os.chdir(family_path)

In [1]:
with open('classification_data.csv', 'wb') as csvfile:
    filewriter = csv.writer(csvfile, delimiter=',',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
    filewriter.writerow(['Model Classification','Prediction Weight', 'Species'])
    
    try:
        os.mkdir('eyespot')
        os.mkdir('noeyespot')
    except OSError:
        1
      #  print "unsuccessful"

    for i in range(len(fish_list_img)):
        classification,_,family = predict(clf, getFish(fish_list_img[i]), 0.4)
        #species = family.split('_')[1]
        filewriter.writerow([classification, family])
        
        #weights = np.array2string(weights)
        #weights = (weights.split('[['))[1].split(']]')[0]
        
        
        if (classification == 'eyespot'):
            try:
                os.rename(fish_img_names[i], 'eyespot/' + fish_img_names[i])
            except OSError:
                1
        else:
            try:
                os.rename(fish_img_names[i], 'noeyespot/' + fish_img_names[i])
            except OSError:
                1

NameError: name 'csv' is not defined