In [14]:
# Dataset: 10.5281/zenodo.1048301
from marsvision.utilities import DataLoader

In [15]:
import os
# Dataset location
# Adjust to point to root of dataset
# Raw string: keeps the Windows backslash literal so it is never parsed as an
# escape sequence ("\h" is an invalid escape and triggers a warning otherwise).
dataset_root = r"X:\hirise-map-proj"

In [16]:
import pandas as pd

# Run the SKLearn pipeline: read the images, then extract features.
map_proj_dir = os.path.join(dataset_root, "map-proj")
data_loader = DataLoader(map_proj_dir)
data_loader.data_reader()
data_loader.data_transformer()
# After the transform step the extracted features are expected in data_loader.df
data_df = data_loader.df

# Read the space-delimited label file (no header row) and order it by file name.
labels_path = os.path.join(dataset_root, "labels-map-proj.txt")
lines = pd.read_csv(
    labels_path,
    delimiter=" ",
    header=None,
    names=["file_name", "class_code"],
)
lines = lines.sort_values(by="file_name")

# Attach the features to each labelled file. Overlapping column names get
# "_left" (label file) / "_right" (data loader) suffixes; only the label-file
# class code is kept and renamed back to "class_code".
features_by_file = data_df.set_index("file_name")
df_labelled = (
    lines
    .join(features_by_file, on="file_name", lsuffix="_left", rsuffix="_right")
    .drop(labels=["class_code_right", "class"], axis=1)
    .rename(columns={"file_name": "file_name", "class_code_left": "class_code"})
)



In [17]:
# Inspect the joined table: file name, class code, feature columns 0-5, and the raw image data.
df_labelled

Unnamed: 0,file_name,class_code,0,1,2,3,4,5,img_data
873,ESP_011283_2265_RED-0032.jpg,0,84.706379,14424.955996,0.002600,403.657701,102.938210,1758.408455,"[[[100, 100, 100], [93, 93, 93], [82, 82, 82],..."
465,ESP_011289_1950_RED-0006.jpg,6,3.266122,822.193554,-0.001553,45.913871,78.866910,5354.609701,"[[[149, 149, 149], [150, 150, 150], [151, 151,..."
1815,ESP_011289_1950_RED-0007.jpg,6,5.141668,1284.688541,-0.001397,57.873429,60.246386,4771.687281,"[[[147, 147, 147], [147, 147, 147], [146, 146,..."
584,ESP_011289_1950_RED-0009.jpg,6,2.192261,554.220464,-0.000272,49.464534,75.727784,5847.890326,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."
1397,ESP_011289_1950_RED-0013.jpg,6,2.810844,708.864470,0.001979,112.121442,77.699237,5529.686793,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."
...,...,...,...,...,...,...,...,...,...
1377,TRA_000828_1805_RED-0148.jpg,6,15.058802,3613.226956,0.000252,52.019892,71.678802,5660.806630,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."
513,TRA_000828_1805_RED-0149.jpg,6,11.644220,2833.688185,-0.002193,43.480424,85.822217,5167.247265,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ..."
2478,TRA_000828_1805_RED-0152.jpg,0,12.139087,2948.109689,-0.001669,16.581029,112.054319,1032.092588,"[[[126, 126, 126], [129, 129, 129], [134, 134,..."
1577,TRA_000828_1805_RED-0154.jpg,0,12.272701,2978.919521,-0.007510,35.245009,82.779309,4322.291647,"[[[142, 142, 142], [144, 144, 144], [146, 146,..."


## Cross Validation - Multi Class Classification With Random Forest

The classes in the Deep Mars dataset are as follows:

0: Other

1: Crater

2: Dark Dune

3: Streak

4: Bright Dune

5: Impact

6: Edge

In [18]:
from sklearn.ensemble import RandomForestClassifier
from marsvision.pipeline import Model
# Seed the forest so repeated runs build the same trees.
# NOTE(review): fold splitting inside Model.cross_validate may add its own
# randomness -- confirm whether it also needs a seed.
RANDOM_SEED = 42
random_forest = RandomForestClassifier(n_estimators=500, random_state=RANDOM_SEED)

# Inputs are the raw image arrays; targets are the multi-class codes (0-6).
x = df_labelled["img_data"]
y = df_labelled["class_code"]
model = Model(random_forest, "sklearn",
              training_images=x,
              training_labels=y)

# Cross-validate with 5 as the first argument, scoring on accuracy only,
# then display the results.
model.cross_validate(5, "accuracy")
model.cv_results

{'fit_time': array([2.53399944, 2.53003836, 2.56496882, 2.6399951 , 2.49799728]),
 'score_time': array([0.09396577, 0.09099555, 0.08903241, 0.0909996 , 0.08500433]),
 'test_score': array([0.68848168, 0.64267016, 0.7591623 , 0.7539267 , 0.73560209])}

In [19]:
def classify_binary(model, label_number, labels=None):
    """Cross-validate ``model`` on a one-vs-rest encoding of the class codes.

    Every label equal to ``label_number`` becomes 1 and every other label
    becomes 0. The binary labels are assigned to ``model.training_labels``
    and ``model.cross_validate(5)`` is run.

    According to the original note on this function, the model class uses
    these metrics by default in its cross validation:
    ["accuracy", "precision", "recall", "roc_auc"].

    Parameters
    ----------
    model : object
        Must accept assignment to ``training_labels``, provide
        ``cross_validate(n)`` and expose ``cv_results`` afterwards.
    label_number : int
        Class code treated as the positive class.
    labels : pandas.Series, optional
        Class codes to binarise. Defaults to ``df_labelled["class_code"]``
        from the notebook namespace, which is what this function always used.

    Returns
    -------
    The value of ``model.cv_results`` after cross validation.

    Notes
    -----
    Side effect: ``model.training_labels`` is overwritten with the binary
    labels and stays that way after the call. Re-create the model before
    re-running a multi-class experiment.
    """
    if labels is None:
        # Fall back to the notebook-level dataframe (previous behaviour).
        labels = df_labelled["class_code"]
    # Vectorised comparison replaces the per-row lambda; result is 0/1 integers.
    binary_labels = (labels == label_number).astype("int64")
    model.training_labels = binary_labels
    model.cross_validate(5)
    return model.cv_results

## Cross Validation Results - Per-Class Binary Classification
### "Other" Class

In [6]:
# Class 0 ("Other") vs. the rest. Requires the cell that creates `model` to have run first.
classify_binary(model, 0)

NameError: name 'model' is not defined

## Cross Validation Results - Per-Class Binary Classification
### "Crater" Class

In [3]:
# Class 1 ("Crater") vs. the rest. Requires the cell that defines `classify_binary` to have run first.
classify_binary(model, 1)

NameError: name 'classify_binary' is not defined

## Cross Validation Results - Per-Class Binary Classification
### "Dark Dune" Class

In [4]:
# Class 2 ("Dark Dune") vs. the rest. Requires the cell that defines `classify_binary` to have run first.
classify_binary(model, 2)

NameError: name 'classify_binary' is not defined

## Cross Validation Results - Per-Class Binary Classification
### "Streak" Class

In [43]:
# Class 3 ("Streak") vs. the rest.
classify_binary(model, 3)

{'fit_time': array([1.41099739, 1.36196542, 1.39599729, 1.35099721, 1.35799813]),
 'score_time': array([0.11403322, 0.10900116, 0.10900211, 0.10800195, 0.1110003 ]),
 'test_accuracy': array([0.9921466 , 0.9895288 , 0.9921466 , 0.9908377 , 0.98691099]),
 'test_precision': array([1.        , 0.33333333, 1.        , 0.66666667, 0.25      ]),
 'test_recall': array([0.14285714, 0.14285714, 0.14285714, 0.25      , 0.125     ]),
 'test_roc_auc': array([0.90215135, 0.929515  , 0.98905454, 0.91484788, 0.80944114])}

## Cross Validation Results - Per-Class Binary Classification
### "Bright Dune" Class

In [44]:
# Class 4 ("Bright Dune") vs. the rest.
classify_binary(model, 4)

{'fit_time': array([1.72299933, 1.74999738, 1.78699899, 1.64499903, 1.6269989 ]),
 'score_time': array([0.13300109, 0.12900233, 0.13300109, 0.13100123, 0.12600112]),
 'test_accuracy': array([0.96204188, 0.95942408, 0.95680628, 0.94371728, 0.95418848]),
 'test_precision': array([1.        , 0.5       , 0.        , 0.07142857, 0.        ]),
 'test_recall': array([0.06451613, 0.03225806, 0.        , 0.03225806, 0.        ]),
 'test_roc_auc': array([0.86837125, 0.79901421, 0.84896361, 0.61090085, 0.65292008])}

## Cross Validation Results - Per-Class Binary Classification
### "Impact" Class

In [46]:
# Class 5 ("Impact") vs. the rest.
# NOTE(review): the recorded run of this cell failed with a ValueError from roc_auc scoring
# ("need classifier with two classes") -- presumably a fold had only one class; TODO confirm.
classify_binary(model, 5)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


ValueError: got predict_proba of shape (764, 1), but need classifier with two classes for roc_auc_score scoring

## Cross Validation Results - Per-Class Binary Classification
### "Edge" Class

In [47]:
# Class 6 ("Edge") vs. the rest.
classify_binary(model, 6)

{'fit_time': array([1.62819624, 1.59908199, 1.67126584, 1.69209313, 1.68501782]),
 'score_time': array([0.12300372, 0.12700748, 0.12641144, 0.12500024, 0.13102841]),
 'test_accuracy': array([0.96204188, 0.93717277, 0.95157068, 0.93062827, 0.95811518]),
 'test_precision': array([0.88700565, 0.82065217, 0.85635359, 0.89583333, 0.89473684]),
 'test_recall': array([0.94578313, 0.90963855, 0.93373494, 0.77245509, 0.91616766]),
 'test_roc_auc': array([0.98990108, 0.97503727, 0.98438067, 0.98041104, 0.98768293])}