[![Binder](https://mybinder.org/badge_logo.svg)](https://lab.mlpack.org/v2/gh/mlpack/examples/master?urlpath=lab%2Ftree%2Fforest_covertype_prediction_with_random_forests%2Fcovertype-rf-py.ipynb)

In [11]:
# @file covertype-rf-py.ipynb
#
# Classification using Random Forest on the Covertype dataset.

In [12]:
import mlpack
import pandas as pd
import numpy as np

In [13]:
# Fetch the small Covertype CSV directly from its URL; pandas reads the
# remote file into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='https://lab.mlpack.org/data/covertype-small.csv.gz',
)

In [14]:
# Separate the target column from the features: `labels` is the 'label'
# column, `dataset` is the frame with that column removed.
labels = df['label']
# Select the axis by keyword (`columns=`). The positional form
# `df.drop('label', 1)` is rejected with a TypeError by pandas 2.0 and later.
dataset = df.drop(columns='label')

In [15]:
# Hand the features and labels to mlpack's preprocess_split with a test
# ratio of 0.3. The result comes back as a dictionary, which is unpacked
# separately for clarity.
# NOTE(review): no `seed` is passed here -- confirm whether the split is meant
# to be reproducible across runs.
output = mlpack.preprocess_split(
    input=dataset,
    input_labels=labels,
    test_ratio=0.3,
)

In [16]:
# Pull the four entries of the split result out of the dictionary by key.
training_set, training_labels, test_set, test_labels = (
    output[key]
    for key in ('training', 'training_labels', 'test', 'test_labels')
)

In [17]:
# Fit a random forest on the training split: 10 trees, minimum_leaf_size of 3,
# with print_training_accuracy switched on.
output = mlpack.random_forest(
    training=training_set,
    labels=training_labels,
    num_trees=10,
    minimum_leaf_size=3,
    print_training_accuracy=True,
)

In [18]:
# Keep the trained model from the result dictionary so it can be passed back
# to mlpack.random_forest() as `input_model` when predicting.
random_forest = output['output_model']

In [19]:
# Run the previously trained forest on the held-out test points. The result
# dictionary is read afterwards through its 'predictions' entry.
output = mlpack.random_forest(
    input_model=random_forest,
    test=test_set,
)

In [20]:
# Count the test points whose predicted label equals the true label, then
# report that count, the number of test points, and the percentage correct.
# The 'probabilities' output could also be used to generate an ROC curve.
correct = np.sum(output['predictions'] == test_labels.flatten())
print(f'{correct} correct out of {len(test_labels)} '
      f'({100 * float(correct) / float(len(test_labels))}%).')

24513 correct out of 30000 (81.71%).
