# Building a Random Forest Classifier

# Load Data and Initialize Environment

In [None]:
# Load the library with the iris dataset
from sklearn.datasets import load_iris
 
# Load scikit's random forest classifier library
from sklearn.ensemble import RandomForestClassifier
 
# Load pandas
import pandas as pd
 
# Load numpy
import numpy as np
 
# Seed NumPy's global random number generator so that later random draws
# are reproducible from run to run
np.random.seed(0)

# Fetch the iris dataset and keep the returned object under the name iris
iris = load_iris()

# Build a dataframe holding the four feature variables, one column per
# feature, labelled with the dataset's own feature names
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Attach the species name of every row as a categorical column; this is the
# value we are going to try to predict
df['species'] = pd.Categorical.from_codes(
    codes=iris.target,
    categories=iris.target_names,
)

# Show the first 5 rows
df.head()

# Create Training and Test Data

In [None]:
# Randomly assign each row to the training or the test set: draw one uniform
# random number between 0 and 1 per row and mark the row as training data
# (True) when the draw is less than or equal to .75, and as test data (False)
# otherwise. This is a quick and dirty split, so roughly -- not exactly --
# three quarters of the rows end up in the training set.
df['is_train'] = np.random.uniform(0, 1, len(df)) <= .75

# View the top 5 rows
df.head()

# Create two new dataframes, one with the training rows and one with the test
# rows. 'is_train' is already a boolean column, so it is used directly as a
# mask (and negated with ~) instead of being compared to True / False.
train, test = df[df['is_train']], df[~df['is_train']]

# Show the number of observations for the test and training dataframes
print('Number of observations in the training data:', len(train))
print('Number of observations in the test data:', len(test))

# Pre-Process Data

In [None]:
# The first four columns of df are the feature columns; keep their names
features = df.columns[:4]

# Show the feature names
features

# train['species'] holds the actual species names, which must be turned into
# digits before they can be used. pd.factorize returns a (codes, uniques)
# pair; the codes are the integer labels we want (three species, coded as
# 0, 1, or 2), and the uniques are kept only to unpack the pair.
y, _species_uniques = pd.factorize(train['species'])

In [None]:
# View the target: the integer species codes computed for the training rows
y

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=51eeb538-78d0-4759-ab21-b1b8c45886c3' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>