In [9]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np

# Seed NumPy's global random generator so the random draws made later in the
# script (the train/test assignment) are reproducible
np.random.seed(0)

# Dataset: one row per sample -> Red, Green, Blue flags, Size, Fruit name.
# dtype=object keeps every cell as its original Python value. Without it NumPy
# coerces a mixed int/float/str array to a single string dtype, so the numeric
# features would be stored as text such as '7' and '0.8'.
inputs = np.array(
    [
        [1, 0, 0, 7, 'Apple'],
        [0, 1, 0, 20, 'Watermelon'],
        [1, 0, 0, 1, 'Cherry'],
        [0, 1, 0, 7.5, 'Apple'],
        [1, 0, 0, 1, 'Strawberry'],
        [1, 0, 0, 0.8, 'Cherry'],
    ],
    dtype=object,
)

# Creating a dataframe with the four feature variables and the fruit label
df = pd.DataFrame(inputs, columns=['Red', 'Green', 'Blue', 'Size', 'Fruit'])

# Give the feature columns real numeric dtypes (they arrive as generic object
# columns because the source array is an object array)
df = df.astype({'Red': int, 'Green': int, 'Blue': int, 'Size': float})

# Viewing the top 5 rows
df.head()

# Map each fruit name to an integer code with scikit-learn's LabelEncoder
from sklearn.preprocessing import LabelEncoder

lbl = LabelEncoder()

# Learn the name -> code mapping from the Fruit column and overwrite the
# column with the resulting integer codes
df['Fruit'] = lbl.fit_transform(df['Fruit'])

# Viewing the transformed dataset
df

# Flag each row for training: draw one uniform random number in [0, 1) per row
# and mark the row as training when the draw is at most 0.7; the remaining
# rows form the test set
df['is_train'] = np.random.uniform(0, 1, len(df)) <= 0.7

# Split the rows into training and test dataframes using the boolean flag
train = df[df['is_train']]
test = df[~df['is_train']]

# Report how many observations landed in each dataframe
print('Number of observations in the training data:', train.shape[0])
print('Number of observations in the test data:', test.shape[0])

# Creating a list of the feature column's names (the first four columns)
features = df.columns[:4]

# Viewing features
features

# Training target: the integer fruit codes already stored in the Fruit column
# by LabelEncoder. Re-encoding them with pd.factorize would renumber the
# classes by order of appearance in the training rows, so predicted codes
# could disagree with the codes in df['Fruit'] and could no longer be decoded
# with lbl.inverse_transform.
target = train['Fruit'].to_numpy()

# Creating a random forest Classifier
clf = RandomForestClassifier(n_jobs=2, random_state=0)
# n_jobs: how many processors will be allowed to use
# random_state:  random seed for bootstrapping

# Training the classifier on the training rows
clf.fit(train[features], target)

# Applying the trained Classifier to the held-out test rows; the printed
# values are fruit codes in the same encoding as df['Fruit']
print(f"\nprediction: {clf.predict(test[features])}")

# Viewing the predicted probabilities of the first 5 test observations
# One column per fruit class seen during training, ordered as in clf.classes_
clf.predict_proba(test[features])[0:5]

Number of observations in the training data: 5
Number of observations in the test data: 1

prediction: [0]


array([[0.9   , 0.0445, 0.0555]])