# Random Forest

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from sklearn.model_selection import cross_validate

## Loading Dataset

In [2]:
dataset = pd.read_csv("../../../Datasets/car_evaluation.txt", sep=",")
dataset.columns = ['buying', 'maintenance', 'doors', 'persons', 'lug_boot', 'safety', 'target']
dataset.head()

Unnamed: 0,buying,maintenance,doors,persons,lug_boot,safety,target
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc


In [3]:
samples_count, features_count = dataset.shape
samples_count, features_count

(1727, 7)

## Separating target column from dataset

In [4]:
target = dataset['target']
dataset = dataset.drop('target',axis=1)

## One-Hot Encoding of Dataset & Target column

### Dataset

In [5]:
ohe_dataset = preprocessing.OneHotEncoder()
ohe_dataset.fit(dataset.values) 
dataset_encoded = ohe_dataset.transform(dataset.values).toarray()

### Target Column

In [6]:
ohe_target = preprocessing.OneHotEncoder()
target_2D = target.values.reshape(-1,1)
ohe_target.fit(target_2D)
target_encoded = ohe_target.transform(target_2D).toarray()

## Evaluate Random Forest using Cross Validation

In [8]:
x = dataset_encoded
y = target_encoded
random_forest = RandomForestClassifier()
scores = cross_validate(random_forest, x, y, cv=5, scoring='accuracy',return_train_score=True)

## Find accuracy of Decision Tree

In [9]:
train_score = scores['train_score'].mean()
test_score = scores['test_score'].mean()
print("Training::%.2f" % train_score)
print("Testing::%.2f" % test_score)

Training::1.00
Testing::0.77


## Train Random Forest

In [10]:
random_forest.fit(x,y)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

## Prediction by using input data from user

In [12]:
buying = input("Enter buying category(vhigh,high,med,low)::")
maintenance = input("Enter buying maintenance(vhigh,high,med,low)::")
doors = input("Enter no of doors(2,3,4,5more)::")
persons = input("Enter no of persons(2,4,more)::")
lug_boot = input("Enter lug_boot category(small,med,big)::")
safety = input("Enter safety category(low,med,high)::")

Enter buying category(vhigh,high,med,low)::vhigh
Enter buying maintenance(vhigh,high,med,low)::med
Enter no of doors(2,3,4,5more)::4
Enter no of persons(2,4,more)::4
Enter lug_boot category(small,med,big)::med
Enter safety category(low,med,high)::low


In [13]:
user_sample = [[buying, maintenance, doors, persons, lug_boot, safety]]
user_sample_encoded = ohe_dataset.transform(user_sample).toarray()
result_encoded = random_forest.predict(user_sample_encoded)
result_decoded = ohe_target.inverse_transform(result_encoded)
result = result_decoded[0]

In [14]:
if result == 'unacc':
    print("Your car is unaccurate")
elif result == 'acc':
    print("Your car is accurate")
elif result == 'good':
    print("Your car is good")
else:
    print("Your car is very good")

Your car is unaccurate
