# Random Forest Regression

## Importing the libraries

In [86]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [87]:
# Read the raw table from the CSV that sits next to the notebook.
dataset = pd.read_csv('test_data.csv')

# Target vector: the last column of the table.
y = dataset.iloc[:, -1].to_numpy()

# Feature matrix: every column from position 4 up to (but excluding) the last.
# NOTE(review): the positional slice assumes a fixed column order in the CSV —
# confirm against the file header.
x = dataset.iloc[:, 4:-1].to_numpy()

In [88]:
# Sanity check: show the feature matrix built from the selected columns.
print(x)

[[ 492 1780]
 [1038   92]
 [1703  829]
 ...
 [ 289 1352]
 [ 282 1143]
 [1496  924]]


In [89]:
# Sanity check: show the target vector (last column of the dataset).
print(y)

[2 3 1 ... 2 2 1]


## Splitting the Dataset into Training and Test Set

In [90]:

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the
# partition repeatable from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=0,
)

In [91]:
# Inspect the training features produced by the split (still unscaled here).
print(x_train)

[[1555  526]
 [ 301  632]
 [1501 1107]
 ...
 [1890 1269]
 [ 244 2462]
 [ 292 1621]]


In [92]:
# Inspect the training targets produced by the split.
print(y_train)

[1 2 3 ... 4 5 4]


In [93]:
# Inspect the held-out test features produced by the split (still unscaled here).
print(x_test)

[[1387 2546]
 [1322 2377]
 [ 642 1028]
 ...
 [ 104 2263]
 [ 622  819]
 [ 449 1152]]


In [94]:
# Inspect the held-out test targets produced by the split.
print(y_test)

[1 4 2 ... 3 2 2]


## Feature Scaling

In [95]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply that same fitted
# transform to both partitions so the test set does not influence the statistics.
sc = StandardScaler().fit(x_train)

# NOTE: x_train / x_test are overwritten with their scaled versions. Re-running
# this cell on its own would refit `sc` on already-scaled values; re-run from
# the train/test split cell instead.
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [96]:
# Inspect the training features after standardisation.
print(x_train)

[[ 0.96853869 -1.14199639]
 [-1.20650017 -1.01896707]
 [ 0.87487673 -0.46765647]
 ...
 [ 1.54958974 -0.27963053]
 [-1.30536557  1.10502958]
 [-1.2221105   0.12891964]]


In [97]:
# Inspect the test features after applying the scaler fitted on the training set.
print(x_test)

[[ 0.67714593  1.20252451]
 [ 0.56440468  1.006374  ]
 [-0.61504223 -0.55934812]
 ...
 [-1.54819288  0.87405945]
 [-0.64973185 -0.80192479]
 [-0.94979702 -0.41542704]]


## Training the Random Forest Regression model on the Training set

In [98]:
from sklearn.ensemble import RandomForestRegressor

# Forest of 1000 trees with a fixed seed, fitted on the scaled training
# partition only. `fit` returns the estimator itself, so it can be chained.
regressor = RandomForestRegressor(
    random_state=0,
    n_estimators=1000,
).fit(x_train, y_train)

# Last expression of the cell: display the fitted estimator's parameters.
regressor

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=1000, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

In [99]:
# Predict the target for every row of the (scaled) held-out test features.
y_pred = regressor.predict(x_test)

In [100]:
from sklearn.metrics import r2_score

# Coefficient of determination of the test-set predictions against the true
# test targets; the value is shown as the cell output.
r2_score(y_true=y_test, y_pred=y_pred)

0.03912019550737145

## Predicting a new result

In [110]:
# One hand-written sample holding two raw (unscaled) feature values.
arr = [1199, 1799]

# The model was fitted on scaled features, so run the sample through the same
# fitted scaler first. It is wrapped in a list to form a single-row 2-D input.
arr_scaled = sc.transform([arr])

res = regressor.predict(arr_scaled)
print(res)

[3.32]


## Visualising the Random Forest Regression results (actual vs. predicted on the Test set)

In [None]:
# Overlay the true test targets and the model's predictions, one point per
# test row, in the order the rows appear in y_test / y_pred.
fig, ax = plt.subplots(figsize=(24, 5))

# NOTE(review): the series/title text mentions 'Reliance % Delivered Quantity';
# confirm it matches the target column of test_data.csv.
ax.plot(y_test, color='red', label='Actual Reliance Delivered Quantity')
ax.plot(y_pred, color='blue', label='Predicted Reliance Delivered Quantity')
ax.set_title('Reliance % Delivered Quantity')

# The horizontal axis is only the position within the test set: the rows were
# drawn by train_test_split, which shuffles by default, so it is not a time axis.
ax.set_xlabel('Test sample index')
ax.set_ylabel('% Delivered Quantity')
ax.legend()
plt.show()