# Random Forest Classifier & Decision Tree Classifier
###### Created by Esmira Abdullaieva & Shovak Myroslav

In [17]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import sklearn.metrics as metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

### Dataset analysis

In [18]:
df = pd.read_csv('Mobile_Price.csv')

In [19]:
print('The number of duplicates:', df.duplicated().sum())
print(f'Empty cells:\n{df.isnull().sum()}')

The number of duplicates: 0
Empty cells:
battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64


#### Correlation analysis

In [20]:
correlation = df.corr()
correlation.style.background_gradient(cmap='coolwarm')

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
battery_power,1.0,0.051775,0.051174,-0.029548,0.038235,0.053779,0.003501,0.026631,0.005979,-0.027767,0.049995,0.029265,0.000949,0.007225,-0.036318,-0.023418,0.066936,0.0235,-0.02486,0.011907,0.201921
blue,0.051775,1.0,0.03924,0.004405,0.038404,-0.005185,0.028535,0.003465,-0.046366,0.007099,0.023452,0.023612,-0.045505,0.017324,-0.011567,0.026401,0.009501,-0.075486,0.004405,-0.033429,0.015533
clock_speed,0.051174,0.03924,1.0,0.031709,0.017362,-0.011402,0.0202,-0.049759,0.038384,-0.015988,0.021638,-0.018135,-0.030104,-0.006021,-0.004315,-0.005392,-0.002214,-0.017378,0.053566,-0.039293,-0.017881
dual_sim,-0.029548,0.004405,0.031709,1.0,-0.022646,-0.035593,-0.010088,-0.00072,0.000292,-0.012979,-0.005654,-0.019653,-0.008789,0.058778,-0.025298,-0.034312,-0.049047,-0.036396,-0.003955,0.023586,0.03822
fc,0.038235,0.038404,0.017362,-0.022646,1.0,-0.013704,-0.019043,-0.005858,-0.020618,-0.051665,0.646487,-0.013027,-0.021701,0.007216,-0.042414,-0.019086,-0.042274,-0.001741,0.003121,0.013012,0.014837
four_g,0.053779,-0.005185,-0.011402,-0.035593,-0.013704,1.0,0.036246,-0.004602,-0.01477,-0.019134,0.011326,-0.021091,0.024733,-0.008123,0.034806,0.062528,-0.056329,0.58711,0.039046,-0.000436,0.006276
int_memory,0.003501,0.028535,0.0202,-0.010088,-0.019043,0.036246,1.0,0.043089,-0.025742,0.005748,-0.004075,0.054752,0.031208,0.028324,0.060799,0.014405,0.008174,-0.02186,0.006634,-0.020371,0.048327
m_dep,0.026631,0.003465,-0.049759,-0.00072,-0.005858,-0.004602,0.043089,1.0,0.005342,-0.010806,0.02217,-0.006204,0.010861,0.003264,-0.036944,-0.040174,-0.002763,-0.022827,-0.001371,-0.015863,0.012215
mobile_wt,0.005979,-0.046366,0.038384,0.000292,-0.020618,-0.01477,-0.025742,0.005342,1.0,-0.007139,0.015956,-0.013476,-0.023377,-0.022158,-0.05256,-0.018089,-0.008236,0.034939,-0.035626,0.000981,-0.058581
n_cores,-0.027767,0.007099,-0.015988,-0.012979,-0.051665,-0.019134,0.005748,-0.010806,-0.007139,1.0,-0.024688,0.003665,0.008934,0.014689,-0.001046,0.020994,-0.013728,-0.00849,0.006618,-0.037739,0.009386


In [21]:
# removing weakly correlated data
df = df.drop(['clock_speed', 'mobile_wt', 'touch_screen'], axis=1)

In [22]:
y = df['price_range']
X = df.drop(['price_range'], axis=1)

In [23]:
# separation of data into test and training
SEED = 42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=SEED)

### Random Forest Classifier

In [24]:
# creating Random Forest Classifier model and it's initialization
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)
accuracy = rfc.score(X_test, y_test)
print(f'Accuracy: {accuracy}')

Accuracy: 0.8171641791044776


In [36]:
price_predict = rfc.predict(X_test)
print(f'Predict price:\n{price_predict}')

Predict price:
[1 3 3 3 0 3 2 3 2 0 3 3 3 1 2 3 3 2 1 3 0 1 2 1 1 1 0 0 2 2 0 1 0 0 3 2 0
 3 0 2 0 3 2 1 2 3 0 0 1 2 3 2 1 2 1 1 2 3 0 2 0 0 3 0 1 0 3 2 3 2 3 1 0 3
 0 3 2 0 1 3 0 3 0 3 0 3 1 3 0 2 3 1 0 0 2 2 3 2 2 3 3 0 0 1 0 0 3 1 3 2 1
 1 3 0 0 2 3 0 1 1 0 2 0 2 1 0 3 1 1 2 3 0 0 0 3 3 0 2 3 3 1 1 3 3 2 2 0 3
 1 2 0 3 1 3 1 1 0 2 3 2 2 3 1 2 2 1 3 0 2 3 3 1 1 1 1 3 3 3 3 1 2 0 3 0 3
 3 3 3 1 3 3 1 0 3 3 2 0 2 0 0 3 0 3 1 1 2 1 2 3 1 3 2 0 0 2 0 0 1 0 2 1 1
 1 3 2 3 2 3 2 2 2 0 0 0 3 2 1 3 1 3 2 2 0 3 1 3 3 0 3 2 3 3 3 1 1 2 1 1 0
 2 2 0 1 0 1 2 3 0 1 2 1 1 3 0 3 1 1 3 2 0 3 1 3 1 0 2 0 3 1 0 3 0 3 1 2 0
 3 0 3 0 3 1 0 0 0 3 1 2 3 1 0 0 2 2 1 3 3 3 2 1 2 3 0 1 1 3 1 0 3 0 1 0 0
 1 3 2 0 1 2 3 2 0 0 0 3 3 2 2 0 3 1 3 1 0 3 2 3 1 1 0 1 1 3 2 1 3 1 0 0 2
 3 0 1 2 1 3 1 1 2 3 3 2 0 2 3 0 0 1 1 0 0 2 1 1 1 0 1 3 2 0 0 0 3 1 3 1 3
 1 0 0 2 3 1 0 1 1 3 0 0 2 2 3 1 2 2 2 2 0 1 3 3 3 0 3 0 2 0 0 2 1 3 1 2 0
 0 2 2 3 3 0 2 0 3 1 0 2 3 3 0 0 3 3 1 3 2 3 0 3 1 3 2 3 1 2 2 1 3 3 3 1 3
 1 0 1 2 3

In [26]:
# comparing results with actual data
pd.DataFrame({'Actual': y_test, 'Predicted': price_predict})

Unnamed: 0,Actual,Predicted
992,1,1
746,3,3
1066,3,3
425,2,3
490,0,0
...,...,...
720,3,3
940,3,3
61,0,0
272,3,3


In [35]:
# calculating of metrix
mae = mean_absolute_error(y_test, price_predict)
mse = mean_squared_error(y_test, price_predict)
rmse = mean_squared_error(y_test, price_predict, squared = False)
relative_error = round(rmse/price_predict.mean()*100, 2)

print(f'{mse = }\n{rmse = }\n{relative_error = }%\n')

mse = 0.0914179104477612
rmse = 0.30235394895347606
relative_error = 19.44%



### Decision Tree Classifier

In [57]:
dfc = DecisionTreeClassifier()
dfc.fit(X_train, y_train)
accuracy = dfc.score(X_test, y_test)
print(f'Accuracy: {accuracy}')

Accuracy: 0.8041044776119403


In [62]:
price_predict = dfc.predict(X_test)
print(f'Predict price:\n{price_predict}')

Predict price:
[1 3 3 3 0 3 2 3 2 0 3 3 3 1 2 3 3 2 2 3 0 1 2 1 0 2 1 1 2 2 0 1 0 1 3 2 0
 3 0 1 0 3 1 1 2 3 0 0 2 2 2 1 0 2 0 2 2 3 0 3 0 1 2 0 1 0 2 1 2 2 3 1 1 3
 0 3 2 0 1 3 0 3 1 3 0 3 1 3 0 2 3 1 0 0 2 2 3 2 2 3 3 1 0 1 0 0 3 1 3 1 1
 1 3 0 0 2 3 0 1 1 0 1 0 2 0 1 2 1 2 3 2 1 0 0 3 3 0 2 3 3 1 1 3 3 1 2 0 3
 1 2 1 2 1 3 2 2 0 2 3 2 2 3 1 2 2 1 3 0 2 3 3 1 1 1 1 3 3 3 3 1 2 0 3 0 3
 3 3 3 1 3 3 1 0 3 3 2 1 2 0 0 2 0 3 1 1 1 1 2 3 1 3 2 0 1 2 0 1 1 0 2 2 1
 1 3 2 2 2 2 2 2 2 0 0 1 3 2 1 3 2 3 2 2 0 3 1 3 3 0 3 2 3 3 2 1 1 2 1 1 1
 2 2 0 1 0 1 2 3 0 1 3 1 2 3 0 3 1 1 3 3 0 3 1 3 1 0 2 0 3 1 0 2 0 3 1 1 0
 3 0 3 0 3 0 0 0 0 2 1 2 2 1 0 0 2 2 1 3 3 3 2 1 1 3 0 2 1 3 1 0 2 0 1 1 0
 1 3 2 0 2 2 3 2 0 0 0 3 2 2 2 0 3 1 3 0 0 3 2 3 0 1 0 1 2 3 2 1 2 1 0 0 2
 3 0 1 2 1 2 1 1 2 3 3 1 0 2 3 0 1 1 2 0 0 2 1 1 2 0 1 3 1 0 0 0 3 1 3 1 3
 1 0 0 2 3 1 0 1 1 3 0 0 2 3 3 1 2 1 2 2 1 1 3 3 3 0 3 0 2 0 1 2 0 3 1 2 0
 0 2 2 3 3 0 2 0 3 1 0 2 3 3 0 0 3 3 1 3 2 3 0 3 1 3 2 2 1 1 3 1 3 3 3 1 3
 1 1 2 2 3

In [30]:
# comparing results with actual data
pd.DataFrame({'Actual': y_test, 'Predicted': price_predict})

Unnamed: 0,Actual,Predicted
992,1,1
746,3,3
1066,3,3
425,2,3
490,0,0
...,...,...
720,3,2
940,3,3
61,0,0
272,3,2


In [61]:
# calculating of metrix
mae = mean_absolute_error(y_test, price_predict)
mse = mean_squared_error(y_test, price_predict)
rmse = mean_squared_error(y_test, price_predict, squared=False)
relative_error = round(rmse/price_predict.mean()*100, 2)

print(f'{mse = }\n{rmse = }\n{relative_error = }%\n')

mse = 0.10354477611940298
rmse = 0.32178374122911024
relative_error = 20.73%

