In [1]:
import pandas as pd
import pickle


#DATA UNDERSTANDING

In [2]:
makeupdata = pd.read_csv("https://raw.githubusercontent.com/htetlwink/AIProjectTest/main/dataset/makeupdataset.csv")

In [3]:
makeupdata.shape

(172, 13)

In [4]:
makeupdata.columns

Index(['Hex_Code', 'R', 'G', 'B', 'Core_Skin_Type', 'Skin_Type', 'Core_Color',
       'Foundation_Color', 'Blush_Color', 'Lipstick_Color', 'Eyeshadow_Color',
       'Highlighter_Color', 'Contour_Color'],
      dtype='object')

In [5]:
makeupdata["Core_Skin_Type"].value_counts()

Unnamed: 0_level_0,count
Core_Skin_Type,Unnamed: 1_level_1
Dry,78
Oily,35
Sensitive,33
Combination,26


In [6]:
makeupdata["Core_Color"].value_counts()

Unnamed: 0_level_0,count
Core_Color,Unnamed: 1_level_1
Mocha,74
Natural,58
Pale,24
Golden,16


#DATA PROCESSING

WE DELETE UN NESSEARY COLUMNS

In [7]:
makeupdata = makeupdata.drop(columns=["Hex_Code","Skin_Type","Foundation_Color","Blush_Color","Lipstick_Color","Eyeshadow_Color","Highlighter_Color","Contour_Color"])
makeupdata.head()

Unnamed: 0,R,G,B,Core_Skin_Type,Core_Color
0,255,250,250,Oily,Pale
1,255,240,225,Dry,Pale
2,250,231,211,Combination,Pale
3,240,213,190,Sensitive,Pale
4,216,185,152,Combination,Natural


Duplicate data set for encoding

In [8]:
df = makeupdata.copy()
target = 'Core_Color'
encode = ['Core_Skin_Type']

WE use ONE HOT ENCODING

In [9]:
for col in encode:
    dummy = pd.get_dummies(df[col], prefix=col)
    df = pd.concat([df,dummy], axis=1)
    del df[col]

In [10]:
df.head()

Unnamed: 0,R,G,B,Core_Color,Core_Skin_Type_Combination,Core_Skin_Type_Dry,Core_Skin_Type_Oily,Core_Skin_Type_Sensitive
0,255,250,250,Pale,False,False,True,False
1,255,240,225,Pale,False,True,False,False
2,250,231,211,Pale,True,False,False,False
3,240,213,190,Pale,False,False,False,True
4,216,185,152,Natural,True,False,False,False


We Map our target to specific Number

In [11]:
target_mapper = {'Pale':0, 'Natural':1, 'Golden':2, 'Mocha':3}
def target_encode(val):
    return target_mapper[val]

In [12]:
df['Core_Color'] = df['Core_Color'].apply(target_encode)

In [13]:
df.head()

Unnamed: 0,R,G,B,Core_Color,Core_Skin_Type_Combination,Core_Skin_Type_Dry,Core_Skin_Type_Oily,Core_Skin_Type_Sensitive
0,255,250,250,0,False,False,True,False
1,255,240,225,0,False,True,False,False
2,250,231,211,0,True,False,False,False
3,240,213,190,0,False,False,False,True
4,216,185,152,1,True,False,False,False


Seperate X and Y data

In [14]:
X = df.drop('Core_Color', axis=1)
Y = df['Core_Color']

Split the dataset into 80% train data and 20 % test data

In [15]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X,Y, test_size=0.2, random_state=42)

#Fit in the Model using Random Forest Classifier

In [16]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier()
clf.fit(X, Y)

#Export the Model

In [23]:
with open('Trained_model.pkl', 'wb') as file:
    pickle.dump(clf, file)

##Load the Model

In [24]:
with open('Trained_model.pkl', 'rb') as file:
    clf = pickle.load(file)

Model Prediction

In [18]:
y_pred = clf.predict(x_test)

Confusion Matrix

In [25]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred))

[[ 3  0  0  0]
 [ 0 13  0  1]
 [ 0  0  3  0]
 [ 0  0  0 15]]


Result

In [26]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
print("Accuracy score is", accuracy_score(y_test, y_pred)*100,"%")
print("Precision score is" ,precision_score(y_test, y_pred, average = "macro")*100,"%")
print("Recall score is", recall_score(y_test, y_pred, average = "macro")*100,"%")
print("F1 score is", f1_score(y_test, y_pred, average = "macro")*100,"%")

Accuracy score is 97.14285714285714 %
Precision score is 98.4375 %
Recall score is 98.21428571428572 %
F1 score is 98.26762246117084 %
