# MACHINE LEARNING APPROACH

In [1]:
import cv2
import os
import pandas as pd

In [2]:
# Define a function to extract RGB values from an image
def extract_color_values(image_path):
    image = cv2.imread(image_path)
    b, g, r = cv2.split(image)
    
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv_image) 

    lab_image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    l, a, b_lab = cv2.split(lab_image)
    
    return r.mean(), g.mean(), b.mean(),h.mean(), s.mean(), v.mean(),l.mean(), a.mean(), b_lab.mean()

In [3]:
# Define a function to process multiple folders of images
def process_folders(folder_paths, labels):
    data = []
    for folder_path, label in zip(folder_paths, labels):
        for filename in os.listdir(folder_path):
            if filename.endswith(".jpg") or filename.endswith(".png"): 
                image_path = os.path.join(folder_path, filename)
                r, g, b,h, s, v,l, a, b_lab = extract_color_values(image_path)
                #r, g, b = rgb
                #h, s, v = hsv
                #l, a, b_lab = lab
                data.append({
                    'Image Name': filename,
                    'Red': r,
                    'Green': g,
                    'Blue': b,
                    'Hue': h,
                    'Saturation': s,
                    'Value': v,
                    'L': l,
                    'A': a,
                    'B': b_lab,
                    'Label': label
                })
    return data

In [4]:
# Specify the folder path and labels
folder_paths = ['C:/Users/User/Downloads/Dataset/zebra long wing', 'C:/Users/User/Downloads/Dataset/wood satyr']
labels = [1, 0]

In [5]:
data = process_folders(folder_paths, labels)

# Create a DataFrame and save it to an Excel file
df = pd.DataFrame(data)
df.to_excel('extracted_feature.xlsx', index=False)

In [6]:
 df= pd.read_excel('extracted_feature.xlsx')
print(df)

    Image Name         Red       Green        Blue        Hue  Saturation  \
0      000.jpg   97.919120   98.256835   87.277000  40.996323   60.638388   
1      001.jpg  109.386039   73.495937   46.195673  23.127814  145.195938   
2      002.jpg   96.830002  110.915227   75.909768  43.739048  103.573156   
3      003.jpg   92.024034   93.962347   66.527142  32.500055  100.523374   
4      004.jpg   86.752942   87.466574   58.931588  33.555442  110.056717   
..         ...         ...         ...         ...        ...         ...   
185    087.jpg  137.908292  136.374694   85.006842  25.774663  120.020067   
186    088.jpg   78.364395   97.975266   39.686118  32.012033  156.007288   
187    089.jpg   94.310051   99.830265   60.070091  32.598352  135.352086   
188    090.jpg  130.402507  139.851264  117.062772  51.678372   66.423873   
189    091.jpg  140.800973  142.984808   81.720183  28.225859  138.753457   

          Value           L           A           B  Label  
0    104.50298

In [9]:
# FOR KNN

x = df[['Red', 'Green', 'Blue','Hue','Saturation','Value','L','A','B']]
y = df['Label']

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier()
classifier.fit(x_train, y_train)

y_pred = classifier.predict(x_test)

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

print ("Accuracy Score:",accuracy_score(y_test, y_pred))
print ("Precision Score:",precision_score(y_test, y_pred))
print ("Recall Score:",recall_score(y_test, y_pred))
print ("F1 Score",f1_score(y_test, y_pred))

Accuracy Score: 0.7368421052631579
Precision Score: 0.6956521739130435
Recall Score: 0.8421052631578947
F1 Score 0.761904761904762


In [10]:
# FOR DECISION TREE

x=df[['Red', 'Green', 'Blue','Hue','Saturation','Value','L','A','B']]
y= df['Label']

from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test= train_test_split(x,y,test_size=0.2)

from sklearn import tree 
classifier= tree.DecisionTreeClassifier()

classifier.fit(x_train,y_train)
y_pred= classifier.predict(x_test)

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

print ("Accuracy Score:",accuracy_score(y_test, y_pred)*100)
print ("Precision Score:",precision_score(y_test, y_pred)*100)
print ("Recall Score:",recall_score(y_test, y_pred)*100)
print ("F1 Score",f1_score(y_test, y_pred)*100)

Accuracy Score: 81.57894736842105
Precision Score: 86.36363636363636
Recall Score: 82.6086956521739
F1 Score 84.44444444444444


In [None]:
# FOR RANDOM FOREST

x = df[['Red', 'Green', 'Blue','Hue','Saturation','Value','L','A','B']]
y = df['Label']

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier()

classifier.fit(x_train, y_train)

y_pred = classifier.predict(x_test)

print ("Accuracy Score:",accuracy_score(y_test, y_pred)*100)
print ("Precision Score:",precision_score(y_test, y_pred)*100)
print ("Recall Score:",recall_score(y_test, y_pred)*100)
print ("F1 Score",f1_score(y_test, y_pred)*100)