<a href="https://colab.research.google.com/github/anmolkhurana490/Indian-Celebrity-Face-Classification/blob/main/celebrity_face_cropping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import cv2
from matplotlib import pyplot as plt
import os
import shutil
import pywt
import threading

In [None]:
#Mount Google Drive so the dataset folders under /content/drive are reachable.
#This import only exists inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#Loading the Cascade Classifier files for face and eye detection.
#cv2.data.haarcascades is the folder where the installed OpenCV package ships its
#cascade XML files, so the lookup no longer depends on a specific Python version path.
haar_cascade=cv2.CascadeClassifier(os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml'))
eye_cascade=cv2.CascadeClassifier(os.path.join(cv2.data.haarcascades, 'haarcascade_eye.xml'))
#CascadeClassifier does not raise when the XML file is missing; it just stays empty,
#so fail here with a clear message instead of later inside detectMultiScale.
if haar_cascade.empty() or eye_cascade.empty():
    raise RuntimeError(f"Could not load Haar cascade files from {cv2.data.haarcascades}")

In [None]:
#to get all cropped faces in an image
def get_cropped_faces_with_2_eyes(image_path):
    """Return the crops of all detected faces in which at least two eyes are found.

    Parameters
    ----------
    image_path : str
        Path of the image file, read with ``cv2.imread``.

    Returns
    -------
    list
        One colour crop (a slice of the loaded image) per accepted face.
        The list is empty when no face passes the two-eye check.

    Raises
    ------
    ValueError
        If ``cv2.imread`` cannot read the file. It returns ``None`` in that case
        rather than raising, which previously surfaced as an unrelated
        ``cvtColor`` error.
    """
    original_image=cv2.imread(image_path)
    if original_image is None:
        raise ValueError(f"Could not read image file: {image_path}")
    gray_image=cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
    #positional arguments are scaleFactor=1.2 and minNeighbors=4
    faces=haar_cascade.detectMultiScale(gray_image, 1.2, 4)
    faces_list=[]
    for x,y,w,h in faces:
        #eyes are searched only inside the grayscale face region
        face_gray=gray_image[y:y+h, x:x+w]
        eyes=eye_cascade.detectMultiScale(face_gray)
        if len(eyes)>=2:
            faces_list.append(original_image[y:y+h, x:x+w])
    return faces_list

In [None]:
#collecting all face images of a celebrity
def collect_face_images(name):
    """Collect the cropped faces from every image in a celebrity's original-images folder.

    Parameters
    ----------
    name : str
        Folder name of the celebrity inside the "original images" directory.

    Returns
    -------
    list
        All face crops returned by ``get_cropped_faces_with_2_eyes`` for the
        files in that folder, in sorted file-name order.

    Files that cannot be processed are reported and skipped. Files without an
    accepted face are reported as well.
    """
    folder=f'/content/drive/MyDrive/Indian Celebrity Prediction/datasets/images/original images/{name}'
    face_images=[]
    #sorted() keeps the crop order (and the numbering of saved files) reproducible,
    #because os.listdir returns entries in arbitrary order
    for file in sorted(os.listdir(folder)):
        try:
            faces_list=get_cropped_faces_with_2_eyes(f'{folder}/{file}')
        except Exception as error:
            #Exception instead of a bare except, so Ctrl+C / kernel interrupt still stops the loop
            print("Invalid image file:", file, "-", error)
            continue
        if not faces_list:
            print("no face found", file)
        face_images.extend(faces_list)
    return face_images

In [None]:
#Disabled preview cell: the code is wrapped in a string literal, so nothing below is executed.
#NOTE(review): if this is re-enabled, plt.subplot(12,5,...) only has 60 slots, and the crops
#come from cv2.imread (BGR channel order) - confirm the colours before trusting the plot.
'''face_images=collect_face_images('Akshay Kumar')
for i,image in enumerate(face_images):
 plt.subplot(12,5,i+1)
 plt.imshow(image)
plt.show()'''

"face_images=collect_face_images('Akshay Kumar')\nfor i,image in enumerate(face_images):\n plt.subplot(12,5,i+1)\n plt.imshow(image)\nplt.show()"

In [None]:
#saving all face images of a celebrity
def save_face_images(name):
    """Crop all faces of one celebrity and write them as numbered JPEG files.

    Parameters
    ----------
    name : str
        Celebrity folder name. Crops are written to ``cropped faces/<name>/<i>.jpg``.

    Returns
    -------
    None

    If the target folder already holds more than 80 files, nothing is done.
    Otherwise an existing folder is deleted and rebuilt from scratch.
    """
    new_path=f'/content/drive/MyDrive/Indian Celebrity Prediction/datasets/images/cropped faces/{name}'
    if os.path.exists(new_path):
        #a folder with more than 80 files is treated as already processed
        if len(os.listdir(new_path))>80:
            return
        shutil.rmtree(new_path)
    #makedirs also creates the parent "cropped faces" folder on a first run
    os.makedirs(new_path)
    face_images=collect_face_images(name)
    saved=0
    for i,image in enumerate(face_images):
        #cv2.imwrite reports failure through its return value instead of raising
        if cv2.imwrite(f'{new_path}/{i}.jpg', image):
            saved+=1
        else:
            print("could not write", f'{new_path}/{i}.jpg')
    #report the real number of files written; the old for/else printed the last loop index
    #and raised an error when no face was found at all
    print(saved, name, "cropped faces saved successfully")

In [None]:
#celebrity folder names processed by this notebook (one class label each)
celebrities=[
    "Alia Bhatt",
    "Amitabh Bachchan",
    "Anupam Kher",
    "Deepika Padukone",
    "Hema Malini",
    "Rajkumar Rao",
    "Sachin Tendulkar",
    "Salman Khan",
    "Shah Rukh Khan",
    "Shilpa Shetty",
]

In [None]:
#Crop and save the faces of every celebrity listed in `celebrities`
for name in celebrities:
    save_face_images(name)
    #NOTE(review): this line is printed even when save_face_images returned early
    #because the target folder already held more than 80 files
    print(name, "face images saved successfully")

In [None]:
#Run the cropping step for a single celebrity.
#NOTE(review): 'Shah Rukh Khan' is already in `celebrities`, so this looks like a manual re-run - confirm it is still needed.
save_face_images('Shah Rukh Khan')

In [None]:
#to do wavelet transformation of given image
def image_wavelet_transform(image, mode='haar', level=1):
    """Return a detail-only wavelet reconstruction of a colour image as uint8.

    Parameters
    ----------
    image : numpy.ndarray
        Colour image in the channel order expected by ``cv2.COLOR_BGR2GRAY``.
    mode : str
        Wavelet name passed to ``pywt.wavedec2`` / ``pywt.waverec2``
        (despite the parameter name it is not a padding mode).
    level : int
        Decomposition level passed to ``pywt.wavedec2``.

    Returns
    -------
    numpy.ndarray
        2-D uint8 array reconstructed from the detail coefficients only.
    """
    image_gray=cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    #scale to [0, 1] floats before decomposing
    imageArray=np.float32(image_gray)/255
    coeff=list(pywt.wavedec2(imageArray, mode, level=level))
    #zero the approximation coefficients so only the detail (edge) information remains
    coeff[0]=np.zeros_like(coeff[0])
    imageArray_wt=pywt.waverec2(coeff, mode)
    #The detail-only reconstruction is signed and not limited to [0, 1]. Casting such
    #values straight to uint8 wraps around (e.g. -1 becomes 255) and is platform dependent,
    #so clip to the valid pixel range first. Features produced by the old unclipped cast
    #differ from these; regenerate any dataset built with the previous version.
    imageArray_wt=np.clip(imageArray_wt*255, 0, 255).astype(np.uint8)
    return imageArray_wt

In [None]:
#to collect color and wavelet transformed image data in a array of a celebrity
def collect_image_arrays(name):
    """Build one flat feature vector per cropped face image of a celebrity.

    Parameters
    ----------
    name : str
        Folder name of the celebrity inside the "cropped faces" directory.

    Returns
    -------
    list
        One 1-D float32 array per readable image: the 64x64 colour pixels
        (64*64*3 values) followed by the 64x64 wavelet image (64*64 values).

    Files that ``cv2.imread`` cannot decode are reported and skipped.
    """
    xdata=[]
    path=f'/content/drive/MyDrive/Indian Celebrity Prediction/datasets/images/cropped faces/{name}'
    #sorted() makes the row order reproducible; os.listdir order is arbitrary
    for file in sorted(os.listdir(path)):
        face_img=cv2.imread(f'{path}/{file}')
        if face_img is None:
            #cv2.imread returns None for unreadable files instead of raising
            print("Invalid image file:", file)
            continue
        image_scaled=cv2.resize(face_img, (64,64))
        #the wavelet transform runs on the full-size crop and is resized afterwards
        imageArray_wt=image_wavelet_transform(face_img, 'db1', 5)
        image_wt_scaled=cv2.resize(imageArray_wt, (64,64))
        #flatten both images and join them end to end into one feature vector
        combined_img=np.concatenate((image_scaled.ravel(), image_wt_scaled.ravel()))
        xdata.append(np.float32(combined_img))
        #plt.imshow(image_wt_scaled, cmap='gray')
    return xdata

In [None]:
#to collect image data of all celebrities in a dataframe
#Rows and labels are gathered in plain lists and converted once at the end. This avoids
#repeated pd.concat calls inside the loop and the private Series._append method.
feature_rows=[]
labels=[]
for name in celebrities:
    xdata=collect_image_arrays(name)
    feature_rows.extend(xdata)
    #one label per feature row keeps both containers aligned by position
    labels.extend([name]*len(xdata))
    print(name, 'data collected successfully')
#both objects get the same unique 0..n-1 index instead of per-celebrity repeated labels
celebrity_data=pd.DataFrame(feature_rows, columns=range(64*64*3 + 64*64))
celebrity_name=pd.Series(labels, dtype='str')

Alia Bhatt data collected successfully
Amitabh Bachchan data collected successfully
Anupam Kher data collected successfully
Deepika Padukone data collected successfully
Hema Malini data collected successfully
Rajkumar Rao data collected successfully
Sachin Tendulkar data collected successfully
Salman Khan data collected successfully
Shah Rukh Khan data collected successfully
Shilpa Shetty data collected successfully


In [None]:
#Preview the feature table: one row per cropped face, one column per feature value
celebrity_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16374,16375,16376,16377,16378,16379,16380,16381,16382,16383
0,179.0,199.0,176.0,179.0,199.0,176.0,178.0,198.0,175.0,179.0,...,102.0,23.0,26.0,11.0,18.0,28.0,60.0,249.0,3.0,17.0
1,10.0,13.0,17.0,10.0,12.0,17.0,12.0,15.0,20.0,16.0,...,189.0,0.0,219.0,2.0,0.0,0.0,100.0,254.0,3.0,40.0
2,62.0,129.0,152.0,115.0,146.0,151.0,202.0,209.0,207.0,168.0,...,208.0,150.0,68.0,197.0,251.0,244.0,245.0,253.0,254.0,233.0
3,80.0,69.0,79.0,95.0,82.0,90.0,66.0,53.0,61.0,70.0,...,9.0,17.0,246.0,225.0,16.0,248.0,242.0,18.0,17.0,215.0
4,17.0,17.0,17.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,...,253.0,153.0,4.0,19.0,31.0,2.0,1.0,249.0,2.0,24.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,63.0,45.0,52.0,91.0,72.0,81.0,75.0,55.0,67.0,87.0,...,0.0,2.0,5.0,4.0,0.0,254.0,253.0,255.0,147.0,1.0
86,243.0,246.0,237.0,239.0,241.0,235.0,241.0,241.0,240.0,239.0,...,109.0,113.0,109.0,105.0,110.0,126.0,122.0,150.0,243.0,20.0
87,35.0,70.0,134.0,16.0,41.0,102.0,24.0,39.0,87.0,17.0,...,251.0,235.0,45.0,9.0,228.0,45.0,120.0,105.0,232.0,43.0
88,16.0,29.0,25.0,1.0,23.0,18.0,96.0,132.0,125.0,145.0,...,231.0,232.0,243.0,24.0,8.0,251.0,242.0,143.0,7.0,2.0


In [None]:
#Preview the label series (one celebrity name per feature row)
celebrity_name

0        Alia Bhatt
1        Alia Bhatt
2        Alia Bhatt
3        Alia Bhatt
4        Alia Bhatt
          ...      
85    Shilpa Shetty
86    Shilpa Shetty
87    Shilpa Shetty
88    Shilpa Shetty
89    Shilpa Shetty
Length: 732, dtype: object

In [None]:
#Number of collected face samples per celebrity
celebrity_name.value_counts()

Alia Bhatt          90
Shilpa Shetty       90
Salman Khan         83
Rajkumar Rao        82
Hema Malini         78
Shah Rukh Khan      76
Deepika Padukone    71
Anupam Kher         60
Amitabh Bachchan    54
Sachin Tendulkar    48
Name: count, dtype: int64

In [None]:
#Attach the labels as the last column, matching rows by position.
#pd.concat(axis=1) aligns on index labels instead, which only works while both
#indexes are exactly identical and raises when duplicated labels differ.
assert len(celebrity_data)==len(celebrity_name), "features and labels must have the same number of rows"
final_data=celebrity_data.assign(celebrity_name=celebrity_name.to_numpy())
final_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16375,16376,16377,16378,16379,16380,16381,16382,16383,celebrity_name
0,179.0,199.0,176.0,179.0,199.0,176.0,178.0,198.0,175.0,179.0,...,23.0,26.0,11.0,18.0,28.0,60.0,249.0,3.0,17.0,Alia Bhatt
1,10.0,13.0,17.0,10.0,12.0,17.0,12.0,15.0,20.0,16.0,...,0.0,219.0,2.0,0.0,0.0,100.0,254.0,3.0,40.0,Alia Bhatt
2,62.0,129.0,152.0,115.0,146.0,151.0,202.0,209.0,207.0,168.0,...,150.0,68.0,197.0,251.0,244.0,245.0,253.0,254.0,233.0,Alia Bhatt
3,80.0,69.0,79.0,95.0,82.0,90.0,66.0,53.0,61.0,70.0,...,17.0,246.0,225.0,16.0,248.0,242.0,18.0,17.0,215.0,Alia Bhatt
4,17.0,17.0,17.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,...,153.0,4.0,19.0,31.0,2.0,1.0,249.0,2.0,24.0,Alia Bhatt
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,63.0,45.0,52.0,91.0,72.0,81.0,75.0,55.0,67.0,87.0,...,2.0,5.0,4.0,0.0,254.0,253.0,255.0,147.0,1.0,Shilpa Shetty
86,243.0,246.0,237.0,239.0,241.0,235.0,241.0,241.0,240.0,239.0,...,113.0,109.0,105.0,110.0,126.0,122.0,150.0,243.0,20.0,Shilpa Shetty
87,35.0,70.0,134.0,16.0,41.0,102.0,24.0,39.0,87.0,17.0,...,235.0,45.0,9.0,228.0,45.0,120.0,105.0,232.0,43.0,Shilpa Shetty
88,16.0,29.0,25.0,1.0,23.0,18.0,96.0,132.0,125.0,145.0,...,232.0,243.0,24.0,8.0,251.0,242.0,143.0,7.0,2.0,Shilpa Shetty


In [None]:
#Sanity check: (number of face samples, 64*64*3 + 64*64 feature columns + 1 label column)
print(final_data.shape)

(732, 16385)


In [None]:
#Persist the dataset to Drive; index=False leaves the row index out of the CSV
final_data.to_csv("/content/drive/MyDrive/Indian Celebrity Prediction/datasets/celebrity_face_data.csv", index=False)