In [1]:
import numpy as np
import pandas as pd
import cv2
from matplotlib import pyplot as plt
import os
import shutil
import pywt
import threading

In [4]:
#Loading the Cascade Classifier file
haar_cascade=cv2.CascadeClassifier('/usr/local/lib/python3.10/dist-packages/cv2/data/haarcascade_frontalface_default.xml')
eye_cascade=cv2.CascadeClassifier('/usr/local/lib/python3.10/dist-packages/cv2/data/haarcascade_eye.xml')

In [None]:
#to get all cropped faces in an image
def get_cropped_faces_with_2_eyes(image_path):
 original_image=cv2.imread(image_path)
 gray_image=cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
 faces=haar_cascade.detectMultiScale(gray_image, 1.2, 4)
 faces_list=[]
 for x,y,w,h in faces:
  face_gray=gray_image[y:y+h, x:x+w]
  eyes=eye_cascade.detectMultiScale(face_gray)
  if len(eyes)>=2:
      face_img=original_image[y:y+h, x:x+w]
      faces_list.append(face_img)
 return faces_list

In [None]:
#collecting all face images of a celebrity
def collect_face_images(name):
 face_images=[]
 for file in os.listdir(f'/content/drive/MyDrive/Indian Celebrity Prediction/datasets/images/original images/{name}'):
  try:
   faces_list=get_cropped_faces_with_2_eyes(f'/content/drive/MyDrive/Indian Celebrity Prediction/datasets/images/original images/{name}/{file}')
  except:
   print("Invalid image file:", file)
   continue
  if not faces_list:
   print("no face found", file)
  for face in faces_list:
   face_images.append(face)
 return face_images

In [None]:
'''face_images=collect_face_images('Akshay Kumar')
for i,image in enumerate(face_images):
 plt.subplot(12,5,i+1)
 plt.imshow(image)
plt.show()'''

"face_images=collect_face_images('Akshay Kumar')\nfor i,image in enumerate(face_images):\n plt.subplot(12,5,i+1)\n plt.imshow(image)\nplt.show()"

In [None]:
#saving all face images of a celebrity
def save_face_images(name):
  new_path=f'/content/drive/MyDrive/Indian Celebrity Prediction/datasets/images/cropped faces/{name}'
  if os.path.exists(new_path):
    if len(os.listdir(new_path))>80:
        return
    shutil.rmtree(new_path)
  os.mkdir(new_path)
  face_images=collect_face_images(name)
  for i,image in enumerate(face_images):
    cv2.imwrite(f'{new_path}/{i}.jpg', image)
  else:
    print(i, name, "cropped faces saved successfully")

In [2]:
celebrities=["Alia Bhatt", "Amitabh Bachchan", "Anupam Kher", "Deepika Padukone", "Hema Malini", "Rajkumar Rao", "Sachin Tendulkar", "Salman Khan", "Shah Rukh Khan", "Shilpa Shetty"]

In [None]:
for name in celebrities:
    save_face_images(name)
    print(name, "face images saved successfully")

In [None]:
save_face_images('Shah Rukh Khan')

In [3]:
#to do wavelet transformation of given image
def image_wavelet_transform(image, mode='haar', level=1):
 image_gray=cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 imageArray=np.float32(image_gray)/255
 coeff=pywt.wavedec2(imageArray, mode, level=level)
 coeff[0]*=0
 imageArray_wt=pywt.waverec2(coeff, mode)
 imageArray_wt=np.uint8(imageArray_wt*255)
 return imageArray_wt

In [4]:
#to collect color and wavelet transformed image data in a array of a celebrity
def collect_image_arrays(name):
 xdata=[]
 path=f'datasets/images/cropped faces/{name}'
 for file in os.listdir(path):
  face_img=cv2.imread(f'{path}/{file}')
  image_scaled=cv2.resize(face_img, (64,64))
  imageArray_wt=image_wavelet_transform(face_img, 'db1', 5)
  image_wt_scaled=cv2.resize(imageArray_wt, (64,64))
  combined_img=np.vstack((image_scaled.reshape(-1,1), image_wt_scaled.reshape(-1,1)))
  combined_img=combined_img.reshape(1,-1)
  combined_img=np.float32(combined_img)
  xdata.append(combined_img[0])
  #plt.imshow(image_wt_scaled, cmap='gray')
 return xdata

In [5]:
#to collect image data of all celebrities in a dataframe
celebrity_data=pd.DataFrame(columns=range(64*64*3 + 64*64))
celebrity_name=pd.Series(dtype='str')
for name in celebrities:
    xdata=pd.DataFrame(collect_image_arrays(name), columns=celebrity_data.columns)
    celebrity_data=pd.concat([celebrity_data, xdata])
    celebrity_name=celebrity_name._append(pd.Series([name]*len(xdata)))
    print(name, 'data collected successfully')

  celebrity_data=pd.concat([celebrity_data, xdata])


Alia Bhatt data collected successfully
Amitabh Bachchan data collected successfully
Anupam Kher data collected successfully
Deepika Padukone data collected successfully
Hema Malini data collected successfully
Rajkumar Rao data collected successfully
Sachin Tendulkar data collected successfully
Salman Khan data collected successfully
Shah Rukh Khan data collected successfully
Shilpa Shetty data collected successfully


In [6]:
celebrity_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16374,16375,16376,16377,16378,16379,16380,16381,16382,16383
0,13.0,12.0,22.0,15.0,11.0,22.0,19.0,11.0,21.0,18.0,...,3.0,121.0,253.0,249.0,249.0,76.0,184.0,237.0,25.0,3.0
1,27.0,27.0,39.0,29.0,32.0,40.0,30.0,34.0,38.0,32.0,...,243.0,239.0,243.0,247.0,248.0,240.0,9.0,29.0,2.0,251.0
2,40.0,43.0,48.0,48.0,49.0,57.0,42.0,42.0,54.0,26.0,...,12.0,248.0,250.0,13.0,30.0,3.0,237.0,244.0,242.0,232.0
3,11.0,6.0,7.0,18.0,11.0,15.0,28.0,19.0,30.0,38.0,...,249.0,237.0,238.0,236.0,239.0,234.0,244.0,247.0,72.0,25.0
4,9.0,21.0,25.0,7.0,19.0,25.0,5.0,15.0,22.0,6.0,...,106.0,99.0,208.0,12.0,120.0,222.0,108.0,23.0,116.0,80.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,33.0,42.0,60.0,48.0,58.0,82.0,62.0,73.0,101.0,60.0,...,243.0,238.0,249.0,204.0,79.0,246.0,251.0,84.0,7.0,2.0
46,64.0,78.0,96.0,58.0,65.0,79.0,50.0,47.0,53.0,42.0,...,249.0,8.0,4.0,19.0,3.0,10.0,168.0,219.0,17.0,80.0
47,68.0,94.0,149.0,100.0,129.0,188.0,110.0,144.0,209.0,116.0,...,242.0,241.0,236.0,222.0,206.0,208.0,215.0,89.0,44.0,250.0
48,24.0,10.0,11.0,25.0,13.0,12.0,27.0,14.0,19.0,28.0,...,3.0,10.0,20.0,254.0,211.0,41.0,224.0,120.0,59.0,103.0


In [7]:
celebrity_name

0        Alia Bhatt
1        Alia Bhatt
2        Alia Bhatt
3        Alia Bhatt
4        Alia Bhatt
          ...      
45    Shilpa Shetty
46    Shilpa Shetty
47    Shilpa Shetty
48    Shilpa Shetty
49    Shilpa Shetty
Length: 808, dtype: object

In [8]:
celebrity_name.value_counts()

Hema Malini         104
Rajkumar Rao        104
Shah Rukh Khan      101
Deepika Padukone     98
Alia Bhatt           92
Salman Khan          83
Amitabh Bachchan     74
Anupam Kher          61
Shilpa Shetty        50
Sachin Tendulkar     41
Name: count, dtype: int64

In [9]:
final_data=pd.concat([celebrity_data, celebrity_name.rename('celebrity_name')], axis=1)
final_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16375,16376,16377,16378,16379,16380,16381,16382,16383,celebrity_name
0,13.0,12.0,22.0,15.0,11.0,22.0,19.0,11.0,21.0,18.0,...,121.0,253.0,249.0,249.0,76.0,184.0,237.0,25.0,3.0,Alia Bhatt
1,27.0,27.0,39.0,29.0,32.0,40.0,30.0,34.0,38.0,32.0,...,239.0,243.0,247.0,248.0,240.0,9.0,29.0,2.0,251.0,Alia Bhatt
2,40.0,43.0,48.0,48.0,49.0,57.0,42.0,42.0,54.0,26.0,...,248.0,250.0,13.0,30.0,3.0,237.0,244.0,242.0,232.0,Alia Bhatt
3,11.0,6.0,7.0,18.0,11.0,15.0,28.0,19.0,30.0,38.0,...,237.0,238.0,236.0,239.0,234.0,244.0,247.0,72.0,25.0,Alia Bhatt
4,9.0,21.0,25.0,7.0,19.0,25.0,5.0,15.0,22.0,6.0,...,99.0,208.0,12.0,120.0,222.0,108.0,23.0,116.0,80.0,Alia Bhatt
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,33.0,42.0,60.0,48.0,58.0,82.0,62.0,73.0,101.0,60.0,...,238.0,249.0,204.0,79.0,246.0,251.0,84.0,7.0,2.0,Shilpa Shetty
46,64.0,78.0,96.0,58.0,65.0,79.0,50.0,47.0,53.0,42.0,...,8.0,4.0,19.0,3.0,10.0,168.0,219.0,17.0,80.0,Shilpa Shetty
47,68.0,94.0,149.0,100.0,129.0,188.0,110.0,144.0,209.0,116.0,...,241.0,236.0,222.0,206.0,208.0,215.0,89.0,44.0,250.0,Shilpa Shetty
48,24.0,10.0,11.0,25.0,13.0,12.0,27.0,14.0,19.0,28.0,...,10.0,20.0,254.0,211.0,41.0,224.0,120.0,59.0,103.0,Shilpa Shetty


In [10]:
print(final_data.shape)

(808, 16385)


In [11]:
final_data.to_csv("datasets/celebrity_face_data.csv", index=False)