# COLAB/ORGANIZADOR

Para funcionar, o arquivo .zip enviado deve conter uma pasta com o nome do evento, e as imagens devem estar dentro dessa pasta.


# Setup and Authentication
As células abaixo configuram seu ambiente com funções úteis, além de estabelecer a conexão com a pasta correta do Google Drive.

Você terá acesso às funções:
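1.   list_files_in_shared_folder
2.   create_drive_folder
3.   upload_file_to_drive
4.   upload_folder_to_drive
5.   download_file
6.   unzip_file
7.   move_file_to_folder
8.   get_files_in_folder
9.   create_folder
10.  get_google_sheet_as_dataframe
11.  update_google_sheet_from_dataframe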



## Setup

In [None]:
!pip install --quiet google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
!pip install --quiet --upgrade google-api-python-client
!pip install --quiet opencv-python
!pip install --quiet retina-face
!pip install --quiet deepface

from google.colab import auth
import googleapiclient
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

import os
import zipfile
from tqdm import tqdm

import cv2
import numpy as np
import pandas as pd
from deepface import DeepFace
from retinaface import RetinaFace

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.7/12.7 MB[0m [31m86.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.6/108.6 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m60.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for fire (setup.py) ... [?25l[?25hdone
24-12-19 02:44:20 - Directory /root/.deepface has been created
24-12-19 02:44:20 - Directory /root/.deepface/weights has been created


In [None]:
# Google Drive IDs used throughout the notebook.
# An ID is the last path segment of the item's URL: drive.google.com/drive/u/1/folders/{ID}
SHOWFACE_FOLDER = '1iUXrRc9p0CL-xSXXBuPjjQ0xHtL9YimY' # main showface folder
ZIPS_FOLDER = '1-pjXAWbWHAzGSBKIIKBmubmZbJKxVVYMM33OpclJdDNgd4cMPA4gnzZzgJTdWwUXIvlfch1x' # zips of sent files (not referenced by the cells below)
PROCESSED_ZIPS_FOLDER = '1EIiWPBJuMbc1feUWrb3R5mwm72Gv4fm9' # .zip files are moved here by the processing loop
ALBUMS_FOLDER = '1ra03dT_nthAPFHh_zFeKfgT91_kiU2Cq' # parent of the per-event folders; each event gets 'images' and 'cropped' subfolders
ALBUMS_DOCS = '1RcmynInVP3jFfeqd7g8ocq8El1iOg2Qa3VpBGc-_JNY' # answers spreadsheet that is read and later updated with the PROCESSADO? flag

### Useful functions

In [None]:
def list_files_in_shared_folder(folder_id):
    """List the non-trashed items directly inside a Google Drive folder.

    Prints the name and ID of every item and returns the raw entries.
    The listing follows ``nextPageToken`` so folders holding more items
    than fit in one ``files.list`` response are returned in full.

    Args:
        folder_id: Drive ID of the folder to inspect.

    Returns:
        A list of dicts with the keys ``id`` and ``name``; empty when the
        folder has no items.
    """
    query = f"'{folder_id}' in parents and trashed = false"

    files = []
    page_token = None
    while True:
        response = drive_service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        files.extend(response.get('files', []))

        # The API omits nextPageToken on the last page.
        page_token = response.get('nextPageToken')
        if not page_token:
            break

    if not files:
        print("No files found in the shared folder.")
    else:
        print("Files in the shared folder:")
        for file in files:
            print(f"Name: {file['name']}, ID: {file['id']}")
    return files

In [None]:
def create_drive_folder(folder_name, parent_folder_id=None):
    """Return the ID of a Drive folder, creating it when it does not exist.

    Args:
        folder_name: Name of the folder to look up or create.
        parent_folder_id: Optional Drive ID of the parent folder. When
            omitted, the lookup is not restricted to a parent and a new
            folder is created without an explicit parent.

    Returns:
        The Drive ID of the existing or newly created folder.
    """
    # Backslashes and single quotes must be escaped inside a Drive query
    # string literal, otherwise names such as "Valentine's Day" break the query.
    escaped_name = folder_name.replace("\\", "\\\\").replace("'", "\\'")

    # Ignore trashed folders so a deleted album is not silently reused.
    query = (
        f"name='{escaped_name}' "
        "and mimeType='application/vnd.google-apps.folder' "
        "and trashed = false"
    )
    if parent_folder_id:
        query += f" and '{parent_folder_id}' in parents"

    # Search for existing folders
    results = drive_service.files().list(q=query, fields="files(id)").execute()
    existing_folders = results.get('files', [])

    # If a folder with the same name already exists, return its ID
    if existing_folders:
        folder_id = existing_folders[0]['id']
        print(f"Folder '{folder_name}' already exists with ID: {folder_id}")
        return folder_id

    # If the folder doesn't exist, create it
    folder_metadata = {
        'name': folder_name,
        'mimeType': 'application/vnd.google-apps.folder',
    }
    if parent_folder_id:
        folder_metadata['parents'] = [parent_folder_id]

    folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
    print(f"Created folder '{folder_name}' with ID: {folder['id']}")
    return folder['id']

In [None]:
def upload_file_to_drive(file_path, parent_folder_id):
    """Upload a local file to a Google Drive folder.

    The Drive file keeps the local file's base name.

    Args:
        file_path: Path of the local file to upload.
        parent_folder_id: Drive ID of the destination folder.

    Returns:
        The Drive ID of the uploaded file.
    """
    file_metadata = {
        'name': os.path.basename(file_path),
        'parents': [parent_folder_id],
    }
    media = MediaFileUpload(file_path, resumable=True)
    uploaded_file = drive_service.files().create(
        body=file_metadata,
        media_body=media,
        fields='id',
    ).execute()
    # Hand the ID back instead of discarding it, so callers can reference the upload.
    return uploaded_file['id']

In [None]:
def upload_folder_to_drive(local_folder_path, drive_parent_folder_id):
    """Recursively upload a local folder's contents to a Google Drive folder.

    Sub-directories are recreated on Drive through ``create_drive_folder``
    and every file is uploaded into the Drive folder that mirrors its local
    directory.

    Args:
        local_folder_path: Local directory whose contents are uploaded.
        drive_parent_folder_id: Drive ID that receives the top-level contents.
    """
    # Local directory -> Drive folder ID. It must outlive a single os.walk
    # step: the default top-down walk visits a directory only after its
    # parent has registered it here, so nested levels can be resolved.
    folder_mapping = {local_folder_path: drive_parent_folder_id}

    for root, dirs, files in os.walk(local_folder_path):
        current_drive_folder_id = folder_mapping[root]

        # Traverse directories and create them in Google Drive
        for directory in dirs:
            local_dir_path = os.path.join(root, directory)
            folder_mapping[local_dir_path] = create_drive_folder(
                directory, parent_folder_id=current_drive_folder_id
            )

        # Traverse files and upload them to Google Drive
        print('Uploading files')
        for file in tqdm(files):
            file_path = os.path.join(root, file)
            upload_file_to_drive(file_path, parent_folder_id=current_drive_folder_id)

In [None]:
def download_file(file_id, file_name):
    """Download a Drive file by ID into ``/content`` and return the local path.

    Progress is printed after every downloaded chunk.

    Args:
        file_id: Drive ID of the file to fetch.
        file_name: Name given to the local copy inside ``/content``.

    Returns:
        The path of the downloaded file.
    """
    media_request = drive_service.files().get_media(fileId=file_id)
    file_path = f"/content/{file_name}"

    with open(file_path, 'wb') as target:
        downloader = googleapiclient.http.MediaIoBaseDownload(target, media_request)
        while True:
            status, finished = downloader.next_chunk()
            print(f"Download {int(status.progress() * 100)}% complete.")
            if finished:
                break

    print(f"File downloaded to {file_path}")
    return file_path

In [None]:
def unzip_file(zip_file_path, extract_to='/content/extracted'):
    """Extract a ZIP archive and return the folder holding its contents.

    The returned path is the top-level folder named by the archive's first
    entry. When the archive is empty, or its first entry is a file at the
    archive root rather than a folder, ``extract_to`` itself is returned.

    Args:
        zip_file_path: Path of the ZIP archive to extract.
        extract_to: Destination directory; created when missing.

    Returns:
        Path of the directory that contains the extracted files.
    """
    os.makedirs(extract_to, exist_ok=True)

    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)
        extracted_items = zip_ref.namelist()

    # Default to the extraction root so an empty archive cannot leave the
    # result undefined.
    extracted_folder_path = extract_to
    if extracted_items:
        # Get the first part of the paths in the ZIP archive
        top_level_name = os.path.normpath(extracted_items[0]).split(os.sep)[0]
        candidate = os.path.join(extract_to, top_level_name)
        # Only descend when the first entry really lives inside a folder.
        if os.path.isdir(candidate):
            extracted_folder_path = candidate

    print(f"File unzipped to {extracted_folder_path}")
    return extracted_folder_path

In [None]:
def move_file_to_folder(file_id, target_folder_id):
    """Move a Drive file into another folder, detaching it from its current parents.

    Any error is caught and reported with a printed message instead of being
    raised.

    Args:
        file_id: Drive ID of the file to move.
        target_folder_id: Drive ID of the destination folder.
    """
    try:
        # Look up where the file lives right now
        parents_info = drive_service.files().get(fileId=file_id, fields='parents').execute()
        previous_parents = ",".join(parents_info.get('parents', []))

        # Swap the parents in one update call: attach the target, detach the old ones
        move_request = drive_service.files().update(
            fileId=file_id,
            addParents=target_folder_id,
            removeParents=previous_parents,
            fields='id, parents',
        )
        move_request.execute()

        print(f"File {file_id} successfully moved to folder {target_folder_id}.")
    except Exception as e:
        print(f"An error occurred while moving the file: {e}")

In [None]:
def get_files_in_folder(folder_dir):
    """Collect the names of all files found under a local directory tree.

    Only base names are returned (no directory part), including those of
    files located in sub-directories. A missing directory prints a notice
    and yields an empty list.

    Args:
        folder_dir: Directory to scan.

    Returns:
        A list with one file name per file found.
    """
    if not os.path.exists(folder_dir):
        print("Directory does not exist")
        return []

    file_names = []
    for _current_dir, _subdirs, names in os.walk(folder_dir):
        file_names.extend(names)
    return file_names

In [None]:
def create_folder(new_folder_dir):
    """Make sure a local directory exists and return its path.

    Prints whether the directory was created or was already present.

    Args:
        new_folder_dir: Path of the directory to create.

    Returns:
        The same path that was passed in.
    """
    if os.path.exists(new_folder_dir):
        print(f"Directory already exists: {new_folder_dir}")
    else:
        os.makedirs(new_folder_dir)
        print(f"Created directory: {new_folder_dir}")

    return new_folder_dir

In [None]:
def get_google_sheet_as_dataframe(file_id):
    """Export a Google Spreadsheet as CSV and load it into a pandas DataFrame.

    Args:
        file_id: Drive ID of the spreadsheet.

    Returns:
        The parsed DataFrame, or ``None`` when the export or parsing fails
        (the error is printed).
    """
    from io import StringIO

    try:
        # Ask Drive for a CSV rendition of the spreadsheet
        export_request = drive_service.files().export_media(
            fileId=file_id,
            mimeType='text/csv',
        )
        raw_csv = export_request.execute()

        # The export is decoded as UTF-8 and parsed straight from memory
        csv_text = raw_csv.decode('utf-8')
        return pd.read_csv(StringIO(csv_text))

    except Exception as e:
        print(f"An error occurred while processing the spreadsheet with ID {file_id}: {e}")
        return None


In [None]:
def update_google_sheet_from_dataframe(spreadsheet_id, dataframe):
    """Write a DataFrame (header row plus data rows) into a spreadsheet from cell A1.

    Missing values are written as empty strings. Errors are caught and
    reported with a printed message.

    Args:
        spreadsheet_id: ID of the target Google Spreadsheet.
        dataframe: pandas DataFrame whose contents are written.
    """
    try:
        filled = dataframe.fillna("")

        # Sheets expects a list of rows; the column names become the first row
        header_row = filled.columns.tolist()
        data_rows = filled.values.tolist()
        payload = {
            "majorDimension": "ROWS",
            "values": [header_row] + data_rows,
        }

        # Values are sent as-is ("RAW"), anchored at the first cell
        update_request = sheets_service.spreadsheets().values().update(
            spreadsheetId=spreadsheet_id,
            range="A1",
            valueInputOption="RAW",
            body=payload,
        )
        update_request.execute()

        print(f"Spreadsheet {spreadsheet_id} updated successfully.")

    except Exception as e:
        print(f"An error occurred while updating the spreadsheet with ID {spreadsheet_id}: {e}")


## Authentication

In [None]:
# Authenticate with Colab's native method
auth.authenticate_user()

# Build the API clients. `drive_service` and `sheets_service` are the
# module-level names the helper functions above read, so this cell must run
# before any of them is called.
# NOTE(review): SCOPES is defined but not passed to either build() call in
# this cell — confirm whether it is still needed.
SCOPES = ['https://www.googleapis.com/auth/drive']
drive_service = build('drive', 'v3')
sheets_service = build('sheets', 'v4')
print("Authentication complete!")

Authentication complete!


In [None]:
# Sanity check: print the items of the main showface folder to confirm the
# Drive connection and folder ID are working.
list_files_in_shared_folder(SHOWFACE_FOLDER)

Files in the shared folder:
Name: Envio de Álbum - Organizador (respostas), ID: 1RcmynInVP3jFfeqd7g8ocq8El1iOg2Qa3VpBGc-_JNY
Name: Envio de Selfie - Usuário (respostas), ID: 1HCVTIHQ0DIQRWyl1vAuW-XU6anK_CqD_GIGGCnkcV6E
Name: Envio de Selfie - Usuário, ID: 1BNUjfznWTN6AcPNi2nABAcRUJ08CnNaoacZ-PH6kqzM
Name: Albums, ID: 1ra03dT_nthAPFHh_zFeKfgT91_kiU2Cq
Name: Colabs, ID: 1HXfpPcdXlm8LUGjPFux0vdaPyrVty2cf
Name: Envio de Álbum - Organizador, ID: 18sxdkn-rJBIpV9zsM7VYmrNdp33M3RnCgFfDiImG4ZQ
Name: Envio de Selfie - Usuário (File responses), ID: 1TIg8DPiAAzOtyOnRDDkW26F3z0by6iA1X_HskpxuPPMt1jyj76mrrB-2F6sLGSHGr9vTQntU
Name: Envio de Álbum (File responses), ID: 1pX9LdtpL9D30jHKcfBX5me8wspIDwwUZmVi2fM0NDxYpVgaG2lRpXLS3rJuLay3jDW2V2mQ8
Name: Oscar_Emmy_2023_2024, ID: 1gehG-x3DdolRQKfMzeTJh3zRovBSNltH


[{'id': '1RcmynInVP3jFfeqd7g8ocq8El1iOg2Qa3VpBGc-_JNY',
  'name': 'Envio de Álbum - Organizador (respostas)'},
 {'id': '1HCVTIHQ0DIQRWyl1vAuW-XU6anK_CqD_GIGGCnkcV6E',
  'name': 'Envio de Selfie - Usuário (respostas)'},
 {'id': '1BNUjfznWTN6AcPNi2nABAcRUJ08CnNaoacZ-PH6kqzM',
  'name': 'Envio de Selfie - Usuário'},
 {'id': '1ra03dT_nthAPFHh_zFeKfgT91_kiU2Cq', 'name': 'Albums'},
 {'id': '1HXfpPcdXlm8LUGjPFux0vdaPyrVty2cf', 'name': 'Colabs'},
 {'id': '18sxdkn-rJBIpV9zsM7VYmrNdp33M3RnCgFfDiImG4ZQ',
  'name': 'Envio de Álbum - Organizador'},
 {'id': '1TIg8DPiAAzOtyOnRDDkW26F3z0by6iA1X_HskpxuPPMt1jyj76mrrB-2F6sLGSHGr9vTQntU',
  'name': 'Envio de Selfie - Usuário (File responses)'},
 {'id': '1pX9LdtpL9D30jHKcfBX5me8wspIDwwUZmVi2fM0NDxYpVgaG2lRpXLS3rJuLay3jDW2V2mQ8',
  'name': 'Envio de Álbum (File responses)'},
 {'id': '1gehG-x3DdolRQKfMzeTJh3zRovBSNltH', 'name': 'Oscar_Emmy_2023_2024'}]

# Crop faces
Este bloco contém a função responsável por realizar o crop das faces contidas em todas as imagens extraídas.

In [None]:
def crop_faces(extracted_images_dir, cropped_images_dir):
    """Detect the faces in every extracted image and save each one as a crop.

    Every file name returned by ``get_files_in_folder`` is passed to
    ``DeepFace.extract_faces`` using the ``retinaface`` backend without
    alignment. Each reported facial area is cut out of the original image
    and written to ``cropped_images_dir`` as
    ``<image name>@face<N>@<x>_<y>_<w>_<h>.jpg``, where the coordinates
    describe the saved crop inside the original image.

    Files that OpenCV cannot read and boxes that fall entirely outside the
    image are skipped.

    Args:
        extracted_images_dir: Local folder holding the source images.
        cropped_images_dir: Local folder that receives the cropped faces.
    """
    print("Initializing face cropping")

    extracted_filenames = get_files_in_folder(extracted_images_dir)

    # Remembered only for the summary line printed at the end.
    last_saved_path = None

    for image in tqdm(extracted_filenames):
        current_image_dir = os.path.join(extracted_images_dir, image)
        image_base_noextension = os.path.basename(current_image_dir).rsplit('.', 1)[0]

        # Read first: cv2.imread returns None for files it cannot decode,
        # which lets stray non-image files be skipped.
        img = cv2.imread(current_image_dir)
        if img is None:
            print(f"Skipping unreadable image: {current_image_dir}")
            continue
        img_height, img_width = img.shape[:2]

        # enforce_detection=False keeps DeepFace from raising when no face is found.
        # NOTE(review): depending on the DeepFace version, the call may then
        # return the whole image as a single zero-confidence "face" instead
        # of an empty list — confirm and filter on confidence if undesired.
        faces = DeepFace.extract_faces(
            img_path=current_image_dir,
            detector_backend='retinaface',
            align=False,
            enforce_detection=False,
        )

        for face_number, face in enumerate(faces, start=1):
            facial_area = face['facial_area']
            x, y, w, h = facial_area['x'], facial_area['y'], facial_area['w'], facial_area['h']

            # Clamp the box to the image. A negative start would be read by
            # NumPy as an index from the end and yield a wrong or empty crop.
            left, top = max(x, 0), max(y, 0)
            right, bottom = min(x + w, img_width), min(y + h, img_height)
            if right <= left or bottom <= top:
                continue

            face_crop = img[top:bottom, left:right]
            output_path = (
                f'{cropped_images_dir}/{image_base_noextension}'
                f'@face{face_number}@{left}_{top}_{right - left}_{bottom - top}.jpg'
            )
            cv2.imwrite(output_path, face_crop)
            last_saved_path = output_path

    # Printing is guarded so a run that found no faces no longer fails on
    # unbound coordinates.
    if last_saved_path is not None:
        print(last_saved_path)
    print("Face Cropping Finalized")

# Download and Unzip files - Run Face Cropping
Este bloco realiza duas funções:

1 - Faz o download dos .zips ainda não processados e os separa em uma nova pasta com o nome do evento

2 - Executa o script de crop de faces e realiza o upload em uma nova pasta, com o nome do evento

In [None]:
# Get all unprocessed files
answers = get_google_sheet_as_dataframe(ALBUMS_DOCS)
if answers is None:
    # The loader prints the underlying error and returns None; stop here with
    # a clear message instead of failing later on a None subscript.
    raise RuntimeError("Could not load the answers spreadsheet; see the error printed above.")

# Keep the rows that have a respondent name and are not yet marked as processed
filtered_answers = answers[
    (answers["PROCESSADO?"] == False)  # noqa: E712 - element-wise comparison on a Series
    & (answers["Qual seu nome?"].notna())
]

files_to_unzip = []
for _idx, answer in filtered_answers.iterrows():
    # Columns are read by position: index 2 holds the event name and index 3
    # the upload link, whose Drive ID is the text after the '='.
    # .iloc is explicit positional access; plain answer[3] on a label-indexed
    # Series is deprecated in pandas.
    files_to_unzip.append({
        'id': answer.iloc[3].split('=')[1],
        'event_name': answer.iloc[2],
    })

In [None]:
import shutil  # local to this cell: used for the per-event cleanup below

# For every pending submission: download the .zip, extract it, crop the faces,
# upload raw and cropped images to the event's Drive folders, then clean up.
print('Starting to proccess files')
for file in tqdm(files_to_unzip):
    file_id = file['id']
    event_name = file['event_name']

    print(f'\nProcessing {event_name}')

    # download file
    zip_file_path = download_file(file_id, event_name)
    # unzip file
    extracted_path = unzip_file(zip_file_path, '/content/extracted')

    # cropped images colab folder creation
    cropped_images_path = create_folder('/content/cropped')
    # crop images
    crop_faces(extracted_path, cropped_images_path)

    # create necessary folders
    event_folder_id = create_drive_folder(event_name, ALBUMS_FOLDER)
    raw_images_folder_id = create_drive_folder('images', event_folder_id)
    cropped_images_folder_id = create_drive_folder('cropped', event_folder_id)

    # start to upload images (raw and cropped)
    upload_folder_to_drive(extracted_path, raw_images_folder_id)
    upload_folder_to_drive(cropped_images_path, cropped_images_folder_id)

    # Move the .zip to the processed folder only after everything above has
    # succeeded, so a failed run leaves it in place for a retry.
    move_file_to_folder(file_id, PROCESSED_ZIPS_FOLDER)

    # Clean up with Python calls rather than unquoted shell `rm`: the zip path
    # contains the event name, and names with spaces (e.g. "Oscar 2024") were
    # split into separate arguments and never deleted.
    shutil.rmtree('/content/extracted', ignore_errors=True)  # delete extracted files
    if os.path.exists(zip_file_path):
        os.remove(zip_file_path)  # delete downloaded zip file
    shutil.rmtree(cropped_images_path, ignore_errors=True)  # delete cropped images

    print(f'Finished processing {event_name}')

Starting to proccess files


  0%|          | 0/2 [00:00<?, ?it/s]


Processing Oscar 2024
Download 100% complete.
File downloaded to /content/Oscar 2024
File unzipped to /content/extracted/2024 Oscars Red Carpet_ All the Best Photos - IMDb
File 1CLXzUVCqcbUaVFeZwOcguRSf76UKwfGL successfully moved to folder 1EIiWPBJuMbc1feUWrb3R5mwm72Gv4fm9.
Folder 'Oscar 2024' already exists with ID: 1KklcK6wl2NxHj4cdLF_-S_8FyHCUB-9m
Folder 'images' already exists with ID: 12w2GlKkWwmqh85FzOTFbs11BIwCO-Bt_
Uploading files



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:01<03:11,  1.93s/it][A
  2%|▏         | 2/100 [00:03<02:59,  1.83s/it][A
  3%|▎         | 3/100 [00:05<03:03,  1.89s/it][A
  4%|▍         | 4/100 [00:07<02:50,  1.78s/it][A
  5%|▌         | 5/100 [00:09<02:56,  1.86s/it][A
  6%|▌         | 6/100 [00:10<02:50,  1.81s/it][A
  7%|▋         | 7/100 [00:13<02:58,  1.92s/it][A
  8%|▊         | 8/100 [00:14<02:43,  1.78s/it][A
  9%|▉         | 9/100 [00:17<03:24,  2.25s/it][A
 10%|█         | 10/100 [00:20<03:27,  2.30s/it][A
 11%|█         | 11/100 [00:22<03:09,  2.13s/it][A
 12%|█▏        | 12/100 [00:24<03:05,  2.10s/it][A
 13%|█▎        | 13/100 [00:25<02:55,  2.01s/it][A
 14%|█▍        | 14/100 [00:28<03:06,  2.17s/it][A
 15%|█▌        | 15/100 [00:31<03:28,  2.45s/it][A
 16%|█▌        | 16/100 [00:35<04:08,  2.96s/it][A
 17%|█▋        | 17/100 [00:37<03:34,  2.59s/it][A
 18%|█▊        | 18/100 [00:39<03:18,  2.42s/it][A
 19%|█▉        | 19/100 [00:4

File 1CLXzUVCqcbUaVFeZwOcguRSf76UKwfGL successfully moved to folder 1EIiWPBJuMbc1feUWrb3R5mwm72Gv4fm9.
Finished processing Oscar 2024

Processing Grammy
Download 100% complete.
File downloaded to /content/Grammy
File unzipped to /content/extracted/76th Primetime Emmys Red Carpet_ All the Best Photos - IMDb
File 1DjFGbkk8GTs8vNLPKYOW-BQ9SOUgbHvI successfully moved to folder 1EIiWPBJuMbc1feUWrb3R5mwm72Gv4fm9.
Folder 'Grammy' already exists with ID: 11rDvTb0X8X_5DTQEQao8BZtXDtSjhKKK
Folder 'images' already exists with ID: 1VnHA6M2ZI1WH4UJpRBLyHwZeG9zE_j_f
Uploading files



  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:01<03:04,  1.86s/it][A
  2%|▏         | 2/100 [00:04<03:33,  2.18s/it][A
  3%|▎         | 3/100 [00:06<03:18,  2.04s/it][A
  4%|▍         | 4/100 [00:07<02:57,  1.85s/it][A
  5%|▌         | 5/100 [00:10<03:17,  2.07s/it][A
  6%|▌         | 6/100 [00:12<03:07,  1.99s/it][A
  7%|▋         | 7/100 [00:13<02:59,  1.93s/it][A
  8%|▊         | 8/100 [00:15<02:56,  1.92s/it][A
  9%|▉         | 9/100 [00:17<02:52,  1.90s/it][A
 10%|█         | 10/100 [00:19<02:48,  1.87s/it][A
 11%|█         | 11/100 [00:21<02:45,  1.86s/it][A
 12%|█▏        | 12/100 [00:23<02:46,  1.89s/it][A
 13%|█▎        | 13/100 [00:24<02:37,  1.81s/it][A
 14%|█▍        | 14/100 [00:26<02:46,  1.93s/it][A
 15%|█▌        | 15/100 [00:28<02:41,  1.90s/it][A
 16%|█▌        | 16/100 [00:31<02:58,  2.12s/it][A
 17%|█▋        | 17/100 [00:33<02:54,  2.10s/it][A
 18%|█▊        | 18/100 [00:35<02:46,  2.03s/it][A
 19%|█▉        | 19/100 [00:3

File 1DjFGbkk8GTs8vNLPKYOW-BQ9SOUgbHvI successfully moved to folder 1EIiWPBJuMbc1feUWrb3R5mwm72Gv4fm9.
Finished processing Grammy





## Update answers docs
Mark .zip's as processed

In [None]:
# Flag every row that has a respondent name as processed, then push the
# whole table back to the answers spreadsheet.
for row_label, row in answers.iterrows():
    respondent_name = row['Qual seu nome?']
    if isinstance(respondent_name, float):  # float here is how the NaN (missing name) check is done
        continue
    answers.at[row_label, 'PROCESSADO?'] = True

update_google_sheet_from_dataframe(ALBUMS_DOCS, answers)

Spreadsheet 1RcmynInVP3jFfeqd7g8ocq8El1iOg2Qa3VpBGc-_JNY updated successfully.
