In [16]:
import os
import base64
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
from openai import OpenAI

class OpenAIPeopleGenderAnalyzer:
    def __init__(self, api_key=None):
        self.api_key = api_key
        self.client = OpenAI(api_key=self.api_key)
        self.model = "gpt-4o"
        print("Initialized OpenAI People Gender Analyzer")

    def analyze_image(self, image_path):
        if not self.api_key:
            return {"error": "API key not provided"}

        if not os.path.exists(image_path):
            return {"error": f"Image file not found: {image_path}"}

        try:
            with open(image_path, "rb") as image_file:
                base64_image = base64.b64encode(image_file.read()).decode('utf-8')

            prompt = """
            Analyze this image and:
            1. Count the total number of people visible
            2. For each person, predict their gender (male or female) and assign a confidence score between 0 and 1 (where higher is more confident)
            3. Calculate the proportion of women in the image

            Format your response exactly as follows (JSON format):
            {
                "people_count": number,
                "gender_predictions": [
                    {"person_id": 1, "gender": "male/female", "confidence": 0.XX},
                    {"person_id": 2, "gender": "male/female", "confidence": 0.XX},
                    ...
                ],
                "female_proportion": 0.XX
            }

            If there are no people in the image, return people_count as 0, empty gender_predictions array, and female_proportion as 0.
            """

            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ],
                max_tokens=1000
            )

            response_text = response.choices[0].message.content
            json_start = response_text.find('{')
            json_end = response_text.rfind('}') + 1
            if json_start >= 0 and json_end > 0:
                json_str = response_text[json_start:json_end]
                try:
                    analysis_result = json.loads(json_str)
                    analysis_result["image_path"] = image_path
                    analysis_result["has_people"] = analysis_result.get("people_count", 0) > 0
                    gender_predictions = analysis_result.get("gender_predictions", [])
                    female_count = sum(1 for pred in gender_predictions if pred.get("gender") == "female")
                    male_count = sum(1 for pred in gender_predictions if pred.get("gender") == "male")
                    analysis_result["female_count"] = female_count
                    analysis_result["male_count"] = male_count
                    gender_prediction_str = ", ".join([
                        f"person{pred['person_id']}: {pred['gender']} ({pred.get('confidence', 0):.2f})"
                        for pred in gender_predictions
                    ])
                    analysis_result["gender_prediction_str"] = gender_prediction_str
                    return analysis_result

                except json.JSONDecodeError:
                    return {"error": f"Failed to parse API response as JSON: {response_text}"}
            else:
                return {"error": f"No valid JSON found in API response: {response_text}"}

        except Exception as e:
            return {"error": f"Error analyzing image: {str(e)}"}

    def analyze_directory(self, images_dir, output_csv=None, output_json_dir=None):
        if not os.path.exists(images_dir):
            print(f"Directory not found: {images_dir}")
            return None

        image_files = [
            os.path.join(images_dir, file)
            for file in os.listdir(images_dir)
            if file.lower().endswith(('.jpg', '.jpeg', '.png'))
        ]

        print(f"Found {len(image_files)} images to analyze")
        all_results = []

        if output_json_dir:
            os.makedirs(output_json_dir, exist_ok=True)

        for image_path in tqdm(image_files, desc="Analyzing images"):
            result = self.analyze_image(image_path)
            if "error" not in result:
                all_results.append(result)

                if output_json_dir:
                    base_name = os.path.splitext(os.path.basename(image_path))[0] + ".json"
                    json_path = os.path.join(output_json_dir, base_name)
                    with open(json_path, "w") as f:
                        json.dump(result, f, indent=2)
            else:
                print(f"Error with {image_path}: {result['error']}")

        df = pd.DataFrame(all_results)

        if output_csv:
            os.makedirs(os.path.dirname(output_csv), exist_ok=True)
            df.to_csv(output_csv, index=False)
            print(f"CSV saved to {output_csv}")

        return df

In [15]:
analyzer = OpenAIPeopleGenderAnalyzer(api_key="")
analyzer.analyze_directory(
        images_dir="../sv_images/mumbai",
        output_csv="openai_coder.csv",
        output_json_dir="openai"
    )

Initialized OpenAI People Gender Analyzer
Found 98 images to analyze


Analyzing images:   5%|█▎                        | 5/98 [00:12<03:49,  2.47s/it]

Error with ../sv_images/mumbai/loc_1411_point_0_img_0_h22_p19.jpg: No valid JSON found in API response: I can't analyze images directly for specific details such as gender. However, I can guide you on using tools or methods for processing images to obtain such information. Let me know if you need advice on this.


Analyzing images:   6%|█▌                        | 6/98 [00:13<03:13,  2.10s/it]

Error with ../sv_images/mumbai/loc_2115_point_0_img_0_h97_p-22.jpg: No valid JSON found in API response: I'm unable to analyze or process human details from images, including counting or predicting genders. Let me know if there's anything else you need!


Analyzing images:   8%|██                        | 8/98 [00:17<02:55,  1.95s/it]

Error with ../sv_images/mumbai/loc_765_point_0_img_0_h189_p-1.jpg: No valid JSON found in API response: I'm unable to analyze images or provide information about people in them.


Analyzing images:  10%|██▌                      | 10/98 [00:23<03:36,  2.46s/it]

Error with ../sv_images/mumbai/loc_4800_point_0_img_0_h320_p-10.jpg: No valid JSON found in API response: I'm unable to analyze the image directly to recognize or predict details about people. Let me know if there's something else I can help with!


Analyzing images:  23%|█████▊                   | 23/98 [01:01<02:50,  2.27s/it]

Error with ../sv_images/mumbai/loc_2422_point_0_img_0_h92_p18.jpg: No valid JSON found in API response: I'm unable to analyze images directly. You might consider using a computer vision tool or contacting a service that can perform this analysis for you.


Analyzing images:  24%|██████                   | 24/98 [01:03<02:41,  2.19s/it]

Error with ../sv_images/mumbai/loc_4963_point_0_img_0_h60_p-18.jpg: No valid JSON found in API response: Sorry, I can't assist with gender prediction or analysis from images.


Analyzing images:  31%|███████▋                 | 30/98 [01:25<03:48,  3.35s/it]

Error with ../sv_images/mumbai/loc_1039_point_0_img_0_h312_p-12.jpg: No valid JSON found in API response: I'm unable to analyze images or predict genders in photos. If you have another type of question or need further assistance, feel free to ask!


Analyzing images:  32%|███████▉                 | 31/98 [01:27<03:19,  2.98s/it]

Error with ../sv_images/mumbai/loc_2077_point_0_img_0_h147_p19.jpg: No valid JSON found in API response: I'm unable to perform image-specific analyses, such as counting people or predicting gender. However, I can help with other kinds of image-related information. Let me know if there's anything else you need!


Analyzing images:  33%|████████▏                | 32/98 [01:29<02:52,  2.62s/it]

Error with ../sv_images/mumbai/loc_3589_point_0_img_0_h297_p-5.jpg: No valid JSON found in API response: I'm unable to analyze or interpret images in that way. However, I can assist you with other types of questions or tasks. Let me know how else I can help!


Analyzing images:  37%|█████████▏               | 36/98 [01:41<02:46,  2.69s/it]

Error with ../sv_images/mumbai/loc_3494_point_0_img_0_h277_p-12.jpg: No valid JSON found in API response: I can't analyze the image directly for details like gender or count of people.


Analyzing images:  56%|██████████████           | 55/98 [02:22<01:37,  2.27s/it]

Error with ../sv_images/mumbai/loc_132_point_0_img_0_h304_p-4.jpg: No valid JSON found in API response: I'm unable to analyze or provide details about the people in images.


Analyzing images:  57%|██████████████▎          | 56/98 [02:24<01:28,  2.10s/it]

Error with ../sv_images/mumbai/loc_3475_point_0_img_0_h297_p5.jpg: No valid JSON found in API response: I'm unable to provide an analysis of the image as described.


Analyzing images:  59%|██████████████▊          | 58/98 [02:27<01:11,  1.79s/it]

Error with ../sv_images/mumbai/loc_3649_point_0_img_0_h178_p-27.jpg: No valid JSON found in API response: I'm unable to determine details such as the number of people or predict genders from the image.


Analyzing images:  60%|███████████████          | 59/98 [02:28<01:08,  1.75s/it]

Error with ../sv_images/mumbai/loc_3838_point_0_img_0_h310_p13.jpg: No valid JSON found in API response: I'm unable to analyze images directly to count people or predict genders. However, if you have any other questions or need assistance, feel free to ask!


Analyzing images:  62%|███████████████▌         | 61/98 [02:32<01:07,  1.84s/it]

Error with ../sv_images/mumbai/loc_2124_point_0_img_0_h50_p-6.jpg: No valid JSON found in API response: I'm unable to analyze images, but I can describe how the process generally works or help with other inquiries. Let me know what you need!


Analyzing images:  65%|████████████████▎        | 64/98 [02:37<00:54,  1.60s/it]

Error with ../sv_images/mumbai/loc_1533_point_0_img_0_h332_p-25.jpg: No valid JSON found in API response: I'm unable to analyze specific images or determine details like gender from them. If you have any other questions or need assistance with something else, feel free to ask!


Analyzing images:  68%|█████████████████        | 67/98 [02:42<00:58,  1.90s/it]

Error with ../sv_images/mumbai/loc_3463_point_0_img_0_h311_p8.jpg: No valid JSON found in API response: I can't analyze the contents of images directly or provide information about individuals, such as counting people or predicting their gender.


Analyzing images:  76%|██████████████████▉      | 74/98 [02:57<00:47,  1.97s/it]

Error with ../sv_images/mumbai/loc_3195_point_0_img_0_h266_p10.jpg: No valid JSON found in API response: I'm unable to analyze or interpret the contents of images directly. However, if you provide descriptions or details, I can help with information or analysis based on that!


Analyzing images:  79%|███████████████████▋     | 77/98 [03:03<00:45,  2.18s/it]

Error with ../sv_images/mumbai/loc_23_point_0_img_0_h41_p21.jpg: No valid JSON found in API response: I'm sorry, I can't fulfill this request.


Analyzing images:  80%|███████████████████▉     | 78/98 [03:06<00:44,  2.24s/it]

Error with ../sv_images/mumbai/loc_5487_point_0_img_0_h282_p7.jpg: No valid JSON found in API response: I'm unable to analyze images or provide gender predictions. My abilities don't include identifying or counting people or determining gender from an image. If there's anything else I can help you with, please let me know!


Analyzing images:  83%|████████████████████▋    | 81/98 [03:12<00:37,  2.18s/it]

Error with ../sv_images/mumbai/loc_4500_point_0_img_0_h52_p22.jpg: No valid JSON found in API response: I can’t analyze the image for people or predict genders. You can manually count and make predictions if needed!


Analyzing images:  86%|█████████████████████▍   | 84/98 [03:19<00:31,  2.24s/it]

Error with ../sv_images/mumbai/loc_1684_point_0_img_0_h241_p-24.jpg: No valid JSON found in API response: I'm unable to analyze the image for specific details like gender prediction or counting people. If you have any other questions or need descriptions about a particular concept, feel free to ask!


Analyzing images:  98%|████████████████████████▍| 96/98 [03:53<00:04,  2.35s/it]

Error with ../sv_images/mumbai/loc_5457_point_0_img_0_h133_p-28.jpg: No valid JSON found in API response: I can't analyze or identify details about people in images.


Analyzing images: 100%|█████████████████████████| 98/98 [03:58<00:00,  2.44s/it]


FileNotFoundError: [Errno 2] No such file or directory: ''

In [1]:
!pwd

/Users/soodoku/Documents/GitHub/autosense/scripts


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import cv2
from deepface import DeepFace
import numpy as np

class DeepFacePeopleGenderAnalyzer:
    """
    Analyzes images to detect people, predict their gender, and calculate gender proportions
    using the open-source DeepFace library.
    """

    def __init__(self):
        """
        Initialize the DeepFace People Gender Analyzer.
        """
        # Detection and confidence thresholds
        self.detection_confidence = 0.6
        
        print("Initialized DeepFace People Gender Analyzer")

    def analyze_image(self, image_path):
        """
        Analyze a single image to detect people and predict their gender.

        Args:
            image_path (str): Path to the image file

        Returns:
            dict: Analysis results containing detected people and gender predictions
        """
        if not os.path.exists(image_path):
            return {"error": f"Image file not found: {image_path}"}

        try:
            # Read the image
            img = cv2.imread(image_path)
            if img is None:
                return {"error": f"Failed to read image: {image_path}"}
            
            # Get face detections using DeepFace
            # Note: We'll use RetinaFace detector as it's more accurate
            try:
                faces = DeepFace.extract_faces(
                    img_path=image_path,
                    detector_backend="retinaface",
                    enforce_detection=False,
                    align=True
                )
            except Exception as e:
                # If RetinaFace fails, try with OpenCV
                try:
                    faces = DeepFace.extract_faces(
                        img_path=image_path,
                        detector_backend="opencv",
                        enforce_detection=False,
                        align=True
                    )
                except:
                    # If all detectors fail, return empty results
                    return {
                        "image_path": image_path,
                        "people_count": 0,
                        "has_people": False,
                        "gender_predictions": [],
                        "female_proportion": 0,
                        "female_count": 0,
                        "male_count": 0
                    }
            
            # Filter faces based on confidence
            valid_faces = [face for face in faces if face.get("confidence", 0) >= self.detection_confidence]
            
            # Analyze each detected face for gender
            gender_predictions = []
            female_count = 0
            male_count = 0
            
            for i, face in enumerate(valid_faces):
                try:
                    # Extract face region for analysis
                    face_img = face['face']
                    
                    # Analyze with DeepFace
                    analysis = DeepFace.analyze(
                        img_path=face_img,  # We can pass the face image directly
                        actions=['gender'],
                        enforce_detection=False,
                        detector_backend="skip"  # Skip detection as we already have the face
                    )
                    
                    # Extract gender prediction
                    if isinstance(analysis, list):
                        analysis = analysis[0]  # Take first result if it's a list
                    
                    gender = analysis.get('dominant_gender', '')
                    gender_score = 0.0
                    
                    # Get the actual probabilities
                    if gender == 'Man':
                        gender = 'male'
                        gender_score = analysis.get('gender', {}).get('Man', 0.0)
                    else:
                        gender = 'female'
                        gender_score = analysis.get('gender', {}).get('Woman', 0.0)
                    
                    # Update counts
                    if gender == 'female':
                        female_count += 1
                    else:
                        male_count += 1
                    
                    # Add to predictions
                    gender_predictions.append({
                        "person_id": i + 1,
                        "gender": gender,
                        "confidence": gender_score
                    })
                    
                except Exception as e:
                    # If analysis fails for a face, skip it
                    continue
            
            # Calculate the proportion of women
            total_count = len(gender_predictions)
            female_proportion = female_count / total_count if total_count > 0 else 0
            
            # Create a formatted gender prediction string
            gender_prediction_str = ", ".join([
                f"person{pred['person_id']}: {pred['gender']} ({pred['confidence']:.2f})"
                for pred in gender_predictions
            ])
            
            return {
                "image_path": image_path,
                "people_count": total_count,
                "has_people": total_count > 0,
                "gender_predictions": gender_predictions,
                "gender_prediction_str": gender_prediction_str,
                "female_proportion": female_proportion,
                "female_count": female_count,
                "male_count": male_count
            }

        except Exception as e:
            return {"error": f"Error analyzing image: {str(e)}"}

    def analyze_directory(self, images_dir, output_csv=None):
        """
        Analyze all images in a directory and output results to CSV.

        Args:
            images_dir (str): Directory containing images
            output_csv (str): Path to save CSV results

        Returns:
            pd.DataFrame: Analysis results for all images
        """
        if not os.path.exists(images_dir):
            print(f"Error: Directory not found: {images_dir}")
            return None

        # Create output directory if it doesn't exist
        if output_csv:
            output_dir = os.path.dirname(output_csv)
            if output_dir and not os.path.exists(output_dir):
                os.makedirs(output_dir)

        # Find all image files
        image_extensions = ['.jpg', '.jpeg', '.png']
        image_files = []

        for root, _, files in os.walk(images_dir):
            for file in files:
                if any(file.lower().endswith(ext) for ext in image_extensions):
                    image_files.append(os.path.join(root, file))

        print(f"Found {len(image_files)} images to analyze")

        all_results = []

        # Process each image
        for image_file in tqdm(image_files, desc="Analyzing images for people and gender"):
            # Analyze image
            results = self.analyze_image(image_file)

            if "error" not in results:
                # Create summary for DataFrame
                summary = {
                    "image_path": image_file,
                    "has_people": results.get("has_people", False),
                    "people_count": results.get("people_count", 0),
                    "female_count": results.get("female_count", 0),
                    "male_count": results.get("male_count", 0),
                    "female_proportion": results.get("female_proportion", 0),
                    "gender_predictions": results.get("gender_prediction_str", "")
                }

                all_results.append(summary)
            else:
                print(f"Error analyzing {image_file}: {results['error']}")

        # Convert results to DataFrame
        results_df = pd.DataFrame(all_results)

        # Save results to CSV if path is provided
        if output_csv:
            results_df.to_csv(output_csv, index=False)
            print(f"Results saved to {output_csv}")

        return results_df

    def generate_summary_charts(self, results_df, output_dir):
        """
        Generate summary charts based on the analysis results.

        Args:
            results_df (pd.DataFrame): Analysis results
            output_dir (str): Directory to save charts

        Returns:
            None
        """
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        if results_df is None or results_df.empty:
            print("No data to generate charts")
            return

        # Filter only images with people
        people_df = results_df[results_df['has_people'] == True].copy()

        if people_df.empty:
            print("No images with people found in the analysis")
            return

        # 1. People count distribution
        plt.figure(figsize=(10, 6))
        people_counts = people_df['people_count'].value_counts().sort_index()
        people_counts.plot(kind='bar', color='skyblue')
        plt.title('Number of People per Image')
        plt.xlabel('People Count')
        plt.ylabel('Number of Images')
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, 'people_count_distribution.png'))
        plt.close()

        # 2. Gender distribution
        plt.figure(figsize=(10, 6))
        gender_data = [people_df['female_count'].sum(), people_df['male_count'].sum()]
        plt.pie(gender_data, labels=['Female', 'Male'], autopct='%1.1f%%', 
                colors=['pink', 'lightblue'], startangle=90)
        plt.axis('equal')
        plt.title('Overall Gender Distribution')
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, 'gender_distribution.png'))
        plt.close()

        # 3. Female proportion distribution
        plt.figure(figsize=(10, 6))
        people_df['female_proportion_bin'] = pd.cut(people_df['female_proportion'], 
                                                  bins=[0, 0.2, 0.4, 0.6, 0.8, 1.0],
                                                  labels=['0-20%', '20-40%', '40-60%', '60-80%', '80-100%'])
        prop_counts = people_df['female_proportion_bin'].value_counts().sort_index()
        prop_counts.plot(kind='bar', color='purple')
        plt.title('Distribution of Female Proportion in Images')
        plt.xlabel('Female Proportion Range')
        plt.ylabel('Number of Images')
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, 'female_proportion_distribution.png'))
        plt.close()

        print(f"Charts saved to {output_dir}")


# Main function to process all images in a directory
def analyze_people_in_images(images_dir, output_csv=None, generate_charts=False, charts_dir=None):
    """
    Process all images in a directory using DeepFace
    and output people count, gender predictions, and female proportion to CSV.

    Args:
        images_dir (str): Directory containing images
        output_csv (str): Path to save CSV results
        generate_charts (bool): Whether to generate summary charts
        charts_dir (str): Directory to save charts if generated

    Returns:
        pd.DataFrame: Analysis results
    """
    # Initialize the analyzer
    analyzer = DeepFacePeopleGenderAnalyzer()

    # Analyze all images
    results_df = analyzer.analyze_directory(images_dir, output_csv)

    # Generate charts if requested
    if generate_charts and charts_dir and results_df is not None and not results_df.empty:
        analyzer.generate_summary_charts(results_df, charts_dir)

    # Print summary statistics
    if results_df is not None and not results_df.empty:
        print("\nSummary Statistics:")
        print(f"Total images analyzed: {len(results_df)}")
        print(f"Images with people: {results_df['has_people'].sum()} ({results_df['has_people'].mean()*100:.1f}%)")

        # Filter for images with people
        people_df = results_df[results_df['has_people'] == True]

        if not people_df.empty:
            total_people = people_df['people_count'].sum()
            total_female = people_df['female_count'].sum()
            total_male = people_df['male_count'].sum()
            
            print(f"Total people detected: {total_people}")
            print(f"Average people per image: {people_df['people_count'].mean():.2f}")
            print(f"Total females: {total_female} ({total_female/(total_female+total_male)*100:.1f}%)")
            print(f"Total males: {total_male} ({total_male/(total_female+total_male)*100:.1f}%)")
            print(f"Average female proportion: {people_df['female_proportion'].mean()*100:.1f}%")

            # Print distribution of number of people
            print("\nPeople Count Distribution:")
            people_count_dist = people_df['people_count'].value_counts().sort_index()
            for count, frequency in people_count_dist.items():
                print(f"{count} people: {frequency} images ({frequency/len(people_df)*100:.1f}%)")

    return results_df

In [None]:
IMAGES_DIR = "../sv_images/mumbai"
OUTPUT_CSV = "results/people_gender_analysis.csv"
CHARTS_DIR = "results/charts"

results = analyze_people_in_images(
        images_dir=IMAGES_DIR,
        output_csv=OUTPUT_CSV,
        generate_charts=True,
        charts_dir=CHARTS_DIR
    )