-
Notifications
You must be signed in to change notification settings - Fork 7.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Face-Detection on ESP32-S3 without Wifi / Webserver is very poor #9671
Comments
Please use the latest core (either github with get.py or dev release 3.0.0-rc3). Camera example works fine and you should also have fast detection without the web server. |
Here is an example sketch that runs on the latest core and only does face detection (it takes 70-120 ms depending on the pixel format and the image):
#include "esp_timer.h"
#include "esp_camera.h"
#include "img_converters.h"
#include <vector>
#include "human_face_detect_msr01.hpp"
#include "human_face_detect_mnp01.hpp"
#define TWO_STAGE 1 /*<! 1: detect by two-stage which is more accurate but slower(with keypoints). */
/*<! 0: detect by one-stage which is less accurate but faster(without keypoints). */
// Camera signal -> GPIO assignments. setup() copies these into camera_config_t:
//   Y2..Y9      -> pin_d0..pin_d7
//   SIOD / SIOC -> pin_sccb_sda / pin_sccb_scl
//   XCLK, PCLK, VSYNC, HREF, PWDN, RESET -> the pin_* member of the same name
// NOTE(review): -1 presumably marks a line that is not wired on this board;
// confirm against the esp32-camera driver documentation.
#define PWDN_GPIO_NUM -1
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM 15
#define SIOD_GPIO_NUM 4
#define SIOC_GPIO_NUM 5
#define Y2_GPIO_NUM 11
#define Y3_GPIO_NUM 9
#define Y4_GPIO_NUM 8
#define Y5_GPIO_NUM 10
#define Y6_GPIO_NUM 12
#define Y7_GPIO_NUM 18
#define Y8_GPIO_NUM 17
#define Y9_GPIO_NUM 16
#define VSYNC_GPIO_NUM 6
#define HREF_GPIO_NUM 7
#define PCLK_GPIO_NUM 13
bool face_detect() {
int64_t fr_start = esp_timer_get_time();
camera_fb_t *fb = esp_camera_fb_get();
if (!fb) {
Serial.println("Camera capture failed");
return false;
}
size_t out_len, out_width, out_height;
uint8_t *out_buf;
bool detected = false;
if (fb->format == PIXFORMAT_RGB565) {
#if TWO_STAGE
HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F);
HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5);
std::list<dl::detect::result_t> &candidates = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
std::list<dl::detect::result_t> &results = s2.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}, candidates);
#else
HumanFaceDetectMSR01 s1(0.3F, 0.5F, 10, 0.2F);
std::list<dl::detect::result_t> &results = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
#endif
if (results.size() > 0) {
detected = true;
}
esp_camera_fb_return(fb);
} else {
out_len = fb->width * fb->height * 3;
out_width = fb->width;
out_height = fb->height;
out_buf = (uint8_t *)malloc(out_len);
if (!out_buf) {
Serial.println("out_buf malloc failed");
return false;
}
bool s = fmt2rgb888(fb->buf, fb->len, fb->format, out_buf);
esp_camera_fb_return(fb);
if (!s) {
free(out_buf);
Serial.println("To rgb888 failed");
return false;
}
#if TWO_STAGE
HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F);
HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5);
std::list<dl::detect::result_t> &candidates = s1.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3});
std::list<dl::detect::result_t> &results = s2.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3}, candidates);
#else
HumanFaceDetectMSR01 s1(0.3F, 0.5F, 10, 0.2F);
std::list<dl::detect::result_t> &results = s1.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3});
#endif
if (results.size() > 0) {
detected = true;
}
free(out_buf);
}
int64_t fr_end = esp_timer_get_time();
Serial.printf("FACE: %lums %s\n", (uint32_t)((fr_end - fr_start) / 1000), detected ? "DETECTED " : "");
return true;
}
/**
 * One-time initialisation: opens the serial port at 115200 baud, fills in the
 * camera configuration from the *_GPIO_NUM pin macros, initialises the camera
 * driver and applies sensor tweaks.
 *
 * If esp_camera_init() fails the error code is printed and the function
 * returns without touching the sensor.
 */
void setup() {
  Serial.begin(115200);
  Serial.setDebugOutput(true);
  Serial.println();

  // Value-initialise so that every member not assigned below is zero instead
  // of an indeterminate stack value when passed to esp_camera_init().
  camera_config_t config = {};
  config.ledc_channel = LEDC_CHANNEL_0;
  config.ledc_timer = LEDC_TIMER_0;
  config.pin_d0 = Y2_GPIO_NUM;
  config.pin_d1 = Y3_GPIO_NUM;
  config.pin_d2 = Y4_GPIO_NUM;
  config.pin_d3 = Y5_GPIO_NUM;
  config.pin_d4 = Y6_GPIO_NUM;
  config.pin_d5 = Y7_GPIO_NUM;
  config.pin_d6 = Y8_GPIO_NUM;
  config.pin_d7 = Y9_GPIO_NUM;
  config.pin_xclk = XCLK_GPIO_NUM;
  config.pin_pclk = PCLK_GPIO_NUM;
  config.pin_vsync = VSYNC_GPIO_NUM;
  config.pin_href = HREF_GPIO_NUM;
  config.pin_sccb_sda = SIOD_GPIO_NUM;
  config.pin_sccb_scl = SIOC_GPIO_NUM;
  config.pin_pwdn = PWDN_GPIO_NUM;
  config.pin_reset = RESET_GPIO_NUM;
  config.xclk_freq_hz = 20000000;
  config.frame_size = FRAMESIZE_UXGA;
  //config.pixel_format = PIXFORMAT_JPEG; // for streaming
  config.pixel_format = PIXFORMAT_RGB565;  // for face detection/recognition
  config.grab_mode = CAMERA_GRAB_WHEN_EMPTY;
  config.fb_location = CAMERA_FB_IN_PSRAM;
  config.jpeg_quality = 12;
  config.fb_count = 1;

  // if PSRAM IC present, init with UXGA resolution and higher JPEG quality
  // for larger pre-allocated frame buffer.
  if (config.pixel_format == PIXFORMAT_JPEG) {
    if (psramFound()) {
      config.jpeg_quality = 10;
      config.fb_count = 2;
      config.grab_mode = CAMERA_GRAB_LATEST;
    } else {
      // Limit the frame size when PSRAM is not available
      config.frame_size = FRAMESIZE_SVGA;
      config.fb_location = CAMERA_FB_IN_DRAM;
    }
  } else {
    // Best option for face detection/recognition
    config.frame_size = FRAMESIZE_240X240;
    config.fb_count = 2;
  }

  // camera init
  esp_err_t err = esp_camera_init(&config);
  if (err != ESP_OK) {
    Serial.printf("Camera init failed with error 0x%x\n", err);
    return;
  }

  sensor_t *s = esp_camera_sensor_get();
  // initial sensors are flipped vertically and colors are a bit saturated
  if (s->id.PID == OV3660_PID) {
    s->set_vflip(s, 1);        // flip it back
    s->set_brightness(s, 1);   // up the brightness just a bit
    s->set_saturation(s, -2);  // lower the saturation
  }

  // drop down frame size for higher initial frame rate
  if (config.pixel_format == PIXFORMAT_JPEG) {
    s->set_framesize(s, FRAMESIZE_QVGA);
  }

  // Unconditional vertical flip for every sensor type. This is module
  // dependent: detection is reported to work only when faces are upright in
  // the frame, so remove this line if your camera already delivers an
  // upright image.
  s->set_vflip(s, 1);
}
// Runs one face_detect() pass each time it is called; the boolean result is ignored.
void loop() {
face_detect();
} |
Very many thanks for the above info. It looks like my recent “github.com/espressif/arduino-esp32” of 11May24 had got corrupted somehow, as the “Examples/ESP32/Camera/CameraWebServer - FACE_DETECT” does work fine using today’s “github.com/espressif/arduino-esp32” (23May24). Thank you also for your above example sketch. Note that I had to comment-out the “s->set_vflip(s, 1);” line to get it to work with my OV2640 camera, as Face-Detect only works if the face is the correct-way-up, and the current OV2640 camera I am using does NOT need the image inverting. (I have other OV2640 cameras that do require this though.) It might be worth adding a comment regarding this after the “s->set_vflip(s, 1);” line? With the github software of 23May24 my above sketch actually runs much better than it did on the 16Jan23 github software, and is now pretty much as good as the example-WiFi-Face-Detect, which surprised me as using the 16Jan23 software my above sketch was not nearly as good as the example-WiFi-Face-Detect. I’m not sure why this should be, but I’m happy now using the 23May24 github software! I seem to get a little better Face-Detection-results using my above sketch (running the Face-Detect on Core0 with priority [configMAX_PRIORITIES-2] and no-wifi), than when using your above sketch running all on Core1. By “Face-Detection-results” I mean I can (in good natural light) pretty reliably detect a face from anywhere between 30cm and 60cm from the camera, and at an angle of about +/- 22 degrees (about 45degrees in total) horizontally from dead-centre on my ~65degrees FOV camera. (It doesn’t work nearly as well in poor light.) (cf. With your above sketch [all on Core1] I can only get reliable detection between perhaps 30cm and 45cm, and over an angle of perhaps +/- 15degrees, BUT this is rather subjective, so I’m not 100% sure.) PS. 
Can you tell me if there is a simpler way to get the centre of the first detected face than calling “draw_face_boxes” and using “x = (int)prediction->box[0];” etc with the formulas:
PPS. It might be worth adding your above sketch to the “Examples/ESP32/Camera” folder next to the CameraWebServer.ino example? |
Board
Firebeetle2 ESP32-S3
Device Description
Firebeetle2 ESP32-S3 with OV2640 camera attached.
Hardware Configuration
OV2640 68degree standard camera (note that face-detection seems to work even more poorly with wide-angle cameras!)
I also use an active-buzzer on pin A5, but this is not necessary as "Got Face" is also printed to the Serial-terminal.
Version
latest master (checkout manually)
IDE Name
Arduino IDE v1.8.19
Operating System
Windows10
Flash frequency
80MHz
PSRAM enabled
yes
Upload speed
921600
Description
Face-Detection on ESP32-S3 does not work nearly as well using the below sketch WITHOUT wifi, as it does for the default "Examples/ESP32/Camera/CameraWebServer" wifi example.
This is using identical camera-setup (OV2640 68degree standard camera, PIXFORMAT_RGB565, FRAMESIZE_240X240), and identical "TWO_STAGE" face-detection, and running the below "look_for_faces()" task on Core0, which is where I believe the equivalent "app_httpd.cpp" task is run for the wifi-example.
NB. CONFIG_ESP_FACE_DETECT_ENABLED=1 enabled CONFIG_ESP_FACE_RECOGNITION_ENABLED=0 (ie. detection NOT recognition)
Specifically, the below sketch only finds a face if the face is approx 30cm to 45cm away and pretty much in the exact centre of the frame; whereas the "Examples/ESP32/Camera/CameraWebServer" wifi example with identical setup will find a face pretty reliably between 30cm and 60cm away, and almost anywhere from -22degrees to +22degrees (45degrees total) horizontally from the centre of the frame.
NB. I am using "github.com/espressif/arduino-esp32" as at 16Jan23.
[as the "Examples/ESP32/Camera/CameraWebServer - FACE_DETECT" doesn't work on the current github when using the above ESP32-S3. On the current github 11May24 you even have to modify the example sketch by moving the WiFi.begin() above the esp_camera_init() or else it won't even connect to the wifi, but even then face-detection doesn't work. (Arduino-on-core1, Events-on-core1-or-core0.)] <- Ignore this as I suspect my github 11May24 software was corrupted.
But note that this below (non-wifi) sketch runs the same on both github v16Jan23 and github v11May24.
Sketch
Debug Message
Other Steps to Reproduce
No response
I have checked existing issues, online documentation and the Troubleshooting Guide
The text was updated successfully, but these errors were encountered: