Update media state for active speech recognition as it uses audio capture

https://bugs.webkit.org/show_bug.cgi?id=220667

Patch by Sihui Liu <sihui_liu@apple.com> on 2021-01-19
Reviewed by Youenn Fablet.

Source/WebCore:

Track the active speech recognition on its Document so that the media capture state is correctly sent to the client.

API test: WebKit2.SpeechRecognitionMediaCaptureStateChange
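
In outline, the patch wires speech recognition into the document's media state as follows (a minimal sketch with simplified, hypothetical types; the real code in the diff below uses WebCore's Document, SpeechRecognition, and MediaProducer::MediaStateFlags):

    // Sketch only: when audio capture starts, the recognition object
    // registers itself with its Document; clearing it on stop works the
    // same way. Either change recomputes the page's media state.
    #include <cstdint>

    enum MediaStateFlags : uint32_t {
        IsNotPlaying                = 0,
        HasActiveAudioCaptureDevice = 1 << 0, // reported to the client as microphone capture
        // ... other flags elided
    };

    struct SpeechRecognition;

    struct Document {
        SpeechRecognition* activeSpeechRecognition { nullptr }; // a WeakPtr in WebKit

        void setActiveSpeechRecognition(SpeechRecognition* recognition)
        {
            if (activeSpeechRecognition == recognition)
                return; // no change, nothing to report
            activeSpeechRecognition = recognition;
            updateIsPlayingMedia();
        }

        void updateIsPlayingMedia()
        {
            uint32_t state = IsNotPlaying;
            if (activeSpeechRecognition)
                state |= HasActiveAudioCaptureDevice;
            // ... merge flags from media elements and MediaStreamTrack,
            // then notify the page so the client sees the new capture state
        }
    };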

* Modules/speech/SpeechRecognition.cpp:
(WebCore::SpeechRecognition::startRecognition):
(WebCore::SpeechRecognition::stop):
(WebCore::SpeechRecognition::didStartCapturingAudio):
(WebCore::SpeechRecognition::didStopCapturingAudio):
* Modules/speech/SpeechRecognition.h:
* Modules/speech/SpeechRecognitionConnection.h:
* dom/Document.cpp:
(WebCore::Document::setActiveSpeechRecognition):
(WebCore::Document::updateIsPlayingMedia):
* dom/Document.h:
* page/DummySpeechRecognitionProvider.h:

Source/WebKit:

* WebProcess/WebCoreSupport/WebSpeechRecognitionConnection.cpp:
(WebKit::WebSpeechRecognitionConnection::unregisterClient):
* WebProcess/WebCoreSupport/WebSpeechRecognitionConnection.h:

Tools:

* TestWebKitAPI/Tests/WebKitCocoa/SpeechRecognition.mm:
(-[SpeechRecognitionUIDelegate _webView:mediaCaptureStateDidChange:]):
(TestWebKitAPI::TEST):
(-[SpeechRecognitionPermissionUIDelegate _webView:requestSpeechRecognitionPermissionForOrigin:decisionHandler:]): Deleted.
(-[SpeechRecognitionPermissionUIDelegate _webView:requestMediaCaptureAuthorization:decisionHandler:]): Deleted.
(-[SpeechRecognitionPermissionUIDelegate _webView:checkUserMediaPermissionForURL:mainFrameURL:frameIdentifier:decisionHandler:]): Deleted.
(-[SpeechRecognitionPermissionUIDelegate webView:createWebViewWithConfiguration:forNavigationAction:windowFeatures:]): Deleted.

Canonical link: https://commits.webkit.org/233162@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@271636 268f45cc-cd09-0410-ab3c-d52691b4dbfc
szewai authored and webkit-commit-queue committed Jan 20, 2021
1 parent a1fd940 commit 10a3c2f3935666b00ebd241079441d0a2ee147cc
Showing 12 changed files with 131 additions and 8 deletions.
@@ -1,3 +1,27 @@
2021-01-19 Sihui Liu <sihui_liu@apple.com>

Update media state for active speech recognition as it uses audio capture
https://bugs.webkit.org/show_bug.cgi?id=220667

Reviewed by Youenn Fablet.

Track the active speech recognition on its Document so that the media capture state is correctly sent to the client.

API test: WebKit2.SpeechRecognitionMediaCaptureStateChange

* Modules/speech/SpeechRecognition.cpp:
(WebCore::SpeechRecognition::startRecognition):
(WebCore::SpeechRecognition::stop):
(WebCore::SpeechRecognition::didStartCapturingAudio):
(WebCore::SpeechRecognition::didStopCapturingAudio):
* Modules/speech/SpeechRecognition.h:
* Modules/speech/SpeechRecognitionConnection.h:
* dom/Document.cpp:
(WebCore::Document::setActiveSpeechRecognition):
(WebCore::Document::updateIsPlayingMedia):
* dom/Document.h:
* page/DummySpeechRecognitionProvider.h:

2021-01-19 Megan Gardner <megan_gardner@apple.com>

Elements in a table are incorrectly selected in JavaScript.
@@ -99,6 +99,15 @@ const char* SpeechRecognition::activeDOMObjectName() const
return "SpeechRecognition";
}

void SpeechRecognition::stop()
{
abortRecognition();
m_connection->unregisterClient(*this);

auto& document = downcast<Document>(*scriptExecutionContext());
document.setActiveSpeechRecognition(nullptr);
}

void SpeechRecognition::didStart()
{
if (m_state == State::Starting)
@@ -109,6 +118,9 @@ void SpeechRecognition::didStart()

void SpeechRecognition::didStartCapturingAudio()
{
auto& document = downcast<Document>(*scriptExecutionContext());
document.setActiveSpeechRecognition(this);

queueTaskToDispatchEvent(*this, TaskSource::Speech, Event::create(eventNames().audiostartEvent, Event::CanBubble::No, Event::IsCancelable::No));
}

@@ -134,6 +146,9 @@ void SpeechRecognition::didStopCapturingSound()

void SpeechRecognition::didStopCapturingAudio()
{
auto& document = downcast<Document>(*scriptExecutionContext());
document.setActiveSpeechRecognition(nullptr);

queueTaskToDispatchEvent(*this, TaskSource::Speech, Event::create(eventNames().audioendEvent, Event::CanBubble::No, Event::IsCancelable::No));
}

@@ -85,8 +85,9 @@ class SpeechRecognition : public SpeechRecognitionConnectionClient, public Activ
void didEnd() final;

// ActiveDOMObject
const char* activeDOMObjectName() const;
void suspend(ReasonForSuspension);
const char* activeDOMObjectName() const final;
void suspend(ReasonForSuspension) final;
void stop() final;

// EventTarget
ScriptExecutionContext* scriptExecutionContext() const final { return ActiveDOMObject::scriptExecutionContext(); }
@@ -37,6 +37,7 @@ class SpeechRecognitionConnection : public RefCounted<SpeechRecognitionConnectio
public:
virtual ~SpeechRecognitionConnection() { }
virtual void registerClient(SpeechRecognitionConnectionClient&) = 0;
virtual void unregisterClient(SpeechRecognitionConnectionClient&) = 0;
virtual void start(SpeechRecognitionConnectionClientIdentifier, const String& lang, bool continuous, bool interimResults, uint64_t maxAlternatives, ClientOrigin&&) = 0;
virtual void stop(SpeechRecognitionConnectionClientIdentifier) = 0;
virtual void abort(SpeechRecognitionConnectionClientIdentifier) = 0;
@@ -208,6 +208,7 @@
#include "Settings.h"
#include "ShadowRoot.h"
#include "SocketProvider.h"
#include "SpeechRecognition.h"
#include "StorageEvent.h"
#include "StringCallback.h"
#include "StyleAdjuster.h"
@@ -4253,6 +4254,15 @@ void Document::removeAudioProducer(MediaProducer& audioProducer)
updateIsPlayingMedia();
}

void Document::setActiveSpeechRecognition(SpeechRecognition* speechRecognition)
{
if (m_activeSpeechRecognition == speechRecognition)
return;

m_activeSpeechRecognition = makeWeakPtr(speechRecognition);
updateIsPlayingMedia();
}

void Document::noteUserInteractionWithMediaElement()
{
if (m_userHasInteractedWithMediaElement)
@@ -4274,6 +4284,8 @@ void Document::updateIsPlayingMedia(uint64_t sourceElementID)

#if ENABLE(MEDIA_STREAM)
state |= MediaStreamTrack::captureState(*this);
if (m_activeSpeechRecognition)
state |= MediaProducer::HasActiveAudioCaptureDevice;
#endif

if (m_userHasInteractedWithMediaElement)
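
The document tracks the recognition through a WeakPtr rather than a strong reference, so media-state bookkeeping never extends the object's lifetime. A standalone sketch of that behavior, assuming WTF's WeakPtr API as used in this revision:

    // Sketch: if the recognition object dies without clearing itself,
    // the document's WeakPtr nulls out instead of dangling, so the
    // HasActiveAudioCaptureDevice flag cannot be pinned by a stale pointer.
    #include <wtf/WeakPtr.h>

    struct Recognition : public WTF::CanMakeWeakPtr<Recognition> { };

    bool staleCaptureFlagPossible()
    {
        WTF::WeakPtr<Recognition> active;
        {
            Recognition recognition;
            active = WTF::makeWeakPtr(recognition);
            // active.get() == &recognition while it is alive
        }
        return !!active; // false: the weak pointer was cleared on destruction
    }
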
@@ -208,6 +208,7 @@ class SelectorQuery;
class SelectorQueryCache;
class SerializedScriptValue;
class Settings;
class SpeechRecognition;
class StringCallback;
class StyleSheet;
class StyleSheetContents;
@@ -1381,6 +1382,7 @@ class Document

WEBCORE_EXPORT void addAudioProducer(MediaProducer&);
WEBCORE_EXPORT void removeAudioProducer(MediaProducer&);
void setActiveSpeechRecognition(SpeechRecognition*);
MediaProducer::MediaStateFlags mediaState() const { return m_mediaState; }
void noteUserInteractionWithMediaElement();
bool isCapturing() const { return MediaProducer::isCapturing(m_mediaState); }
@@ -1971,6 +1973,7 @@ class Document
Ref<CSSFontSelector> m_fontSelector;

WeakHashSet<MediaProducer> m_audioProducers;
WeakPtr<SpeechRecognition> m_activeSpeechRecognition;

HashSet<ShadowRoot*> m_inDocumentShadowRoots;

@@ -39,6 +39,7 @@ class DummySpeechRecognitionProvider final : public SpeechRecognitionProvider {
return adoptRef(*new DummySpeechRecognitionConnection());
}
void registerClient(SpeechRecognitionConnectionClient&) final { }
void unregisterClient(SpeechRecognitionConnectionClient&) final { }
void start(SpeechRecognitionConnectionClientIdentifier, const String&, bool, bool, uint64_t, ClientOrigin&&) final { }
void stop(SpeechRecognitionConnectionClientIdentifier) final { }
void abort(SpeechRecognitionConnectionClientIdentifier) final { }
@@ -1,3 +1,14 @@
2021-01-19 Sihui Liu <sihui_liu@apple.com>

Update media state for active speech recognition as it uses audio capture
https://bugs.webkit.org/show_bug.cgi?id=220667

Reviewed by Youenn Fablet.

* WebProcess/WebCoreSupport/WebSpeechRecognitionConnection.cpp:
(WebKit::WebSpeechRecognitionConnection::unregisterClient):
* WebProcess/WebCoreSupport/WebSpeechRecognitionConnection.h:

2021-01-19 Chris Dumez <cdumez@apple.com>

[Hardening] Protect against overflows in ArgumentCoder<ArrayReference<T, arrayReferenceDynamicExtent>>::decode()
@@ -63,6 +63,11 @@ void WebSpeechRecognitionConnection::registerClient(WebCore::SpeechRecognitionCo
m_clientMap.add(client.identifier(), makeWeakPtr(client));
}

void WebSpeechRecognitionConnection::unregisterClient(WebCore::SpeechRecognitionConnectionClient& client)
{
m_clientMap.remove(client.identifier());
}

void WebSpeechRecognitionConnection::start(WebCore::SpeechRecognitionConnectionClientIdentifier clientIdentifier, const String& lang, bool continuous, bool interimResults, uint64_t maxAlternatives, WebCore::ClientOrigin&& clientOrigin)
{
send(Messages::SpeechRecognitionServer::Start(clientIdentifier, lang, continuous, interimResults, maxAlternatives, WTFMove(clientOrigin)));
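
The unregisterClient added above mirrors registerClient: the connection keeps a map from client identifier to a weak client reference so updates arriving from the SpeechRecognitionServer can be routed back, and a stopped recognition drops out of that routing. A simplified sketch of the registry pattern (hypothetical types, not the WebKit classes):

    // Sketch only: identifier-keyed client registry, as the connection
    // uses to route server updates back to the right recognition client.
    #include <cstdint>
    #include <unordered_map>

    struct Client {
        uint64_t identifier { 0 };
        // didReceiveUpdate(...) and friends elided
    };

    class Connection {
    public:
        void registerClient(Client& client) { m_clients[client.identifier] = &client; }
        void unregisterClient(Client& client) { m_clients.erase(client.identifier); }

        // An update from the server carries a client identifier; a client
        // that unregistered (e.g. a stopped recognition) is simply skipped.
        Client* clientForIdentifier(uint64_t identifier) const
        {
            auto it = m_clients.find(identifier);
            return it == m_clients.end() ? nullptr : it->second;
        }

    private:
        std::unordered_map<uint64_t, Client*> m_clients; // WebKit: HashMap of WeakPtrs
    };
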
@@ -56,6 +56,7 @@ class WebSpeechRecognitionConnection final : public WebCore::SpeechRecognitionCo
~WebSpeechRecognitionConnection();

void registerClient(WebCore::SpeechRecognitionConnectionClient&) final;
void unregisterClient(WebCore::SpeechRecognitionConnectionClient&) final;
void didReceiveUpdate(WebCore::SpeechRecognitionUpdate&&) final;
void invalidate(WebCore::SpeechRecognitionConnectionClientIdentifier);

@@ -1,3 +1,18 @@
2021-01-19 Sihui Liu <sihui_liu@apple.com>

Update media state for active speech recognition as it uses audio capture
https://bugs.webkit.org/show_bug.cgi?id=220667

Reviewed by Youenn Fablet.

* TestWebKitAPI/Tests/WebKitCocoa/SpeechRecognition.mm:
(-[SpeechRecognitionUIDelegate _webView:mediaCaptureStateDidChange:]):
(TestWebKitAPI::TEST):
(-[SpeechRecognitionPermissionUIDelegate _webView:requestSpeechRecognitionPermissionForOrigin:decisionHandler:]): Deleted.
(-[SpeechRecognitionPermissionUIDelegate _webView:requestMediaCaptureAuthorization:decisionHandler:]): Deleted.
(-[SpeechRecognitionPermissionUIDelegate _webView:checkUserMediaPermissionForURL:mainFrameURL:frameIdentifier:decisionHandler:]): Deleted.
(-[SpeechRecognitionPermissionUIDelegate webView:createWebViewWithConfiguration:forNavigationAction:windowFeatures:]): Deleted.

2021-01-19 Michael Catanzaro <mcatanzaro@gnome.org>

REGRESSION(r271506): webkit-patch keyring integration is broken on Linux
@@ -38,17 +38,20 @@
static bool permissionRequested = false;
static bool receivedScriptMessage;
static bool didFinishNavigation;
static bool captureStateDidChange;
static bool isCapturing;
static RetainPtr<WKScriptMessage> lastScriptMessage;
static RetainPtr<WKWebView> createdWebView;

@interface SpeechRecognitionPermissionUIDelegate : NSObject<WKUIDelegatePrivate>
@interface SpeechRecognitionUIDelegate : NSObject<WKUIDelegatePrivate>
- (void)_webView:(WKWebView *)webView requestSpeechRecognitionPermissionForOrigin:(WKSecurityOrigin *)origin decisionHandler:(void (^)(BOOL))decisionHandler;
- (void)_webView:(WKWebView *)webView requestMediaCaptureAuthorization: (_WKCaptureDevices)devices decisionHandler:(void (^)(BOOL))decisionHandler;
- (void)_webView:(WKWebView *)webView checkUserMediaPermissionForURL:(NSURL *)url mainFrameURL:(NSURL *)mainFrameURL frameIdentifier:(NSUInteger)frameIdentifier decisionHandler:(void (^)(NSString *salt, BOOL authorized))decisionHandler;
- (WKWebView *)webView:(WKWebView *)webView createWebViewWithConfiguration:(WKWebViewConfiguration *)configuration forNavigationAction:(WKNavigationAction *)navigationAction windowFeatures:(WKWindowFeatures *)windowFeatures;
- (void)_webView:(WKWebView *)webView mediaCaptureStateDidChange:(_WKMediaCaptureState)state;
@end

@implementation SpeechRecognitionPermissionUIDelegate
@implementation SpeechRecognitionUIDelegate
- (void)_webView:(WKWebView *)webView requestSpeechRecognitionPermissionForOrigin:(WKSecurityOrigin *)origin decisionHandler:(void (^)(BOOL))decisionHandler
{
permissionRequested = true;
@@ -70,6 +73,12 @@ - (WKWebView *)webView:(WKWebView *)webView createWebViewWithConfiguration:(WKWe
createdWebView = adoptNS([[WKWebView alloc] initWithFrame:CGRectMake(0, 0, 800, 600) configuration:configuration]);
return createdWebView.get();
}

- (void)_webView:(WKWebView *)webView mediaCaptureStateDidChange:(_WKMediaCaptureState)state
{
isCapturing = state == _WKMediaCaptureStateActiveMicrophone;
captureStateDidChange = true;
}
@end

@interface SpeechRecognitionMessageHandler : NSObject <WKScriptMessageHandler>
@@ -111,7 +120,7 @@ - (void)webView:(WKWebView *)webView didFinishNavigation:(WKNavigation *)navigat
preferences._mockCaptureDevicesEnabled = YES;
preferences._speechRecognitionEnabled = YES;
auto webView = adoptNS([[TestWKWebView alloc] initWithFrame:CGRectMake(0, 0, 800, 600) configuration:configuration.get()]);
auto delegate = adoptNS([[SpeechRecognitionPermissionUIDelegate alloc] init]);
auto delegate = adoptNS([[SpeechRecognitionUIDelegate alloc] init]);
[webView setUIDelegate:delegate.get()];

shouldGrantPermissionRequest = false;
@@ -154,7 +163,7 @@ - (void)webView:(WKWebView *)webView didFinishNavigation:(WKNavigation *)navigat
preferences._mockCaptureDevicesEnabled = YES;
preferences._speechRecognitionEnabled = YES;
preferences._mediaCaptureRequiresSecureConnection = NO;
auto delegate = adoptNS([[SpeechRecognitionPermissionUIDelegate alloc] init]);
auto delegate = adoptNS([[SpeechRecognitionUIDelegate alloc] init]);
auto firstWebView = adoptNS([[TestWKWebView alloc] initWithFrame:CGRectMake(0, 0, 100, 100) configuration:configuration.get()]);
[firstWebView setUIDelegate:delegate.get()];
auto secondWebView = adoptNS([[TestWKWebView alloc] initWithFrame:CGRectMake(100, 0, 100, 100) configuration:configuration.get()]);
@@ -206,7 +215,7 @@ - (void)webView:(WKWebView *)webView didFinishNavigation:(WKNavigation *)navigat
preferences._mockCaptureDevicesEnabled = YES;
preferences._speechRecognitionEnabled = YES;
auto webView = adoptNS([[TestWKWebView alloc] initWithFrame:CGRectMake(0, 0, 800, 600) configuration:configuration.get()]);
auto delegate = adoptNS([[SpeechRecognitionPermissionUIDelegate alloc] init]);
auto delegate = adoptNS([[SpeechRecognitionUIDelegate alloc] init]);
[webView setUIDelegate:delegate.get()];

// Page is visible.
@@ -241,7 +250,7 @@ - (void)webView:(WKWebView *)webView didFinishNavigation:(WKNavigation *)navigat
preferences._mockCaptureDevicesEnabled = YES;
preferences._speechRecognitionEnabled = YES;
preferences.javaScriptCanOpenWindowsAutomatically = YES;
auto delegate = adoptNS([[SpeechRecognitionPermissionUIDelegate alloc] init]);
auto delegate = adoptNS([[SpeechRecognitionUIDelegate alloc] init]);
auto navigationDelegate = adoptNS([[SpeechRecognitionNavigationDelegate alloc] init]);
shouldGrantPermissionRequest = true;
createdWebView = nullptr;
@@ -268,6 +277,31 @@ - (void)webView:(WKWebView *)webView didFinishNavigation:(WKNavigation *)navigat
EXPECT_TRUE(!!createdWebView);
}

TEST(WebKit2, SpeechRecognitionMediaCaptureStateChange)
{
auto configuration = adoptNS([[WKWebViewConfiguration alloc] init]);
auto handler = adoptNS([[SpeechRecognitionMessageHandler alloc] init]);
[[configuration userContentController] addScriptMessageHandler:handler.get() name:@"testHandler"];
auto preferences = [configuration preferences];
preferences._mockCaptureDevicesEnabled = YES;
preferences._speechRecognitionEnabled = YES;
auto delegate = adoptNS([[SpeechRecognitionUIDelegate alloc] init]);
auto webView = adoptNS([[TestWKWebView alloc] initWithFrame:CGRectMake(0, 0, 800, 600) configuration:configuration.get()]);
[webView setUIDelegate:delegate.get()];
shouldGrantPermissionRequest = true;

captureStateDidChange = false;
[webView synchronouslyLoadTestPageNamed:@"speechrecognition-basic"];
[webView stringByEvaluatingJavaScript:@"start()"];
TestWebKitAPI::Util::run(&captureStateDidChange);
EXPECT_TRUE(isCapturing);

captureStateDidChange = false;
[webView stringByEvaluatingJavaScript:@"stop()"];
TestWebKitAPI::Util::run(&captureStateDidChange);
EXPECT_FALSE(isCapturing);
}

#endif

} // namespace TestWebKitAPI
