Skip to content

Commit

Permalink
Use-after-free in WebCore::AudioBufferSourceNode::renderFromBuffer
Browse files Browse the repository at this point in the history
https://bugs.webkit.org/show_bug.cgi?id=270007
rdar://123510096

Reviewed by Jer Noble and Geoffrey Garen.

JS sets the AudioBufferSourceNode.buffer attribute on the main thread. This buffer is then used
on the audio thread for rendering. We were attempting to synchronize the threads via
m_processLock. However, this only synchronizes the audio thread with
AudioBufferSourceNode::setBuffer(). The JS could still modify the buffer's contents on the main
thread without locking. Since the buffer's channels are backed by JS ArrayBuffers, the JS could even
detach/transfer them. This could lead to a use-after-free on the audio rendering thread when the
JS does so.

To address the issue, we implement the "acquire the buffer content" logic from the specification [1].
When the buffer gets set and the node is already playing, or when the node starts playing, we
copy the contents of the buffer provided by JS into m_outputChannels. The audio thread now only
ever interacts with the copy in m_outputChannels.

This is not as good performance-wise but it is needed for thread-safety.

[1] https://webaudio.github.io/web-audio-api/#acquire-the-content

* Source/WebCore/Modules/webaudio/AudioBufferSourceNode.cpp:
(WebCore::AudioBufferSourceNode::process):
(WebCore::AudioBufferSourceNode::renderFromBuffer):
(WebCore::AudioBufferSourceNode::setBufferForBindings):
(WebCore::AudioBufferSourceNode::acquireBufferContent):
(WebCore::AudioBufferSourceNode::startPlaying):
(WebCore::AudioBufferSourceNode::adjustGrainParameters):
(WebCore::AudioBufferSourceNode::totalPitchRate):
(WebCore::AudioBufferSourceNode::propagatesSilence const):
* Source/WebCore/Modules/webaudio/AudioBufferSourceNode.h:

Canonical link: https://commits.webkit.org/272448.614@safari-7618-branch
  • Loading branch information
cdumez committed Feb 24, 2024
1 parent b2c0d2d commit a1253c1
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 22 deletions.
59 changes: 39 additions & 20 deletions Source/WebCore/Modules/webaudio/AudioBufferSourceNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ void AudioBufferSourceNode::process(size_t framesToProcess)
// After calling setBuffer() with a buffer having a different number of channels, there can in rare cases be a slight delay
// before the output bus is updated to the new number of channels because of use of tryLocks() in the context's updating system.
// In this case, if the buffer has just been changed and we're not quite ready yet, then just output silence.
if (numberOfChannels() != m_buffer->numberOfChannels()) {
if (numberOfChannels() != m_sourceChannels.size()) {
outputBus.zero();
return;
}
Expand Down Expand Up @@ -207,8 +207,8 @@ bool AudioBufferSourceNode::renderFromBuffer(AudioBus* bus, unsigned destination
// Offset the pointers to the correct offset frame.
unsigned writeIndex = destinationFrameOffset;

size_t bufferLength = m_buffer->length();
double bufferSampleRate = m_buffer->sampleRate();
size_t bufferLength = m_sourceChannels[0].size();
double bufferSampleRate = m_sourceChannelsSampleRate;
double pitchRate = totalPitchRate();
bool reverse = pitchRate < 0;

Expand All @@ -232,8 +232,8 @@ bool AudioBufferSourceNode::renderFromBuffer(AudioBus* bus, unsigned destination

if (m_isLooping && (m_loopStart || m_loopEnd) && m_loopStart >= 0 && m_loopEnd > 0 && m_loopStart < m_loopEnd) {
// Convert from seconds to sample-frames.
double loopMinFrame = m_loopStart * m_buffer->sampleRate();
double loopMaxFrame = m_loopEnd * m_buffer->sampleRate();
double loopMinFrame = m_loopStart * m_sourceChannelsSampleRate;
double loopMaxFrame = m_loopEnd * m_sourceChannelsSampleRate;

virtualMaxFrame = std::min(loopMaxFrame, virtualMaxFrame);
virtualMinFrame = std::max(loopMinFrame, virtualMinFrame);
Expand All @@ -243,7 +243,7 @@ bool AudioBufferSourceNode::renderFromBuffer(AudioBus* bus, unsigned destination
// If we're looping and the offset (virtualReadIndex) is past the end of the loop, wrap back to the
// beginning of the loop. For other cases, nothing needs to be done.
if (m_isLooping && m_virtualReadIndex >= virtualMaxFrame) {
m_virtualReadIndex = (m_loopStart < 0) ? 0 : (m_loopStart * m_buffer->sampleRate());
m_virtualReadIndex = (m_loopStart < 0) ? 0 : (m_loopStart * m_sourceChannelsSampleRate);
m_virtualReadIndex = std::min(m_virtualReadIndex, static_cast<double>(bufferLength - 1));
}

Expand All @@ -267,7 +267,6 @@ bool AudioBufferSourceNode::renderFromBuffer(AudioBus* bus, unsigned destination
// Render loop - reading from the source buffer to the destination using linear interpolation.
int framesToProcess = numberOfFrames;

const float** sourceChannels = m_sourceChannels.get();
float** destinationChannels = m_destinationChannels.get();

// Optimize for the very common case of playing back with pitchRate == 1.
Expand All @@ -282,7 +281,7 @@ bool AudioBufferSourceNode::renderFromBuffer(AudioBus* bus, unsigned destination
framesThisTime = std::max(0, framesThisTime);

for (unsigned i = 0; i < numberOfChannels; ++i)
memcpy(destinationChannels[i] + writeIndex, sourceChannels[i] + readIndex, sizeof(float) * framesThisTime);
memcpy(destinationChannels[i] + writeIndex, m_sourceChannels[i].data() + readIndex, sizeof(float) * framesThisTime);

writeIndex += framesThisTime;
readIndex += framesThisTime;
Expand All @@ -308,7 +307,7 @@ bool AudioBufferSourceNode::renderFromBuffer(AudioBus* bus, unsigned destination
while (framesThisTime--) {
for (unsigned i = 0; i < numberOfChannels; ++i) {
float* destination = destinationChannels[i];
const float* source = sourceChannels[i];
auto& source = m_sourceChannels[i];

destination[writeIndex] = source[readIndex];
}
Expand All @@ -330,7 +329,7 @@ bool AudioBufferSourceNode::renderFromBuffer(AudioBus* bus, unsigned destination
unsigned readIndex = static_cast<unsigned>(virtualReadIndex);

for (unsigned i = 0; i < numberOfChannels; ++i)
std::fill_n(destinationChannels[i] + writeIndex, framesToProcess, sourceChannels[i][readIndex]);
std::fill_n(destinationChannels[i] + writeIndex, framesToProcess, m_sourceChannels[i][readIndex]);
} else if (reverse) {
unsigned maxFrame = static_cast<unsigned>(virtualMaxFrame);
unsigned minFrame = static_cast<unsigned>(floorf(virtualMinFrame));
Expand All @@ -346,9 +345,9 @@ bool AudioBufferSourceNode::renderFromBuffer(AudioBus* bus, unsigned destination
// Linear interpolation.
for (unsigned i = 0; i < numberOfChannels; ++i) {
float* destination = destinationChannels[i];
const float* source = sourceChannels[i];
auto& source = m_sourceChannels[i];

destination[writeIndex] = computeSampleUsingLinearInterpolation(source, readIndex, readIndex2, interpolationFactor);
destination[writeIndex] = computeSampleUsingLinearInterpolation(source.data(), readIndex, readIndex2, interpolationFactor);
}

writeIndex++;
Expand Down Expand Up @@ -385,9 +384,9 @@ bool AudioBufferSourceNode::renderFromBuffer(AudioBus* bus, unsigned destination
// Linear interpolation.
for (unsigned i = 0; i < numberOfChannels; ++i) {
float* destination = destinationChannels[i];
const float* source = sourceChannels[i];
auto& source = m_sourceChannels[i];

destination[writeIndex] = computeSampleUsingLinearInterpolation(source, readIndex, readIndex2, interpolationFactor);
destination[writeIndex] = computeSampleUsingLinearInterpolation(source.data(), readIndex, readIndex2, interpolationFactor);
}
writeIndex++;

Expand Down Expand Up @@ -432,11 +431,7 @@ ExceptionOr<void> AudioBufferSourceNode::setBufferForBindings(RefPtr<AudioBuffer

output(0)->setNumberOfChannels(numberOfChannels);

m_sourceChannels = makeUniqueArray<const float*>(numberOfChannels);
m_destinationChannels = makeUniqueArray<float*>(numberOfChannels);

for (unsigned i = 0; i < numberOfChannels; ++i)
m_sourceChannels[i] = buffer->channelData(i)->data();
}

m_virtualReadIndex = 0;
Expand All @@ -446,9 +441,30 @@ ExceptionOr<void> AudioBufferSourceNode::setBufferForBindings(RefPtr<AudioBuffer
if (m_isGrain)
adjustGrainParameters();

if (isPlayingOrScheduled())
acquireBufferContent();

return { };
}

// Implements the spec's "acquire the content" operation: snapshots the current
// buffer into m_sourceChannels so the audio thread never touches JS-owned memory.
void AudioBufferSourceNode::acquireBufferContent()
{
    ASSERT(isMainThread());

    if (!m_buffer) {
        // No buffer to acquire; drop any previously acquired content.
        m_sourceChannelsSampleRate = 0;
        m_sourceChannels = FixedVector<Vector<float>>();
        return;
    }

    unsigned channelCount = m_buffer->numberOfChannels();
    m_sourceChannelsSampleRate = m_buffer->sampleRate();
    m_sourceChannels = FixedVector<Vector<float>>(channelCount);
    for (unsigned channelIndex = 0; channelIndex < channelCount; ++channelIndex) {
        // Make sure we copy the buffer's content for the audio thread to use, since JS on
        // the main thread may detach / transfer array buffers.
        m_sourceChannels[channelIndex] = Vector<float> { m_buffer->rawChannelData(channelIndex), m_buffer->length() };
    }
}

unsigned AudioBufferSourceNode::numberOfChannels()
{
return output(0)->numberOfChannels();
Expand Down Expand Up @@ -513,13 +529,15 @@ ExceptionOr<void> AudioBufferSourceNode::startPlaying(double when, double grainO

adjustGrainParameters();

acquireBufferContent();
m_playbackState = SCHEDULED_STATE;

return { };
}

void AudioBufferSourceNode::adjustGrainParameters()
{
ASSERT(isMainThread());
ASSERT(m_processLock.isHeld());

if (!m_buffer)
Expand Down Expand Up @@ -555,11 +573,12 @@ void AudioBufferSourceNode::adjustGrainParameters()

double AudioBufferSourceNode::totalPitchRate()
{
ASSERT(!isMainThread());
// Incorporate buffer's sample-rate versus AudioContext's sample-rate.
// Normally it's not an issue because buffers are loaded at the AudioContext's sample-rate, but we can handle it in any case.
double sampleRateFactor = 1.0;
if (m_buffer)
sampleRateFactor = m_buffer->sampleRate() / static_cast<double>(sampleRate());
sampleRateFactor = m_sourceChannelsSampleRate / static_cast<double>(sampleRate());

double basePitchRate = playbackRate().finalValue();
double detune = pow(2, m_detune->finalValue() / 1200);
Expand Down Expand Up @@ -587,7 +606,7 @@ bool AudioBufferSourceNode::propagatesSilence() const
return false;
}
Locker locker { AdoptLock, m_processLock };
return !m_buffer;
return m_sourceChannels.isEmpty();
}

} // namespace WebCore
Expand Down
7 changes: 5 additions & 2 deletions Source/WebCore/Modules/webaudio/AudioBufferSourceNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ class AudioBufferSourceNode final : public AudioScheduledSourceNode {
private:
AudioBufferSourceNode(BaseAudioContext&);

void acquireBufferContent() WTF_REQUIRES_LOCK(m_processLock);

double tailTime() const final { return 0; }
double latencyTime() const final { return 0; }

Expand All @@ -98,10 +100,11 @@ class AudioBufferSourceNode final : public AudioScheduledSourceNode {
inline bool renderSilenceAndFinishIfNotLooping(AudioBus*, unsigned index, size_t framesToProcess) WTF_REQUIRES_LOCK(m_processLock);

// m_buffer holds the sample data which this node outputs.
RefPtr<AudioBuffer> m_buffer WTF_GUARDED_BY_LOCK(m_processLock); // Only modified on the main thread but used on the audio thread.
RefPtr<AudioBuffer> m_buffer; // Only used on the main thread.

// Pointers for the buffer and destination.
UniqueArray<const float*> m_sourceChannels;
double m_sourceChannelsSampleRate WTF_GUARDED_BY_LOCK(m_processLock) { 0 };
FixedVector<Vector<float>> m_sourceChannels WTF_GUARDED_BY_LOCK(m_processLock);
UniqueArray<float*> m_destinationChannels;

Ref<AudioParam> m_detune;
Expand Down

0 comments on commit a1253c1

Please sign in to comment.