media/audio/win/audio_low_latency_input_win.cc

// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/audio/win/audio_low_latency_input_win.h"

#include <objbase.h>
#include <propkey.h>
#include <windows.devices.enumeration.h>
#include <windows.media.devices.h>

#include <algorithm>
#include <cmath>
#include <memory>
#include <utility>

#include "base/logging.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/trace_event/common/trace_event_common.h"
#include "base/trace_event/trace_event.h"
#include "base/win/core_winrt_util.h"
#include "base/win/scoped_propvariant.h"
#include "base/win/scoped_variant.h"
#include "base/win/vector.h"
#include "base/win/windows_version.h"
#include "media/audio/audio_device_description.h"
#include "media/audio/audio_features.h"
#include "media/audio/win/avrt_wrapper_win.h"
#include "media/audio/win/core_audio_util_win.h"
#include "media/base/audio_block_fifo.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/channel_layout.h"
#include "media/base/limits.h"
#include "media/base/media_switches.h"
#include "media/base/timestamp_constants.h"

using ABI::Windows::Foundation::Collections::IVectorView;
using ABI::Windows::Media::Devices::IMediaDeviceStatics;
using ABI::Windows::Media::Effects::IAudioCaptureEffectsManager;
using ABI::Windows::Media::Effects::IAudioEffectsManagerStatics;
using base::win::GetActivationFactory;
using base::win::ScopedCoMem;
using base::win::ScopedCOMInitializer;
using base::win::ScopedHString;
using Microsoft::WRL::ComPtr;

namespace media {

namespace {

// String constant holding the literal text "\\?\SWD#MMDEVAPI#" (after escape
// processing). NOTE(review): judging by the name this is the prefix of device
// ids in the form expected by UWP APIs; it is not referenced in this part of
// the file, so confirm at the use site.
constexpr char kUwpDeviceIdPrefix[] = "\\\\?\\SWD#MMDEVAPI#";

// Channel-mask value returned by ChannelLayoutToChannelConfig() for channel
// layouts it has no mapping for.
constexpr uint32_t KSAUDIO_SPEAKER_UNSUPPORTED = 0;

// Returns a human-readable description of the COM error code |hresult|. Thin
// local shorthand for CoreAudioUtil::ErrorToString().
std::string ErrorToString(HRESULT hresult) {
  std::string description = CoreAudioUtil::ErrorToString(hresult);
  return description;
}

// Errors when initializing the audio client related to the audio format. Split
// by whether we're using format conversion or not. Used for reporting stats -
// do not renumber entries.
enum FormatRelatedInitError {
  // The format was reported as unsupported (no format conversion in use).
  kUnsupportedFormat = 0,
  // Same as above, but with format conversion in use.
  kUnsupportedFormatWithFormatConversion = 1,
  // An invalid-argument error was reported (no format conversion in use).
  kInvalidArgument = 2,
  // Same as above, but with format conversion in use.
  kInvalidArgumentWithFormatConversion = 3,
  // Number of entries above; must remain last.
  kCount
};

// Returns true when the device format in |format_ex| can be handled by the
// format conversion path, i.e. when all of the following hold:
//  - the sample rate lies within [limits::kMinSampleRate,
//    limits::kMaxSampleRate],
//  - the sample size is 8, 16 or 32 bits,
//  - GuessChannelLayout() can map the channel count to a channel layout.
bool IsSupportedFormatForConversion(WAVEFORMATEXTENSIBLE* format_ex) {
  const WAVEFORMATEX& wave = format_ex->Format;

  const bool rate_in_range = wave.nSamplesPerSec >= limits::kMinSampleRate &&
                             wave.nSamplesPerSec <= limits::kMaxSampleRate;
  if (!rate_in_range)
    return false;

  const bool sample_size_ok = wave.wBitsPerSample == 8 ||
                              wave.wBitsPerSample == 16 ||
                              wave.wBitsPerSample == 32;
  if (!sample_size_ok)
    return false;

  const bool layout_known =
      GuessChannelLayout(wave.nChannels) != CHANNEL_LAYOUT_UNSUPPORTED;
  if (!layout_known) {
    LOG(ERROR) << "Hardware configuration not supported for audio conversion";
  }
  return layout_known;
}

// Maps a ChannelLayout to the corresponding Microsoft channel configuration.
// Only the discrete, mono and stereo layouts are mapped; every other layout
// logs a warning and yields KSAUDIO_SPEAKER_UNSUPPORTED.
ChannelConfig ChannelLayoutToChannelConfig(ChannelLayout layout) {
  if (layout == CHANNEL_LAYOUT_DISCRETE)
    return KSAUDIO_SPEAKER_DIRECTOUT;
  if (layout == CHANNEL_LAYOUT_MONO)
    return KSAUDIO_SPEAKER_MONO;
  if (layout == CHANNEL_LAYOUT_STEREO)
    return KSAUDIO_SPEAKER_STEREO;

  LOG(WARNING) << "Unsupported channel layout: " << layout;
  // KSAUDIO_SPEAKER_UNSUPPORTED is zero, which corresponds to "no specific
  // channel order".
  return KSAUDIO_SPEAKER_UNSUPPORTED;
}

// Returns the textual name of |result| for use in log messages. Values that
// match none of the enumerators below are reported as "UNKNOWN".
const char* StreamOpenResultToString(
    WASAPIAudioInputStream::StreamOpenResult result) {
  using Stream = WASAPIAudioInputStream;

  const char* name = "UNKNOWN";
  // Intentionally no default label so that the switch stays a plain list of
  // the known enumerators.
  switch (result) {
    case Stream::OPEN_RESULT_OK:
      name = "OK";
      break;
    case Stream::OPEN_RESULT_CREATE_INSTANCE:
      name = "CREATE_INSTANCE";
      break;
    case Stream::OPEN_RESULT_NO_ENDPOINT:
      name = "NO_ENDPOINT";
      break;
    case Stream::OPEN_RESULT_NO_STATE:
      name = "NO_STATE";
      break;
    case Stream::OPEN_RESULT_DEVICE_NOT_ACTIVE:
      name = "DEVICE_NOT_ACTIVE";
      break;
    case Stream::OPEN_RESULT_ACTIVATION_FAILED:
      name = "ACTIVATION_FAILED";
      break;
    case Stream::OPEN_RESULT_FORMAT_NOT_SUPPORTED:
      name = "FORMAT_NOT_SUPPORTED";
      break;
    case Stream::OPEN_RESULT_AUDIO_CLIENT_INIT_FAILED:
      name = "AUDIO_CLIENT_INIT_FAILED";
      break;
    case Stream::OPEN_RESULT_GET_BUFFER_SIZE_FAILED:
      name = "GET_BUFFER_SIZE_FAILED";
      break;
    case Stream::OPEN_RESULT_LOOPBACK_ACTIVATE_FAILED:
      name = "LOOPBACK_ACTIVATE_FAILED";
      break;
    case Stream::OPEN_RESULT_LOOPBACK_INIT_FAILED:
      name = "LOOPBACK_INIT_FAILED";
      break;
    case Stream::OPEN_RESULT_SET_EVENT_HANDLE:
      name = "SET_EVENT_HANDLE";
      break;
    case Stream::OPEN_RESULT_NO_CAPTURE_CLIENT:
      name = "NO_CAPTURE_CLIENT";
      break;
    case Stream::OPEN_RESULT_NO_AUDIO_VOLUME:
      name = "NO_AUDIO_VOLUME";
      break;
    case Stream::OPEN_RESULT_OK_WITH_RESAMPLING:
      name = "OK_WITH_RESAMPLING";
      break;
  }
  return name;
}

// Returns the textual name of the audio effect |type| for use in log
// messages. Values that match none of the enumerators below are reported as
// "Unknown".
const char* EffectTypeToString(
    ABI::Windows::Media::Effects::AudioEffectType type) {
  namespace fx = ABI::Windows::Media::Effects;

  const char* name = "Unknown";
  switch (type) {
    case fx::AudioEffectType_Other:
      name = "Other/None";
      break;
    case fx::AudioEffectType_AcousticEchoCancellation:
      name = "AcousticEchoCancellation";
      break;
    case fx::AudioEffectType_NoiseSuppression:
      name = "NoiseSuppression";
      break;
    case fx::AudioEffectType_AutomaticGainControl:
      name = "AutomaticGainControl";
      break;
    case fx::AudioEffectType_BeamForming:
      name = "BeamForming";
      break;
    case fx::AudioEffectType_ConstantToneRemoval:
      name = "ConstantToneRemoval";
      break;
    case fx::AudioEffectType_Equalizer:
      name = "Equalizer";
      break;
    case fx::AudioEffectType_LoudnessEqualizer:
      name = "LoudnessEqualizer";
      break;
    case fx::AudioEffectType_BassBoost:
      name = "BassBoost";
      break;
    case fx::AudioEffectType_VirtualSurround:
      name = "VirtualSurround";
      break;
    case fx::AudioEffectType_VirtualHeadphones:
      name = "VirtualHeadphones";
      break;
    case fx::AudioEffectType_SpeakerFill:
      name = "SpeakerFill";
      break;
    case fx::AudioEffectType_RoomCorrection:
      name = "RoomCorrection";
      break;
    case fx::AudioEffectType_BassManagement:
      name = "BassManagement";
      break;
    case fx::AudioEffectType_EnvironmentalEffects:
      name = "EnvironmentalEffects";
      break;
    case fx::AudioEffectType_SpeakerProtection:
      name = "SpeakerProtection";
      break;
    case fx::AudioEffectType_SpeakerCompensation:
      name = "SpeakerCompensation";
      break;
    case fx::AudioEffectType_DynamicRangeCompression:
      name = "DynamicRangeCompression";
      break;
    case fx::AudioEffectType_FarFieldBeamForming:
      name = "FarFieldBeamForming";
      break;
    case fx::AudioEffectType_DeepNoiseSuppression:
      name = "DeepNoiseSuppression";
      break;
  }
  return name;
}

// Converts a COM VARIANT_BOOL to a C++ bool. VARIANT_TRUE maps to true and
// VARIANT_FALSE to false; any other value is logged as an error and treated
// as false.
bool VariantBoolToBool(VARIANT_BOOL var_bool) {
  if (var_bool == VARIANT_TRUE)
    return true;

  if (var_bool != VARIANT_FALSE) {
    LOG(ERROR) << "Invalid VARIANT_BOOL type";
  }
  return false;
}

// Builds the error text logged when opening the stream fails. The message
// contains the textual open result, the HRESULT in hexadecimal and string
// representations of both the input and the output wave formats.
std::string GetOpenLogString(WASAPIAudioInputStream::StreamOpenResult result,
                             HRESULT hr,
                             WAVEFORMATEXTENSIBLE input_format,
                             WAVEFORMATEX output_format) {
  const char* result_name = StreamOpenResultToString(result);
  const std::string input_text =
      CoreAudioUtil::WaveFormatToString(&input_format);
  const std::string output_text =
      CoreAudioUtil::WaveFormatToString(&output_format);

  return base::StringPrintf(
      "WAIS::Open => (ERROR: result=%s, hresult=%#lx, input_format=[%s], "
      "output_format=[%s])",
      result_name, hr, input_text.c_str(), output_text.c_str());
}

// Reports whether the UWP (Core WinRT) APIs used by this file can be used.
// The check runs only once; its outcome is cached in a function-local static
// because it is not expected to change between calls.
bool InitializeUWPSupport() {
  const auto check_os_support = []() -> bool {
    // Windows.Media.Effects and Windows.Media.Devices requires Windows 10 build
    // 10.0.10240.0.
    DCHECK_GE(base::win::OSInfo::GetInstance()->version_number().build, 10240u);
    return true;
  };

  static const bool is_supported = check_os_support();
  return is_supported;
}

}  // namespace

// Keeps track of how many capture callbacks reported a data discontinuity.
// A short-term counter is uploaded as a UMA histogram once per log period and
// a long-term counter accumulates until it is explicitly fetched and reset.
class WASAPIAudioInputStream::DataDiscontinuityReporter {
 public:
  // Number of callbacks per log period. Logs once every 10s, assuming 10ms
  // buffers.
  static constexpr int kCallbacksPerLogPeriod = 1000;

  DataDiscontinuityReporter() = default;

  // Returns the long-term discontinuity count and clears all counters,
  // including the callback count and the short-term count.
  int GetLongTermDiscontinuityCountAndReset() {
    callback_count_ = 0;
    data_discontinuity_short_term_count_ = 0;
    return std::exchange(data_discontinuity_long_term_count_, 0);
  }

  // Registers one capture callback. |observed_data_discontinuity| tells
  // whether that callback reported a discontinuity. Every
  // kCallbacksPerLogPeriod-th call uploads the short-term count and restarts
  // it from zero.
  void Log(bool observed_data_discontinuity) {
    ++callback_count_;
    if (observed_data_discontinuity) {
      ++data_discontinuity_short_term_count_;
      ++data_discontinuity_long_term_count_;
    }

    const bool log_period_completed =
        (callback_count_ % kCallbacksPerLogPeriod) == 0;
    if (!log_period_completed)
      return;

    // TODO(https://crbug.com/825744): It can be possible to replace
    // "Media.Audio.Capture.Glitches2" with this new (simplified) metric
    // instead.
    base::UmaHistogramCounts1000("Media.Audio.Capture.Win.Glitches2",
                                 data_discontinuity_short_term_count_);

    data_discontinuity_short_term_count_ = 0;
  }

 private:
  // Callbacks registered since construction or the last reset.
  int callback_count_ = 0;
  // Discontinuities seen in the current log period.
  int data_discontinuity_short_term_count_ = 0;
  // Discontinuities seen since construction or the last reset.
  int data_discontinuity_long_term_count_ = 0;
};

// Constructs a stream for the device |device_id| owned by |manager|. Derives
// the initial input (audio engine) format and the fixed output (sink) format
// from |params|, computes the packet sizes and creates the two events used by
// the capture thread. No device is opened here; that happens in Open().
// |params| must describe a mono, stereo or discrete layout with at most two
// channels, and |device_id| and |log_callback| must be non-empty/non-null
// (all DCHECKed).
WASAPIAudioInputStream::WASAPIAudioInputStream(
    AudioManagerWin* manager,
    const AudioParameters& params,
    const std::string& device_id,
    AudioManager::LogCallback log_callback)
    : manager_(manager),
      glitch_reporter_(SystemGlitchReporter::StreamType::kCapture),
      peak_detector_(base::BindRepeating(&AudioManager::TraceAmplitudePeak,
                                         base::Unretained(manager_),
                                         /*trace_start=*/true)),
      data_discontinuity_reporter_(
          std::make_unique<DataDiscontinuityReporter>()),
      device_id_(device_id),
      log_callback_(std::move(log_callback)) {
  DCHECK(manager_);
  DCHECK(!device_id_.empty());
  DCHECK(!log_callback_.is_null());
  DCHECK_LE(params.channels(), 2);
  DCHECK(params.channel_layout() == CHANNEL_LAYOUT_MONO ||
         params.channel_layout() == CHANNEL_LAYOUT_STEREO ||
         params.channel_layout() == CHANNEL_LAYOUT_DISCRETE);
  SendLogMessage("%s({device_id=%s}, {params=[%s]})", __func__,
                 device_id.c_str(), params.AsHumanReadableString().c_str());

  // Load the Avrt DLL if not already loaded. Required to support MMCSS.
  // A failure is only logged; construction continues without it.
  bool avrt_init = avrt::Initialize();
  if (!avrt_init)
    SendLogMessage("%s => (WARNING: failed to load Avrt.dll)", __func__);

  // Both the input and the output format use signed 16-bit samples.
  const SampleFormat kSampleFormat = kSampleFormatS16;

  // The client asks for an input stream specified by |params|. Start by
  // setting up an input device format according to the same specification.
  // If all goes well during the upcoming initialization, this format will not
  // change. However, under some circumstances, minor changes can be required
  // to fit the current input audio device. If so, a FIFO and/or an audio
  // converter might be needed to ensure that the output format of this stream
  // matches what the client asks for.
  WAVEFORMATEX* format = &input_format_.Format;
  format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
  format->nChannels = params.channels();
  format->nSamplesPerSec = params.sample_rate();
  format->wBitsPerSample = SampleFormatToBitsPerChannel(kSampleFormat);
  format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
  format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;

  // Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE which can be
  // required in combination with e.g. multi-channel microphone arrays.
  format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
  input_format_.Samples.wValidBitsPerSample = format->wBitsPerSample;
  input_format_.dwChannelMask =
      ChannelLayoutToChannelConfig(params.channel_layout());
  input_format_.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
  SendLogMessage("%s => (audio engine format=[%s])", __func__,
                 CoreAudioUtil::WaveFormatToString(&input_format_).c_str());

  // Set up the fixed output format based on |params|. Will not be changed and
  // does not require an extended wave format structure since any multi-channel
  // input will be converted to stereo.
  output_format_.wFormatTag = WAVE_FORMAT_PCM;
  output_format_.nChannels = format->nChannels;
  output_format_.nSamplesPerSec = format->nSamplesPerSec;
  output_format_.wBitsPerSample = format->wBitsPerSample;
  output_format_.nBlockAlign = format->nBlockAlign;
  output_format_.nAvgBytesPerSec = format->nAvgBytesPerSec;
  output_format_.cbSize = 0;
  SendLogMessage("%s => (audio sink format=[%s])", __func__,
                 CoreAudioUtil::WaveFormatToString(&output_format_).c_str());

  // Size in bytes of each audio frame.
  frame_size_bytes_ = format->nBlockAlign;

  // Store size of audio packets which we expect to get from the audio
  // endpoint device in each capture event.
  packet_size_bytes_ = params.GetBytesPerBuffer(kSampleFormat);
  packet_size_frames_ = packet_size_bytes_ / format->nBlockAlign;
  SendLogMessage(
      "%s => (packet size=[%zu bytes/%zu audio frames/%.3f milliseconds])",
      __func__, packet_size_bytes_, packet_size_frames_,
      params.GetBufferDuration().InMillisecondsF());

  // All events are auto-reset events and non-signaled initially.

  // Create the event which the audio engine will signal each time
  // a buffer becomes ready to be processed by the client.
  audio_samples_ready_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  DCHECK(audio_samples_ready_event_.IsValid());

  // Create the event which will be set in Stop() when capturing shall stop.
  stop_capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  DCHECK(stop_capture_event_.IsValid());
}

// Performs no explicit cleanup; only verifies (in DCHECK builds) that the
// destructor runs on the sequence tracked by |sequence_checker_|.
WASAPIAudioInputStream::~WASAPIAudioInputStream() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
}

// Opens the capture device selected at construction and prepares the audio
// engine for streaming. Returns:
//  - kAlreadyOpen if a previous call already succeeded,
//  - kSuccess when the audio engine was initialized,
//  - kFailedSystemPermissions / kFailedInUse when engine initialization failed
//    with E_ACCESSDENIED / AUDCLNT_E_DEVICE_IN_USE,
//  - kFailed for every other failure.
// Every exit path except kAlreadyOpen reports the result through
// ReportOpenResult().
AudioInputStream::OpenOutcome WASAPIAudioInputStream::Open() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  SendLogMessage("%s([opened=%s])", __func__, opened_ ? "true" : "false");
  if (opened_) {
    return OpenOutcome::kAlreadyOpen;
  }

  // Obtain a reference to the IMMDevice interface of the capturing device with
  // the specified unique identifier or role which was set at construction.
  HRESULT hr = SetCaptureDevice();
  if (FAILED(hr)) {
    ReportOpenResult(hr);
    return OpenOutcome::kFailed;
  }

  // Check if raw audio processing is supported for the selected capture device.
  raw_processing_supported_ = RawProcessingSupported();

  if (raw_processing_supported_ &&
      !AudioDeviceDescription::IsLoopbackDevice(device_id_) &&
      InitializeUWPSupport()) {
    // Retrieve a unique identifier of the selected audio device but in a
    // format which can be used by UWP (or Core WinRT) APIs. It can then be
    // utilized in combination with the Windows.Media.Effects UWP API to
    // discover the audio processing chain on a device.
    std::string uwp_device_id = GetUWPDeviceId();
    if (!uwp_device_id.empty()) {
      // For the selected device, generate two lists of enabled audio effects
      // and store them in |default_effect_types_| and |raw_effect_types_|.
      // Default corresponds to "Normal audio signal processing" and Raw is for
      // "Minimal audio signal processing". These two lists are used for UMA
      // stats when the stream is closed.
      GetAudioCaptureEffects(uwp_device_id);
    }
  }

  // Cache the feature state; a warning is logged when it is enabled.
  use_fake_audio_capture_timestamps_ =
      base::FeatureList::IsEnabled(media::kUseFakeAudioCaptureTimestamps);
  if (use_fake_audio_capture_timestamps_) {
    SendLogMessage("%s => (WARNING: capture timestamps will be fake)",
                   __func__);
  }

  // Obtain an IAudioClient interface which enables us to create and initialize
  // an audio stream between an audio application and the audio engine.
  hr = endpoint_device_->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr,
                                  &audio_client_);
  if (FAILED(hr)) {
    open_result_ = OPEN_RESULT_ACTIVATION_FAILED;
    ReportOpenResult(hr);
    return OpenOutcome::kFailed;
  }

  // Raw audio capture suppresses processing that down mixes e.g. a microphone
  // array into a supported format and instead exposes the device's native
  // format. Chrome only supports a maximum number of input channels given by
  // media::kMaxConcurrentChannels. Therefore, one additional test is needed
  // before stating that raw audio processing can be supported.
  // Failure will not prevent opening but the method must succeed to be able to
  // select raw input capture mode.
  WORD audio_engine_channels = 0;
  hr = GetAudioEngineNumChannels(&audio_engine_channels);

  // Attempt to enable communications category and raw capture mode on the audio
  // stream. Ignoring return value since the method logs its own error messages
  // and it should be OK to continue opening the stream even after a failure.
  if (base::FeatureList::IsEnabled(media::kWasapiRawAudioCapture) &&
      raw_processing_supported_ &&
      !AudioDeviceDescription::IsLoopbackDevice(device_id_) && SUCCEEDED(hr)) {
    SetCommunicationsCategoryAndMaybeRawCaptureMode(audio_engine_channels);
  }

  // Verify that the selected audio endpoint supports the specified format
  // set during construction and using the specified client properties.
  // |hr| is reset first so that a failure of the channel query above is not
  // carried over into the reported result.
  hr = S_OK;
  if (!DesiredFormatIsSupported(&hr)) {
    open_result_ = OPEN_RESULT_FORMAT_NOT_SUPPORTED;
    ReportOpenResult(hr);
    return OpenOutcome::kFailed;
  }

  // Initialize the audio stream between the client and the device using
  // shared mode and a lowest possible glitch-free latency.
  hr = InitializeAudioEngine();
  if (SUCCEEDED(hr) && converter_)
    open_result_ = OPEN_RESULT_OK_WITH_RESAMPLING;
  ReportOpenResult(hr);  // Report before we assign a value to |opened_|.
  opened_ = SUCCEEDED(hr);

  if (opened_) {
    return OpenOutcome::kSuccess;
  }

  // Map selected initialization errors to more specific outcomes.
  switch (hr) {
    case E_ACCESSDENIED:
      return OpenOutcome::kFailedSystemPermissions;
    case AUDCLNT_E_DEVICE_IN_USE:
      return OpenOutcome::kFailedInUse;
    default:
      return OpenOutcome::kFailed;
  }
}

// Starts capturing and delivering audio to |callback|. Does nothing if the
// stream has not been opened or is already started.
//
// The capture thread is created before the audio client is started. If
// starting the audio client (or the loopback render client) fails, everything
// set up by this call is rolled back: Stop() is a no-op while |started_| is
// false, so without the rollback the capture thread would never be joined and
// |sink_|, the AGC timer and a system mute would be left active.
void WASAPIAudioInputStream::Start(AudioInputCallback* callback) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(callback);
  SendLogMessage("%s([opened=%s, started=%s])", __func__,
                 opened_ ? "true" : "false", started_ ? "true" : "false");
  if (!opened_)
    return;

  if (started_)
    return;

  // Check if the master volume level of the opened audio session is set to
  // zero and store the information for a UMA histogram generated in Stop().
  // Valid volume levels are in the range 0.0 to 1.0.
  // See http://crbug.com/1014443 for details why this is needed.
  if (GetVolume() == 0.0) {
    SendLogMessage("%s => (WARNING: Input audio session starts at zero volume)",
                   __func__);
    audio_session_starts_at_zero_volume_ = true;
  }

  if (device_id_ == AudioDeviceDescription::kLoopbackWithMuteDeviceId &&
      system_audio_volume_) {
    BOOL muted = false;
    system_audio_volume_->GetMute(&muted);

    // If the system audio is muted at the time of capturing, then no need to
    // mute it again, and later we do not unmute system audio when stopping
    // capturing.
    if (!muted) {
      system_audio_volume_->SetMute(true, nullptr);
      mute_done_ = true;
    }
  }

  DCHECK(!sink_);
  sink_ = callback;

  // Starts periodic AGC microphone measurements if the AGC has been enabled
  // using SetAutomaticGainControl().
  StartAgc();

  // Create and start the thread that will drive the capturing by waiting for
  // capture events.
  DCHECK(!capture_thread_.get());
  capture_thread_ = std::make_unique<base::DelegateSimpleThread>(
      this, "wasapi_capture_thread",
      base::SimpleThread::Options(base::ThreadType::kRealtimeAudio));
  capture_thread_->Start();

  // Start streaming data between the endpoint buffer and the audio engine.
  HRESULT hr = audio_client_->Start();
  const bool capture_client_started = SUCCEEDED(hr);
  if (!capture_client_started) {
    SendLogMessage("%s => (ERROR: IAudioClient::Start=[%s])", __func__,
                   ErrorToString(hr).c_str());
  }

  if (capture_client_started && audio_render_client_for_loopback_.Get()) {
    hr = audio_render_client_for_loopback_->Start();
    if (FAILED(hr))
      SendLogMessage("%s => (ERROR: IAudioClient::Start=[%s] (loopback))",
                     __func__, ErrorToString(hr).c_str());
  }

  started_ = SUCCEEDED(hr);
  if (started_)
    return;

  // Starting failed: undo everything done above so that the stream is left in
  // the same state as before the call.

  // The capture client may be running if only the loopback render client
  // failed to start. Best effort; the result is intentionally not checked
  // since the start failure has already been logged.
  if (capture_client_started)
    audio_client_->Stop();

  // Shut down and join the capture thread. It only uses |sink_| and other
  // members while running, so it must be gone before they are reset.
  if (capture_thread_) {
    SetEvent(stop_capture_event_.Get());
    capture_thread_->Join();
    capture_thread_.reset();
    // Make sure the auto-reset stop event is not left signaled for the next
    // capture thread.
    ResetEvent(stop_capture_event_.Get());
  }

  // Stops periodic AGC microphone measurements started above.
  StopAgc();

  // Restore the system audio if it was muted above.
  if (mute_done_ && system_audio_volume_) {
    system_audio_volume_->SetMute(false, nullptr);
    mute_done_ = false;
  }

  // No capture session took place, so drop the state recorded for Stop().
  audio_session_starts_at_zero_volume_ = false;
  sink_ = nullptr;
}

void WASAPIAudioInputStream::Stop() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  SendLogMessage("%s([started=%s])", __func__, started_ ? "true" : "false");
  if (!started_)
    return;

  // Only upload UMA histogram for the case when AGC is enabled, i.e., for
  // WebRTC based audio input streams.
  const bool add_uma_histogram = GetAutomaticGainControl();

  // We have muted system audio for capturing, so we need to unmute it when
  // capturing stops.
  if (device_id_ == AudioDeviceDescription::kLoopbackWithMuteDeviceId &&
      mute_done_) {
    DCHECK(system_audio_volume_);
    if (system_audio_volume_) {
      system_audio_volume_->SetMute(false, nullptr);
      mute_done_ = false;
    }
  }

  // Stops periodic AGC microphone measurements.
  StopAgc();

  // Shut down the capture thread.
  if (stop_capture_event_.IsValid()) {
    SetEvent(stop_capture_event_.Get());
  }

  // Stop the input audio streaming.
  HRESULT hr = audio_client_->Stop();
  if (FAILED(hr)) {
    SendLogMessage("%s => (ERROR: IAudioClient::Stop=[%s])", __func__,
                   ErrorToString(hr).c_str());
  }

  // Wait until the thread completes and perform cleanup.
  if (capture_thread_) {
    SetEvent(stop_capture_event_.Get());
    capture_thread_->Join();
    capture_thread_.reset();
  }

  // Upload UMA histogram to track down possible issue that can lead to a
  // "no audio" state. See http://crbug.com/1014443.
  if (add_uma_histogram) {
    base::UmaHistogramBoolean("Media.Audio.InputVolumeStartsAtZeroWin",
                              audio_session_starts_at_zero_volume_);
    audio_session_starts_at_zero_volume_ = false;
  }

  SendLogMessage(
      "%s => (timestamp(n)-timestamp(n-1)=[min: %.3f msec, max: %.3f msec])",
      __func__, min_timestamp_diff_.InMillisecondsF(),
      max_timestamp_diff_.InMillisecondsF());

  started_ = false;
  sink_ = nullptr;
}

// Stops the stream if needed, reports statistics and hands the stream back to
// the audio manager, which destroys it. Must be the last call made on this
// object.
void WASAPIAudioInputStream::Close() {
  SendLogMessage("%s()", __func__);
  // Close() may be called at any point: before Open(), before Start(), or
  // after Start(). Stop() takes care of ignoring the not-started cases.
  Stop();

  // Statistics are only produced when AGC is enabled, i.e., for WebRTC based
  // audio input streams.
  const bool agc_enabled = GetAutomaticGainControl();
  if (agc_enabled) {
    // Track whether the capture device supported raw audio capture or not.
    // See https://crbug.com/1133643.
    base::UmaHistogramBoolean("Media.Audio.RawProcessingSupportedWin",
                              raw_processing_supported_);

    // The corresponding UMAs are deprecated, but the effect lists are still
    // emitted as text logs for debugging purposes.
    for (const auto& effect : default_effect_types_) {
      SendLogMessage("%s => (Media.Audio.Capture.Win.DefaultEffectType=%s)",
                     __func__, EffectTypeToString(effect));
    }
    for (const auto& effect : raw_effect_types_) {
      SendLogMessage("%s => (Media.Audio.Capture.Win.RawEffectType=%s)",
                     __func__, EffectTypeToString(effect));
    }
  }

  if (converter_) {
    converter_->RemoveInput(this);
  }

  ReportAndResetGlitchStats();

  // Tell the audio manager that the stream has been closed. This will cause
  // our destruction.
  manager_->ReleaseInputStream(this);
}

// Returns the maximum volume level: 1.0 once the stream has been opened and
// 0.0 (plus a debug-only error log) otherwise.
double WASAPIAudioInputStream::GetMaxVolume() {
  // A successful Open() is required, to ensure that an audio session exists
  // and that an ISimpleAudioVolume interface has been created.
  DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";

  // The effective volume value is always in the range 0.0 to 1.0, hence the
  // maximum is the fixed value 1.0 for an opened stream.
  return opened_ ? 1.0 : 0.0;
}

void WASAPIAudioInputStream::SetVolume(double volume) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK_GE(volume, 0.0);
  DCHECK_LE(volume, 1.0);
  SendLogMessage("%s({volume=%.2f} [opened=%s])", __func__, volume,
                 opened_ ? "true" : "false");
  if (!opened_)
    return;

  // Set a new master volume level. Valid volume levels are in the range
  // 0.0 to 1.0. Ignore volume-change events.
  HRESULT hr = simple_audio_volume_->SetMasterVolume(static_cast<float>(volume),
                                                     nullptr);
  if (FAILED(hr)) {
    SendLogMessage("%s => (ERROR: ISimpleAudioVolume::SetMasterVolume=[%s])",
                   __func__, ErrorToString(hr).c_str());
  }

  // Update the AGC volume level based on the last setting above. Note that,
  // the volume-level resolution is not infinite and it is therefore not
  // possible to assume that the volume provided as input parameter can be
  // used directly. Instead, a new query to the audio hardware is required.
  // This method does nothing if AGC is disabled.
  UpdateAgcVolume();
}

double WASAPIAudioInputStream::GetVolume() {
  DCHECK(opened_) << "Open() has not been called successfully";
  if (!opened_)
    return 0.0;

  // Retrieve the current volume level. The value is in the range 0.0 to 1.0.
  float level = 0.0f;
  HRESULT hr = simple_audio_volume_->GetMasterVolume(&level);
  if (FAILED(hr)) {
    SendLogMessage("%s => (ERROR: ISimpleAudioVolume::GetMasterVolume=[%s])",
                   __func__, ErrorToString(hr).c_str());
  }

  return static_cast<double>(level);
}

bool WASAPIAudioInputStream::IsMuted() {
  DCHECK(opened_) << "Open() has not been called successfully";
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  if (!opened_)
    return false;

  // Retrieves the current muting state for the audio session.
  BOOL is_muted = FALSE;
  HRESULT hr = simple_audio_volume_->GetMute(&is_muted);
  if (FAILED(hr)) {
    SendLogMessage("%s => (ERROR: ISimpleAudioVolume::GetMute=[%s])", __func__,
                   ErrorToString(hr).c_str());
  }

  return is_muted != FALSE;
}

// Intentionally a no-op: |output_device_id| is ignored because selecting an
// output device for echo cancellation is not supported by this stream.
void WASAPIAudioInputStream::SetOutputDeviceForAec(
    const std::string& output_device_id) {
  // Not supported. Do nothing.
}

// Formats a printf-style message, prefixes it with "WAIS::" and passes it to
// |log_callback_|. Does nothing when no log callback has been set.
void WASAPIAudioInputStream::SendLogMessage(const char* format, ...) {
  if (log_callback_.is_null())
    return;

  va_list args;
  va_start(args, format);
  const std::string formatted = base::StringPrintV(format, args);
  va_end(args);

  std::string msg("WAIS::" + formatted);
  log_callback_.Run(msg);
}

void WASAPIAudioInputStream::Run() {
  ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);

  // Enable MMCSS to ensure that this thread receives prioritized access to
  // CPU resources.
  DWORD task_index = 0;
  HANDLE mm_task =
      avrt::AvSetMmThreadCharacteristics(L"Pro Audio", &task_index);
  bool mmcss_is_ok =
      (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
  if (!mmcss_is_ok) {
    // Failed to enable MMCSS on this thread. It is not fatal but can lead
    // to reduced QoS at high load.
    DWORD err = GetLastError();
    LOG(ERROR) << "WAIS::" << __func__
               << " => (ERROR: Failed to enable MMCSS (error code=" << err
               << "))";
  }

  // Allocate a buffer with a size that enables us to take care of cases like:
  // 1) The recorded buffer size is smaller, or does not match exactly with,
  //    the selected packet size used in each callback.
  // 2) The selected buffer size is larger than the recorded buffer size in
  //    each event.
  // In the case where no resampling is required, a single buffer should be
  // enough but in case we get buffers that don't match exactly, we'll go with
  // two. Same applies if we need to resample and the buffer ratio is perfect.
  // However if the buffer ratio is imperfect, we will need 3 buffers to safely
  // be able to buffer up data in cases where a conversion requires two audio
  // buffers (and we need to be able to write to the third one).
  size_t capture_buffer_size =
      std::max(2 * endpoint_buffer_size_frames_ * frame_size_bytes_,
               2 * packet_size_frames_ * frame_size_bytes_);
  int buffers_required = capture_buffer_size / packet_size_bytes_;
  if (converter_ && imperfect_buffer_size_conversion_)
    ++buffers_required;

  DCHECK(!fifo_);
  fifo_ = std::make_unique<AudioBlockFifo>(
      input_format_.Format.nChannels, packet_size_frames_, buffers_required);
  DVLOG(1) << "AudioBlockFifo buffer count: " << buffers_required;

  bool recording = true;
  bool error = false;
  HANDLE wait_array[2] = {stop_capture_event_.Get(),
                          audio_samples_ready_event_.Get()};

  record_start_time_ = base::TimeTicks::Now();
  last_capture_time_ = base::TimeTicks();
  max_timestamp_diff_ = base::TimeDelta::Min();
  min_timestamp_diff_ = base::TimeDelta::Max();

  while (recording && !error) {
    // Wait for a close-down event or a new capture event.
    DWORD wait_result = WaitForMultipleObjects(2, wait_array, FALSE, INFINITE);
    switch (wait_result) {
      case WAIT_OBJECT_0 + 0:
        // |stop_capture_event_| has been set.
        recording = false;
        break;
      case WAIT_OBJECT_0 + 1:
        // |audio_samples_ready_event_| has been set.
        PullCaptureDataAndPushToSink();
        break;
      case WAIT_FAILED:
      default:
        error = true;
        break;
    }
  }

  if (recording && error) {
    // TODO(henrika): perhaps it worth improving the cleanup here by e.g.
    // stopping the audio client, joining the thread etc.?
    NOTREACHED() << "WASAPI capturing failed with error code "
                 << GetLastError();
  }

  // Disable MMCSS.
  if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
    PLOG(WARNING) << "Failed to disable MMCSS";
  }

  fifo_.reset();
}

void WASAPIAudioInputStream::PullCaptureDataAndPushToSink() {
  TRACE_EVENT1("audio", "WASAPIAudioInputStream::PullCaptureDataAndPushToSink",
               "sample rate", input_format_.Format.nSamplesPerSec);

  UINT64 last_device_position = 0;
  UINT32 num_frames_in_next_packet = 0;

  // Get the number of frames in the next data packet in the capture endpoint
  // buffer. The count reported by GetNextPacketSize matches the count retrieved
  // in the GetBuffer call that follows this call.
  HRESULT hr =
      audio_capture_client_->GetNextPacketSize(&num_frames_in_next_packet);
  if (FAILED(hr)) {
    LOG(ERROR) << "WAIS::" << __func__
               << " => (ERROR: 1-IAudioCaptureClient::GetNextPacketSize=["
               << ErrorToString(hr).c_str() << "])";
    return;
  }

  // Pull data from the capture endpoint buffer until it's empty or an error
  // occurs. Drains the WASAPI capture buffer fully.
  while (num_frames_in_next_packet > 0) {
    BYTE* data_ptr = nullptr;
    UINT32 num_frames_to_read = 0;
    DWORD flags = 0;
    UINT64 device_position = 0;
    UINT64 capture_time_100ns = 0;

    // Retrieve the amount of data in the capture endpoint buffer, replace it
    // with silence if required, create callbacks for each packet and store
    // non-delivered data for the next event.
    hr =
        audio_capture_client_->GetBuffer(&data_ptr, &num_frames_to_read, &flags,
                                         &device_position, &capture_time_100ns);
    if (hr == AUDCLNT_S_BUFFER_EMPTY) {
      DCHECK_EQ(num_frames_to_read, 0u);
      return;
    }
    if (hr == AUDCLNT_E_OUT_OF_ORDER) {
      // A previous IAudioCaptureClient::GetBuffer() call is still in effect.
      // Release any acquired buffer to be able to try reading a buffer again.
      audio_capture_client_->ReleaseBuffer(num_frames_to_read);
    }
    if (FAILED(hr)) {
      LOG(ERROR) << "WAIS::" << __func__
                 << " => (ERROR: IAudioCaptureClient::GetBuffer=["
                 << ErrorToString(hr).c_str() << "])";
      return;
    }

    // The data in the packet is not correlated with the previous packet's
    // device position; this is possibly due to a stream state transition or
    // timing glitch. Note that, usage of this flag was added after the existing
    // glitch detection and it will be used as a supplementary scheme initially.
    // The behavior of the AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY flag is
    // undefined on the application's first call to GetBuffer after Start and
    // Windows 7 or later is required for support.
    const bool observed_data_discontinuity =
        (device_position > 0 && flags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY);
    if (observed_data_discontinuity) {
      LOG(WARNING) << "WAIS::" << __func__
                   << " => (WARNING: AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY)";
    }
    data_discontinuity_reporter_->Log(observed_data_discontinuity);

    // The time at which the device's stream position was recorded is uncertain.
    // Thus, the client might be unable to accurately set a time stamp for the
    // current data packet.
    bool timestamp_error_was_detected = false;
    if (flags & AUDCLNT_BUFFERFLAGS_TIMESTAMP_ERROR) {
      // TODO(https://crbug.com/825744): it might be possible to improve error
      // handling here and avoid using the counter in |capture_time_100ns|.
      LOG(WARNING) << "WAIS::" << __func__
                   << " => (WARNING: AUDCLNT_BUFFERFLAGS_TIMESTAMP_ERROR)";
      if (num_timestamp_errors_ == 0) {
        // Measure the time it took until the first timestamp error was found.
        time_until_first_timestamp_error_ =
            base::TimeTicks::Now() - record_start_time_;
      }
      ++num_timestamp_errors_;
      timestamp_error_was_detected = true;
    }

    // If the device position has changed, we assume this data belongs to a new
    // chunk, so we report delay and glitch stats and update the last and next
    // expected device positions.
    // If the device position has not changed we assume this data belongs to the
    // previous chunk, and only update the expected next device position.
    if (device_position != last_device_position) {
      if (expected_next_device_position_ != 0) {
        base::TimeDelta glitch_duration;
        if (device_position > expected_next_device_position_) {
          glitch_duration = AudioTimestampHelper::FramesToTime(
              device_position - expected_next_device_position_,
              input_format_.Format.nSamplesPerSec);
        }
        glitch_reporter_.UpdateStats(glitch_duration);
      }

      last_device_position = device_position;
      expected_next_device_position_ = device_position + num_frames_to_read;
    } else {
      expected_next_device_position_ += num_frames_to_read;
    }

    base::TimeTicks capture_time;
    if (use_fake_audio_capture_timestamps_) {
      capture_time = base::TimeTicks::Now();
    } else if (!timestamp_error_was_detected) {
      // Use the latest |capture_time_100ns| since it is marked as valid.
      capture_time += base::Microseconds(capture_time_100ns / 10.0);
    }
    if (capture_time <= last_capture_time_) {
      // Latest |capture_time_100ns| can't be trusted. Ensure a monotonic time-
      // stamp sequence by adding one microsecond to the latest timestamp.
      capture_time = last_capture_time_ + base::Microseconds(1);
    }

    // Keep track of max and min time difference between two successive time-
    // stamps. Results are used in Stop() to verify that the time-stamp sequence
    // was monotonic.
    if (!last_capture_time_.is_null()) {
      const auto delta_ts = capture_time - last_capture_time_;
      DCHECK_GT(device_position, 0u);
      DCHECK_GT(delta_ts, base::TimeDelta::Min());
      if (delta_ts > max_timestamp_diff_) {
        max_timestamp_diff_ = delta_ts;
      } else if (delta_ts < min_timestamp_diff_) {
        min_timestamp_diff_ = delta_ts;
      }
    }

    // Store the capture timestamp. Might be used as reference next time if
    // a new valid timestamp can't be retrieved to always guarantee a monotonic
    // sequence.
    last_capture_time_ = capture_time;

    // Adjust |capture_time| for the FIFO before pushing.
    capture_time -= AudioTimestampHelper::FramesToTime(
        fifo_->GetAvailableFrames(), input_format_.Format.nSamplesPerSec);

    if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
      fifo_->PushSilence(num_frames_to_read);
    } else {
      const int bytes_per_sample = input_format_.Format.wBitsPerSample / 8;

      peak_detector_.FindPeak(data_ptr, num_frames_to_read, bytes_per_sample);
      fifo_->Push(data_ptr, num_frames_to_read, bytes_per_sample);
    }

    hr = audio_capture_client_->ReleaseBuffer(num_frames_to_read);
    if (FAILED(hr)) {
      LOG(ERROR) << "WAIS::" << __func__
                 << " => (ERROR: IAudioCaptureClient::ReleaseBuffer=["
                 << ErrorToString(hr).c_str() << "])";
      return;
    }

    // Get a cached AGC volume level which is updated once every second on the
    // audio manager thread. Note that, |volume| is also updated each time
    // SetVolume() is called through IPC by the render-side AGC.
    double volume = 0.0;
    GetAgcVolume(&volume);

    // Deliver captured data to the registered consumer using a packet size
    // which was specified at construction.
    while (fifo_->available_blocks()) {
      if (converter_) {
        if (imperfect_buffer_size_conversion_ &&
            fifo_->available_blocks() == 1) {
          // Special case. We need to buffer up more audio before we can convert
          // or else we'll suffer an underrun.
          // TODO(grunell): Verify this is really true.
          return;
        }
        converter_->Convert(convert_bus_.get());
        sink_->OnData(convert_bus_.get(), capture_time, volume, {});

        // Move the capture time forward for each vended block.
        capture_time += AudioTimestampHelper::FramesToTime(
            convert_bus_->frames(), output_format_.nSamplesPerSec);
      } else {
        sink_->OnData(fifo_->Consume(), capture_time, volume, {});

        // Move the capture time forward for each vended block.
        capture_time += AudioTimestampHelper::FramesToTime(
            packet_size_frames_, input_format_.Format.nSamplesPerSec);
      }
    }

    // Get the number of frames in the next data packet in the capture endpoint
    // buffer. Keep reading if more samples exist.
    hr = audio_capture_client_->GetNextPacketSize(&num_frames_in_next_packet);
    if (FAILED(hr)) {
      LOG(ERROR) << "WAIS::" << __func__
                 << " => (ERROR: 2-IAudioCaptureClient::GetNextPacketSize=["
                 << ErrorToString(hr).c_str() << "])";
      return;
    }
  }  // while (num_frames_in_next_packet > 0)
}

// Reports a stream error: hits NOTREACHED() with the error code and then, if a
// sink is registered, notifies it through OnError().
void WASAPIAudioInputStream::HandleError(HRESULT err) {
  NOTREACHED() << "Error code: " << err;
  if (!sink_) {
    return;
  }
  sink_->OnError();
}

// Resolves |device_id_| to an IMMDevice stored in |endpoint_device_|, activates
// its IAudioEndpointVolume interface into |system_audio_volume_| and verifies
// that the endpoint is active. On failure, |open_result_| is set to the
// matching OPEN_RESULT_* value and the failing HRESULT is returned.
HRESULT WASAPIAudioInputStream::SetCaptureDevice() {
  DCHECK_EQ(OPEN_RESULT_OK, open_result_);
  DCHECK(!endpoint_device_.Get());
  SendLogMessage("%s()", __func__);

  ComPtr<IMMDeviceEnumerator> device_enumerator;
  HRESULT hr =
      ::CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL,
                         IID_PPV_ARGS(&device_enumerator));
  if (FAILED(hr)) {
    open_result_ = OPEN_RESULT_CREATE_INSTANCE;
    return hr;
  }

  // Classify the requested device once; the answers select both the lookup
  // method and its arguments.
  const bool is_loopback = AudioDeviceDescription::IsLoopbackDevice(device_id_);
  const bool is_communications =
      AudioDeviceDescription::IsCommunicationsDevice(device_id_);
  const bool is_default = AudioDeviceDescription::IsDefaultDevice(device_id_);

  if (is_default || is_communications || is_loopback) {
    // To open a stream in loopback mode, the client must obtain an IMMDevice
    // interface for the rendering endpoint device; all other streams use the
    // capture data-flow direction.
    EDataFlow data_flow = eCapture;
    if (is_loopback) {
      data_flow = eRender;
    }
    // Role used when picking the default endpoint.
    ERole role = eConsole;
    if (is_communications) {
      role = eCommunications;
    }
    hr = device_enumerator->GetDefaultAudioEndpoint(data_flow, role,
                                                    &endpoint_device_);
  } else {
    // Any other ID is treated as a unique endpoint device-identification
    // string.
    const auto wide_device_id = base::UTF8ToWide(device_id_);
    hr = device_enumerator->GetDevice(wide_device_id.c_str(),
                                      &endpoint_device_);
  }
  if (FAILED(hr)) {
    open_result_ = OPEN_RESULT_NO_ENDPOINT;
    return hr;
  }

  // Get the volume interface for the endpoint. Used in `Stop()` to query the
  // volume range of the selected input device or to get/set mute state in
  // `Start()` and `Stop()` if a loopback device with muted system audio is
  // requested.
  hr = endpoint_device_->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL,
                                  nullptr, &system_audio_volume_);
  if (FAILED(hr)) {
    open_result_ = OPEN_RESULT_ACTIVATION_FAILED;
    return hr;
  }

  // Verify that the audio endpoint device is active, i.e., the audio
  // adapter that connects to the endpoint device is present and enabled.
  DWORD device_state = DEVICE_STATE_DISABLED;
  hr = endpoint_device_->GetState(&device_state);
  if (FAILED(hr)) {
    open_result_ = OPEN_RESULT_NO_STATE;
    return hr;
  }

  if (device_state & DEVICE_STATE_ACTIVE) {
    return hr;
  }

  DLOG(ERROR) << "Selected capture device is not active.";
  open_result_ = OPEN_RESULT_DEVICE_NOT_ACTIVE;
  return E_ACCESSDENIED;
}

bool WASAPIAudioInputStream::RawProcessingSupported() {
  DCHECK(endpoint_device_.Get());
  // Check if System.Devices.AudioDevice.RawProcessingSupported can be found
  // and queried in the Windows Property System. It corresponds to raw
  // processing mode support for the specified audio device. If its value is
  // VARIANT_TRUE the device supports raw processing mode.
  bool raw_processing_supported = false;
  Microsoft::WRL::ComPtr<IPropertyStore> properties;
  base::win::ScopedPropVariant raw_processing;
  if (FAILED(endpoint_device_->OpenPropertyStore(STGM_READ, &properties)) ||
      FAILED(
          properties->GetValue(PKEY_Devices_AudioDevice_RawProcessingSupported,
                               raw_processing.Receive())) ||
      raw_processing.get().vt != VT_BOOL) {
    SendLogMessage(
        "%s => (WARNING: failed to access "
        "System.Devices.AudioDevice.RawProcessingSupported)",
        __func__);
  } else {
    raw_processing_supported = VariantBoolToBool(raw_processing.get().boolVal);
    SendLogMessage(
        "%s => (System.Devices.AudioDevice.RawProcessingSupported=%s)",
        __func__, raw_processing_supported ? "true" : "false");
  }
  return raw_processing_supported;
}

std::string WASAPIAudioInputStream::GetUWPDeviceId() {
  DCHECK(endpoint_device_.Get());

  // The Windows.Media.Devices.IMediaDeviceStatics interface provides access to
  // the implementation of Windows.Media.Devices.MediaDevice.
  ComPtr<IMediaDeviceStatics> media_device_statics;
  HRESULT hr =
      GetActivationFactory<IMediaDeviceStatics,
                           RuntimeClass_Windows_Media_Devices_MediaDevice>(
          &media_device_statics);
  if (FAILED(hr)) {
    DLOG(ERROR) << "IMediaDeviceStatics factory failed: " << ErrorToString(hr);
    return std::string();
  }

  // The remaining part of this method builds up the unique device ID needed
  // by the Windows.Media.Effects.AudioEffectsManager UWP API to enumerate
  // active capture effects like AEC and NS. The ID contains three parts.
  // Example:
  //   1) \\?\SWD#MMDEVAPI#
  //   2) {0.0.1.00000000}.{7c24467c-94fc-4fa1-a2b2-a3f5d9cb8a5b}
  //   3) #{2eef81be-33fa-4800-9670-1cd474972c3f}
  // Where (1) is a constant string, (2) comes from the IMMDevice::GetId() API,
  // and (3) is a substring of of the selector string which can be retrieved by
  // the IMediaDeviceStatics::GetAudioCaptureSelector UWP API. Knowledge about
  // the structure of this device ID can be gained by using the
  // IMediaDeviceStatics::GetDefaultAudioCaptureId UWP API but this method also
  // adds support for non default devices.

  // (1) Start building the final device ID. Start with the constant prefix.
  std::string device_id(kUwpDeviceIdPrefix);

  // (2) Next, add the unique ID from IMMDevice::GetId() API.
  // Example: {0.0.1.00000000}.{7c24467c-94fc-4fa1-a2b2-a3f5d9cb8a5b}.
  ScopedCoMem<WCHAR> immdevice_id16;
  hr = endpoint_device_->GetId(&immdevice_id16);
  if (FAILED(hr)) {
    DLOG(ERROR) << "IMMDevice::GetId failed: " << ErrorToString(hr);
    return std::string();
  }
  std::string immdevice_id8;
  base::WideToUTF8(immdevice_id16, wcslen(immdevice_id16), &immdevice_id8);
  device_id.append(immdevice_id8);

  // (3) Finally, add the last part from the selector string.
  // Example: '#{2eef81be-33fa-4800-9670-1cd474972c3f}'.
  HSTRING selector;
  // Returns the identifier string of a device for capturing audio. A substring
  // will be used when generating the final unique device ID.
  // Example: part of the selector string can look like
  // System.Devices.InterfaceClassGuid:="{2eef81be-33fa-4800-9670-1cd474972c3f}"
  // and we want the {2eef81be-33fa-4800-9670-1cd474972c3f} substring for our
  // purposes.
  hr = media_device_statics->GetAudioCaptureSelector(&selector);
  if (FAILED(hr)) {
    DLOG(ERROR) << "IMediaDeviceStatics::GetAudioCaptureSelector failed: "
                << ErrorToString(hr);
    return std::string();
  }
  device_id.append("#");
  std::string selector_string = ScopedHString(selector).GetAsUTF8();
  std::size_t start = selector_string.find("{");
  std::size_t stop = selector_string.find("}", start + 1);
  if (start != std::string::npos && stop != std::string::npos) {
    // Will extract '{2eef81be-33fa-4800-9670-1cd474972c3f}' in the example
    // above.
    device_id.append(selector_string.substr(start, stop - start + 1));
  } else {
    DLOG(ERROR) << "Failed to extract System.Devices.InterfaceClassGuid string";
    return std::string();
  }

  return device_id;
}

// Queries the capture effects reported for |uwp_device_id| in the
// Communications media category, once for the Default and once for the Raw
// audio processing mode. Effect types found are appended to
// |default_effect_types_| or |raw_effect_types_| respectively; if a mode
// reports no effects, AudioEffectType_Other is appended as a placeholder.
// Returns early with the failing HRESULT if a manager can't be created or the
// effect list can't be retrieved; otherwise returns the HRESULT of the last
// call made.
HRESULT WASAPIAudioInputStream::GetAudioCaptureEffects(
    const std::string& uwp_device_id) {
  DCHECK(!AudioDeviceDescription::IsLoopbackDevice(device_id_));
  DCHECK(raw_processing_supported_);
  DCHECK(!uwp_device_id.empty());
  SendLogMessage("%s()", __func__);

  using ProcessingMode = ABI::Windows::Media::AudioProcessing;
  using EffectType = ABI::Windows::Media::Effects::AudioEffectType;

  // The Windows.Media.Effects.IAudioEffectsManagerStatics interface provides
  // access to the implementation of Windows.Media.Effects.AudioEffectsManager.
  ComPtr<IAudioEffectsManagerStatics> effects_manager_statics;
  HRESULT hr = GetActivationFactory<
      IAudioEffectsManagerStatics,
      RuntimeClass_Windows_Media_Effects_AudioEffectsManager>(
      &effects_manager_statics);
  if (FAILED(hr)) {
    SendLogMessage(
        "%s => (ERROR: IAudioEffectsManagerStatics factory failed: [%s])",
        __func__, ErrorToString(hr).c_str());
    return hr;
  }

  SendLogMessage("%s => (uwp_device_id=[%s])", __func__, uwp_device_id.c_str());
  ScopedHString device_id_hstring = ScopedHString::Create(uwp_device_id);

  // Appends |type| to the member vector that belongs to |mode|. The stored
  // values will be utilized later for UMA histograms.
  const auto store_effect_type = [this](ProcessingMode mode, EffectType type) {
    if (mode == ProcessingMode::AudioProcessing_Default) {
      default_effect_types_.push_back(type);
    } else {
      raw_effect_types_.push_back(type);
    }
  };

  // Check capture effects for two different audio processing modes:
  // - Default: Normal audio signal processing
  // - Raw: Minimal audio signal processing
  // Raw is included since it is not possible to disable all effects on all
  // devices. In most cases, the number of found capture effects will be zero
  // for the raw mode.
  const ProcessingMode processing_modes[] = {
      ProcessingMode::AudioProcessing_Default,
      ProcessingMode::AudioProcessing_Raw};
  for (const ProcessingMode mode : processing_modes) {
    // Create an AudioCaptureEffectsManager which can be used to discover the
    // audio processing chain on a device for a specific media category and
    // audio processing mode. The media category is fixed and set to
    // Communications since that is what we aim at using when audio effects
    // later are disabled.
    ComPtr<IAudioCaptureEffectsManager> capture_effects_manager;
    hr = effects_manager_statics->CreateAudioCaptureEffectsManagerWithMode(
        device_id_hstring.get(),
        ABI::Windows::Media::Capture::MediaCategory::
            MediaCategory_Communications,
        mode, &capture_effects_manager);
    if (FAILED(hr)) {
      SendLogMessage(
          "%s => (ERROR: IAudioEffectsManagerStatics::"
          "CreateAudioCaptureEffectsManager=[%s])",
          __func__, ErrorToString(hr).c_str());
      return hr;
    }

    // Get a list of audio effects on the device. Based on tests on different
    // devices, only enabled effects will be included. Hence, if a user has
    // explicitly disabled an effect using the System Sound Settings, that
    // component will not show up here.
    ComPtr<IVectorView<ABI::Windows::Media::Effects::AudioEffect*>>
        effect_list;
    hr = capture_effects_manager->GetAudioCaptureEffects(&effect_list);
    if (FAILED(hr)) {
      SendLogMessage(
          "%s => (ERROR: IAudioCaptureEffectsManager::"
          "GetAudioCaptureEffects=[%s])",
          __func__, ErrorToString(hr).c_str());
      return hr;
    }

    // Number of entries in the list; stays zero if no list was returned.
    unsigned int num_effects = 0;
    if (effect_list) {
      effect_list->get_Size(&num_effects);
    }

    // Record the type of every effect that can be read. Entries that fail to
    // resolve are skipped, but |hr| keeps the result of the last call.
    for (unsigned int index = 0; index < num_effects; ++index) {
      ComPtr<ABI::Windows::Media::Effects::IAudioEffect> effect;
      hr = effect_list->GetAt(index, &effect);
      if (FAILED(hr)) {
        continue;
      }
      EffectType effect_type;
      hr = effect->get_AudioEffectType(&effect_type);
      if (FAILED(hr)) {
        continue;
      }
      store_effect_type(mode, effect_type);
    }

    // For cases when no audio effects were found (common in raw mode), add a
    // dummy effect type called AudioEffectType_Other so that the vector
    // contains at least one value. This is done to ensure that an UMA histogram
    // is uploaded also for the empty case. Hence, AudioEffectType_Other is
    // used to indicate an unknown audio effect and "no audio effect found".
    if (num_effects == 0) {
      store_effect_type(mode, EffectType::AudioEffectType_Other);
    }
  }

  return hr;
}

// Writes the channel count of the shared-mode mix format of |audio_client_| to
// |channels|. On failure |channels| is left untouched and the failing HRESULT
// from CoreAudioUtil::GetSharedModeMixFormat() is returned.
HRESULT WASAPIAudioInputStream::GetAudioEngineNumChannels(WORD* channels) {
  DCHECK(audio_client_.Get());
  SendLogMessage("%s()", __func__);

  // Retrieve the stream format that the audio engine uses for its internal
  // processing of shared-mode streams.
  WAVEFORMATEXTENSIBLE engine_format;
  const HRESULT hr = CoreAudioUtil::GetSharedModeMixFormat(audio_client_.Get(),
                                                           &engine_format);
  if (FAILED(hr)) {
    return hr;
  }

  // Return the native number of supported audio channels.
  CoreAudioUtil::WaveFormatWrapper format_wrapper(&engine_format);
  *channels = format_wrapper->nChannels;
  SendLogMessage("%s => (native channels=[%d])", __func__, *channels);
  return hr;
}

// Uses IAudioClient2::SetClientProperties() to put the stream in the
// communications category and, when |channels| is in the range
// [1, media::kMaxConcurrentChannels], to request raw capture mode as well.
// Returns the HRESULT of the IAudioClient2 query or of SetClientProperties().
HRESULT
WASAPIAudioInputStream::SetCommunicationsCategoryAndMaybeRawCaptureMode(
    WORD channels) {
  DCHECK(audio_client_.Get());
  DCHECK(!AudioDeviceDescription::IsLoopbackDevice(device_id_));
  DCHECK(raw_processing_supported_);
  SendLogMessage("%s({channels=%d})", __func__, channels);

  ComPtr<IAudioClient2> client2;
  HRESULT hr = audio_client_.As(&client2);
  if (FAILED(hr)) {
    SendLogMessage("%s => (ERROR: IAudioClient2 is not supported)", __func__);
    return hr;
  }
  if (!client2.Get()) {
    return hr;
  }

  AudioClientProperties client_properties = {0};
  client_properties.cbSize = sizeof(AudioClientProperties);
  client_properties.bIsOffload = false;
  // AudioCategory_Communications opts us in to communications policy and
  // communications processing. AUDCLNT_STREAMOPTIONS_RAW turns off the
  // processing, but not the policy.
  client_properties.eCategory = AudioCategory_Communications;

  // A 'raw' stream bypasses all signal processing except for endpoint
  // specific, always-on processing in the Audio Processing Object (APO),
  // driver, and hardware.
  // See https://crbug.com/1257662 for details on why we avoid using raw
  // capture mode on devices with more than eight input channels.
  const bool request_raw_capture =
      channels > 0 && channels <= media::kMaxConcurrentChannels;
  if (request_raw_capture) {
    client_properties.Options = AUDCLNT_STREAMOPTIONS_RAW;
  }

  hr = client2->SetClientProperties(&client_properties);
  if (FAILED(hr)) {
    SendLogMessage("%s => (ERROR: IAudioClient2::SetClientProperties=[%s])",
                   __func__, ErrorToString(hr).c_str());
  }
  return hr;
}

// Asks |audio_client_| whether |input_format_| is supported in shared mode.
//
// If the OS answers S_FALSE (a closest match exists), |input_format_| is
// rewritten to follow the closest match and, if conversion from that format is
// supported, a converter is set up and the result is upgraded to S_OK.
//
// |hr| receives the final result code. Returns true only when that code is
// S_OK; an S_FALSE result means a closest match exists but conversion to it is
// not supported, so SUCCEEDED()/FAILED() can't be used by callers to determine
// whether the desired format is supported.
bool WASAPIAudioInputStream::DesiredFormatIsSupported(HRESULT* hr) {
  SendLogMessage("%s()", __func__);
  // An application that uses WASAPI to manage shared-mode streams can rely
  // on the audio engine to perform only limited format conversions. The audio
  // engine can convert between a standard PCM sample size used by the
  // application and the floating-point samples that the engine uses for its
  // internal processing. However, the format for an application stream
  // typically must have the same number of channels and the same sample
  // rate as the stream format used by the device.
  // Many audio devices support both PCM and non-PCM stream formats. However,
  // the audio engine can mix only PCM streams.
  base::win::ScopedCoMem<WAVEFORMATEX> suggested_format;
  HRESULT result = audio_client_->IsFormatSupported(
      AUDCLNT_SHAREMODE_SHARED,
      reinterpret_cast<const WAVEFORMATEX*>(&input_format_),
      &suggested_format);
  if (FAILED(result)) {
    SendLogMessage("%s => (ERROR: IAudioClient::IsFormatSupported=[%s])",
                   __func__, ErrorToString(result).c_str());
  }

  // Anything other than S_FALSE is final: either the format is supported as is
  // (S_OK) or it is not usable.
  if (result != S_FALSE) {
    *hr = result;
    return result == S_OK;
  }

  SendLogMessage(
      "%s => (WARNING: Format is not supported but a closest match exists)",
      __func__);

  // Change the format we're going to ask for to better match with what the OS
  // can provide.  If we succeed in initializing the audio client in this
  // format and are able to convert from this format, we will do that
  // conversion.
  WAVEFORMATEX& requested_format = input_format_.Format;
  requested_format.nChannels = suggested_format->nChannels;
  requested_format.nSamplesPerSec = suggested_format->nSamplesPerSec;

  // If the closest match is fixed point PCM (WAVE_FORMAT_PCM or
  // KSDATAFORMAT_SUBTYPE_PCM), we use the closest match's bits per sample.
  // Otherwise, we keep the bits sample as is since we still request fixed
  // point PCM. In that case the closest match is typically in float format
  // (KSDATAFORMAT_SUBTYPE_IEEE_FLOAT).
  const bool closest_match_is_pcm =
      CoreAudioUtil::WaveFormatWrapper(suggested_format.get()).IsPcm();
  if (closest_match_is_pcm) {
    requested_format.wBitsPerSample = suggested_format->wBitsPerSample;
  }

  // Re-derive the dependent fields from the updated channel count, sample
  // rate and sample size.
  requested_format.nBlockAlign =
      (requested_format.wBitsPerSample / 8) * requested_format.nChannels;
  requested_format.nAvgBytesPerSec =
      requested_format.nSamplesPerSec * requested_format.nBlockAlign;

  if (IsSupportedFormatForConversion(&input_format_)) {
    SendLogMessage(
        "%s => (WARNING: Captured audio will be converted: [%s] ==> [%s])",
        __func__, CoreAudioUtil::WaveFormatToString(&input_format_).c_str(),
        CoreAudioUtil::WaveFormatToString(&output_format_).c_str());
    SetupConverterAndStoreFormatInfo();

    // Indicate that we're good to go with a close match.
    result = S_OK;
  }

  *hr = result;
  return result == S_OK;
}

void WASAPIAudioInputStream::SetupConverterAndStoreFormatInfo() {
  // Ideally, we want a 1:1 ratio between the buffers we get and the buffers
  // we give to OnData so that each buffer we receive from the OS can be
  // directly converted to a buffer that matches with what was asked for.
  const double buffer_ratio =
      output_format_.nSamplesPerSec / static_cast<double>(packet_size_frames_);
  double new_frames_per_buffer =
      input_format_.Format.nSamplesPerSec / buffer_ratio;

  const auto input_layout =
      ChannelLayoutConfig::Guess(input_format_.Format.nChannels);
  DCHECK_NE(CHANNEL_LAYOUT_UNSUPPORTED, input_layout.channel_layout());
  const auto output_layout =
      ChannelLayoutConfig::Guess(output_format_.nChannels);
  DCHECK_NE(CHANNEL_LAYOUT_UNSUPPORTED, output_layout.channel_layout());

  const AudioParameters input(AudioParameters::AUDIO_PCM_LOW_LATENCY,
                              input_layout, input_format_.Format.nSamplesPerSec,
                              static_cast<int>(new_frames_per_buffer));

  const AudioParameters output(AudioParameters::AUDIO_PCM_LOW_LATENCY,
                               output_layout, output_format_.nSamplesPerSec,
                               packet_size_frames_);

  converter_ = std::make_unique<AudioConverter>(input, output, false);
  converter_->AddInput(this);
  converter_->PrimeWithSilence();
  convert_bus_ = AudioBus::Create(output);

  // Update our packet size assumptions based on the new format.
  const auto new_bytes_per_buffer = static_cast<int>(new_frames_per_buffer) *
                                    input_format_.Format.nBlockAlign;
  packet_size_frames_ = new_bytes_per_buffer / input_format_.Format.nBlockAlign;
  packet_size_bytes_ = new_bytes_per_buffer;
  frame_size_bytes_ = input_format_.Format.nBlockAlign;

  imperfect_buffer_size_conversion_ =
      std::modf(new_frames_per_buffer, &new_frames_per_buffer) != 0.0;
  if (imperfect_buffer_size_conversion_) {
    SendLogMessage("%s => (WARNING: Audio capture conversion requires a FIFO)",
                   __func__);
  }
}

HRESULT WASAPIAudioInputStream::InitializeAudioEngine() {
  DCHECK_EQ(OPEN_RESULT_OK, open_result_);
  SendLogMessage("%s()", __func__);

  DWORD flags;
  // Use event-driven mode only for regular input devices. For loopback the
  // EVENTCALLBACK flag is specified when initializing
  // |audio_render_client_for_loopback_|.
  if (AudioDeviceDescription::IsLoopbackDevice(device_id_)) {
    flags = AUDCLNT_STREAMFLAGS_LOOPBACK | AUDCLNT_STREAMFLAGS_NOPERSIST;
  } else {
    flags = AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST;
  }

  // Initialize the audio stream between the client and the device.
  // We connect indirectly through the audio engine by using shared mode.
  // The buffer duration is set to 100 ms, which reduces the risk of glitches.
  // It would normally be set to 0 and the minimum buffer size to ensure that
  // glitches do not occur would be used (typically around 22 ms). There are
  // however cases when there are glitches anyway and it's avoided by setting a
  // larger buffer size. The larger size does not create higher latency for
  // properly implemented drivers.
  HRESULT hr = audio_client_->Initialize(
      AUDCLNT_SHAREMODE_SHARED, flags,
      100 * 1000 * 10,  // Buffer duration, 100 ms expressed in 100-ns units.
      0,                // Device period, n/a for shared mode.
      reinterpret_cast<const WAVEFORMATEX*>(&input_format_),
      AudioDeviceDescription::IsCommunicationsDevice(device_id_)
          ? &kCommunicationsSessionId
          : nullptr);

  if (FAILED(hr)) {
    SendLogMessage("%s => (ERROR: IAudioClient::Initialize=[%s])", __func__,
                   ErrorToString(hr).c_str());
    open_result_ = OPEN_RESULT_AUDIO_CLIENT_INIT_FAILED;
    base::UmaHistogramSparse("Media.Audio.Capture.Win.InitError", hr);
    MaybeReportFormatRelatedInitError(hr);
    return hr;
  }

  // Retrieve the length of the endpoint buffer shared between the client
  // and the audio engine. The buffer length determines the maximum amount
  // of capture data that the audio engine can read from the endpoint buffer
  // during a single processing pass.
  hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_);
  if (FAILED(hr)) {
    open_result_ = OPEN_RESULT_GET_BUFFER_SIZE_FAILED;
    return hr;
  }
  const int endpoint_buffer_size_ms =
      static_cast<double>(endpoint_buffer_size_frames_ * 1000) /
          input_format_.Format.nSamplesPerSec +
      0.5;
  SendLogMessage("%s => (endpoint_buffer_size_frames=%u (%d ms))", __func__,
                 endpoint_buffer_size_frames_, endpoint_buffer_size_ms);

#ifndef NDEBUG
  // The period between processing passes by the audio engine is fixed for a
  // particular audio endpoint device and represents the smallest processing
  // quantum for the audio engine. This period plus the stream latency between
  // the buffer and endpoint device represents the minimum possible latency
  // that an audio application can achieve.
  REFERENCE_TIME device_period_shared_mode = 0;
  REFERENCE_TIME device_period_exclusive_mode = 0;
  HRESULT hr_dbg = audio_client_->GetDevicePeriod(
      &device_period_shared_mode, &device_period_exclusive_mode);
  if (SUCCEEDED(hr_dbg)) {
    // The 5000 addition is to round end result to closest integer.
    const int device_period_ms = (device_period_shared_mode + 5000) / 10000;
    DVLOG(1) << "Device period: " << device_period_ms << " ms";
  }

  REFERENCE_TIME latency = 0;
  hr_dbg = audio_client_->GetStreamLatency(&latency);
  if (SUCCEEDED(hr_dbg)) {
    // The 5000 addition is to round end result to closest integer.
    const int latency_ms = (device_period_shared_mode + 5000) / 10000;
    DVLOG(1) << "Stream latency: " << latency_ms << " ms";
  }
#endif

  // Set the event handle that the audio engine will signal each time a buffer
  // becomes ready to be processed by the client.
  //
  // In loopback case the capture device doesn't receive any events, so we
  // need to create a separate playback client to get notifications. According
  // to MSDN:
  //
  //   A pull-mode capture client does not receive any events when a stream is
  //   initialized with event-driven buffering and is loopback-enabled. To
  //   work around this, initialize a render stream in event-driven mode. Each
  //   time the client receives an event for the render stream, it must signal
  //   the capture client to run the capture thread that reads the next set of
  //   samples from the capture endpoint buffer.
  //
  // http://msdn.microsoft.com/en-us/library/windows/desktop/dd316551(v=vs.85).aspx
  if (AudioDeviceDescription::IsLoopbackDevice(device_id_)) {
    SendLogMessage("%s => (WARNING: loopback mode is selected)", __func__);
    hr = endpoint_device_->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr,
                                    &audio_render_client_for_loopback_);
    if (FAILED(hr)) {
      open_result_ = OPEN_RESULT_LOOPBACK_ACTIVATE_FAILED;
      return hr;
    }

    hr = audio_render_client_for_loopback_->Initialize(
        AUDCLNT_SHAREMODE_SHARED,
        AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST, 0, 0,
        reinterpret_cast<const WAVEFORMATEX*>(&input_format_),
        AudioDeviceDescription::IsCommunicationsDevice(device_id_)
            ? &kCommunicationsSessionId
            : nullptr);
    if (FAILED(hr)) {
      open_result_ = OPEN_RESULT_LOOPBACK_INIT_FAILED;
      return hr;
    }

    hr = audio_render_client_for_loopback_->SetEventHandle(
        audio_samples_ready_event_.Get());
  } else {
    hr = audio_client_->SetEventHandle(audio_samples_ready_event_.Get());
  }

  if (FAILED(hr)) {
    open_result_ = OPEN_RESULT_SET_EVENT_HANDLE;
    return hr;
  }

  // Get access to the IAudioCaptureClient interface. This interface
  // enables us to read input data from the capture endpoint buffer.
  hr = audio_client_->GetService(IID_PPV_ARGS(&audio_capture_client_));
  if (FAILED(hr)) {
    open_result_ = OPEN_RESULT_NO_CAPTURE_CLIENT;
    return hr;
  }

  // Obtain a reference to the ISimpleAudioVolume interface which enables
  // us to control the master volume level of an audio session.
  hr = audio_client_->GetService(IID_PPV_ARGS(&simple_audio_volume_));
  if (FAILED(hr))
    open_result_ = OPEN_RESULT_NO_AUDIO_VOLUME;

  return hr;
}

// Records |open_result_| in the "Media.Audio.Capture.Win.Open" histogram.
// When the result is neither OPEN_RESULT_OK nor
// OPEN_RESULT_OK_WITH_RESAMPLING, a descriptive log line built from
// |open_result_|, |hr| and the input/output formats is emitted as well.
// Expected to be called while |opened_| is still false.
void WASAPIAudioInputStream::ReportOpenResult(HRESULT hr) {
  DCHECK(!opened_);
  UMA_HISTOGRAM_ENUMERATION("Media.Audio.Capture.Win.Open", open_result_,
                            OPEN_RESULT_MAX + 1);

  // Nothing more to log for the two successful outcomes.
  const bool open_succeeded = open_result_ == OPEN_RESULT_OK ||
                              open_result_ == OPEN_RESULT_OK_WITH_RESAMPLING;
  if (open_succeeded)
    return;

  const auto open_log =
      GetOpenLogString(open_result_, hr, input_format_, output_format_);
  SendLogMessage("%s", open_log.c_str());
}

// Records |hr| in the "Media.Audio.Capture.Win.InitError.FormatRelated"
// histogram when it is AUDCLNT_E_UNSUPPORTED_FORMAT or E_INVALIDARG; any
// other value is ignored. The recorded bucket additionally distinguishes
// whether |converter_| is set at the time of the call.
void WASAPIAudioInputStream::MaybeReportFormatRelatedInitError(
    HRESULT hr) const {
  const bool is_unsupported_format = hr == AUDCLNT_E_UNSUPPORTED_FORMAT;
  const bool is_invalid_argument = hr == E_INVALIDARG;
  if (!is_unsupported_format && !is_invalid_argument)
    return;

  const bool has_converter = converter_.get() != nullptr;

  // Start from the E_INVALIDARG / no-converter bucket and refine from there.
  FormatRelatedInitError format_related_error =
      FormatRelatedInitError::kInvalidArgument;
  if (is_unsupported_format) {
    format_related_error =
        has_converter
            ? FormatRelatedInitError::kUnsupportedFormatWithFormatConversion
            : FormatRelatedInitError::kUnsupportedFormat;
  } else if (has_converter) {
    format_related_error =
        FormatRelatedInitError::kInvalidArgumentWithFormatConversion;
  }

  base::UmaHistogramEnumeration(
      "Media.Audio.Capture.Win.InitError.FormatRelated", format_related_error,
      FormatRelatedInitError::kCount);
}

// Consumes the next block from |fifo_| and copies it into |audio_bus|.
// |frames_delayed| and |glitch_info| are not used. Always returns 1.0
// (presumably the volume scale expected by the converter callback contract;
// confirm against the interface declaration).
double WASAPIAudioInputStream::ProvideInput(
    AudioBus* audio_bus,
    uint32_t frames_delayed,
    const AudioGlitchInfo& glitch_info) {
  const auto* next_block = fifo_->Consume();
  next_block->CopyTo(audio_bus);
  return 1.0;
}

void WASAPIAudioInputStream::ReportAndResetGlitchStats() {
  SystemGlitchReporter::Stats stats =
      glitch_reporter_.GetLongTermStatsAndReset();
  SendLogMessage(
      "%s => (num_glitches_detected=[%d], cumulative_audio_lost=[%llu ms], "
      "largest_glitch=[%llu ms])",
      __func__, stats.glitches_detected,
      stats.total_glitch_duration.InMilliseconds(),
      stats.largest_glitch_duration.InMilliseconds());

  int num_data_discontinuities =
      data_discontinuity_reporter_->GetLongTermDiscontinuityCountAndReset();
  SendLogMessage("%s => (discontinuity warnings=[%d])", __func__,
                 num_data_discontinuities);
  SendLogMessage("%s => (timstamp errors=[%" PRIu64 "])", __func__,
                 num_timestamp_errors_);
  if (num_timestamp_errors_ > 0) {
    SendLogMessage("%s => (time until first timestamp error=[%" PRId64 " ms])",
                   __func__,
                   time_until_first_timestamp_error_.InMilliseconds());
  }

  expected_next_device_position_ = 0;
  num_timestamp_errors_ = 0;
}

}  // namespace media