This experiment is to try out the leads found in https://claude.ai/chat/110cfe52-6cbc-4bb1-95b0-bdc830de908e (edu)

This is a common problem in signal processing, especially when dealing with noisy data. Here are a few approaches you could use:

1. Peak Prominence Method
- Instead of just looking at local maxima, consider the "prominence" of each peak
- A peak's prominence is the height difference between the peak and the highest point between it and a higher peak

2. Smoothing the Signal
- Apply smoothing techniques before finding peaks to reduce noise:
  - Moving average
  - Gaussian smoothing
  - Savitzky-Golay filter (particularly good for preserving peak shapes)

3. Peak Width Method
- Consider peaks that have a minimum width
- This helps eliminate narrow noise spikes

Let me create a Python implementation that demonstrates these approaches:



```python
import numpy as np
from scipy.signal import find_peaks, savgol_filter

def find_significant_peaks(arr, method='prominence', **kwargs):
    """
    Find significant peaks in array using different methods.
    
    Parameters:
    arr : array-like
        Input signal
    method : str
        'prominence' - Use peak prominence
        'width' - Use peak width
        'smooth_prominence' - Smooth signal first, then use prominence
        'smooth_savgol' - Use Savitzky-Golay filter before finding peaks
    
    kwargs:
        prominence: minimum prominence for peaks
        width: minimum width for peaks
        window_size: size of smoothing window
        polyorder: polynomial order for Savitzky-Golay filter
    """
    
    # Set default parameters
    prominence = kwargs.get('prominence', 1.0)
    width = kwargs.get('width', 1)
    window_size = kwargs.get('window_size', 5)
    polyorder = kwargs.get('polyorder', 2)
    
    if method == 'prominence':
        # Find peaks with minimum prominence
        peaks, _ = find_peaks(arr, prominence=prominence)
        
    elif method == 'width':
        # Find peaks with minimum width
        peaks, _ = find_peaks(arr, width=width)
        
    elif method == 'smooth_prominence':
        # Apply moving average smoothing
        kernel = np.ones(window_size) / window_size
        smoothed = np.convolve(arr, kernel, mode='valid')
        peaks, _ = find_peaks(smoothed, prominence=prominence)
        # Adjust peak indices for valid convolution
        peaks += window_size // 2
        
    elif method == 'smooth_savgol':
        # Apply Savitzky-Golay filter
        smoothed = savgol_filter(arr, window_size, polyorder)
        peaks, _ = find_peaks(smoothed, prominence=prominence)
    
    else:
        raise ValueError(f"Unknown method: {method}")
    
    return peaks

def analyze_peaks(arr, peaks):
    """
    Analyze the detected peaks and return relevant statistics
    """
    peak_values = arr[peaks]
    peak_stats = {
        'num_peaks': len(peaks),
        'mean_peak_height': np.mean(peak_values),
        'std_peak_height': np.std(peak_values),
        'peak_positions': peaks,
        'peak_values': peak_values
    }
    return peak_stats

# Example usage
if __name__ == "__main__":
    # Generate sample noisy data
    x = np.linspace(0, 10, 1000)
    # Create signal with two main peaks
    signal = 3*np.sin(x) + np.sin(5*x)
    # Add noise
    noisy_signal = signal + np.random.normal(0, 0.5, signal.shape)
    
    # Try different methods
    methods = ['prominence', 'width', 'smooth_prominence', 'smooth_savgol']
    results = {}
    
    for method in methods:
        peaks = find_significant_peaks(noisy_signal, method=method, 
                                     prominence=1.0, width=20, 
                                     window_size=51, polyorder=3)
        results[method] = analyze_peaks(noisy_signal, peaks)

```

This implementation provides several key features:

1. Multiple detection methods:
   - Pure prominence-based detection
   - Width-based detection
   - Smoothed signal with prominence detection
   - Savitzky-Golay filtered signal with peak detection

2. Configurable parameters:
   - Minimum prominence threshold
   - Minimum peak width
   - Smoothing window size
   - Polynomial order for Savitzky-Golay filter

3. Peak analysis functionality that provides statistics about detected peaks

To use this for your specific case, I'd recommend:

1. Start with the 'smooth_savgol' method, as it's generally good at preserving peak shapes while removing noise
2. Adjust the window_size parameter based on your signal's characteristics:
   - Larger window = more smoothing but might miss closer peaks
   - Smaller window = less smoothing but might detect noise
3. Tune the prominence parameter to match the minimum significant peak height you care about

Would you like me to explain how to tune these parameters for your specific data, or would you like to see an example with visualizations?

In [20]:
results

{}

In [21]:
import sys
fp = "/media/vedant/New Volume/DPythonProjects/yt_summarizer"

sys.path.insert(1, fp)   
fp = "/media/vedant/New Volume/DPythonProjects/yt_summarizer/v2"

sys.path.insert(1, fp)   

from v2.helper import Helper

Helper.log("Hello, world!")


Hello, world!


In [22]:
from processed_frame import ProcessedFrame

In [23]:
frames : ProcessedFrame = Helper.load_python_object("../v2/data/zypvgn_python_object/processed_frames.pkl")

In [24]:
noisy_signal, y = ProcessedFrame.get_data_for_plotting(frames)

In [25]:
noisy_signal[:5]

[0, 90, 180, 270, 360]

In [32]:
import numpy as np
from scipy.signal import find_peaks, savgol_filter

def find_significant_peaks(arr, method='prominence', **kwargs):
    """
    Find significant peaks in array using different methods.
    
    Parameters:
    arr : array-like
        Input signal
    method : str
        'prominence' - Use peak prominence
        'width' - Use peak width
        'smooth_prominence' - Smooth signal first, then use prominence
        'smooth_savgol' - Use Savitzky-Golay filter before finding peaks
    
    kwargs:
        prominence: minimum prominence for peaks
        width: minimum width for peaks
        window_size: size of smoothing window
        polyorder: polynomial order for Savitzky-Golay filter
    """
    
    # Set default parameters
    prominence = kwargs.get('prominence', 1.0)
    width = kwargs.get('width', 1)
    window_size = kwargs.get('window_size', 5)
    polyorder = kwargs.get('polyorder', 2)
    
    if method == 'prominence':
        # Find peaks with minimum prominence
        peaks, _ = find_peaks(arr, prominence=prominence)
        
    elif method == 'width':
        # Find peaks with minimum width
        peaks, _ = find_peaks(arr, width=width)
        
    elif method == 'smooth_prominence':
        # Apply moving average smoothing
        kernel = np.ones(window_size) / window_size
        smoothed = np.convolve(arr, kernel, mode='valid')
        peaks, _ = find_peaks(smoothed, prominence=prominence)
        # Adjust peak indices for valid convolution
        peaks += window_size // 2
        
    elif method == 'smooth_savgol':
        # Apply Savitzky-Golay filter
        smoothed = savgol_filter(arr, window_size, polyorder)
        peaks, _ = find_peaks(smoothed, prominence=prominence)
    
    else:
        raise ValueError(f"Unknown method: {method}")
    
    return peaks

def analyze_peaks(arr, peaks):
    """
    Analyze the detected peaks and return relevant statistics
    """
    peak_values = arr[peaks]
    peak_stats = {
        'num_peaks': len(peaks),
        'mean_peak_height': np.mean(peak_values),
        'std_peak_height': np.std(peak_values),
        'peak_positions': peaks,
        'peak_values': peak_values
    }
    return peak_stats

# Example usage
if __name__ == "__main__":
    # Generate sample noisy data
    # x = np.linspace(0, 10, 1000)
    # Create signal with two main peaks
    # signal = 3*np.sin(x) + np.sin(5*x)
    # Add noise
    # noisy_signal = signal + np.random.normal(0, 0.5, signal.shape)
    
    # Try different methods
    methods = ['prominence', 'width', 'smooth_prominence', 'smooth_savgol']
    results = {}
    
    for method in methods:
        peaks = find_significant_peaks(noisy_signal, method=method, 
                                     prominence=1.0, width=20, 
                                     window_size=51, polyorder=3)
        results[method] = analyze_peaks(noisy_signal, peaks)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


In [27]:
x = np.linspace(0, 10, 1000)
signal = 3*np.sin(x) + np.sin(5*x)
noisy_signal2 = signal + np.random.normal(0, 0.5, signal.shape)

In [28]:
type(noisy_signal2)

numpy.ndarray

In [29]:
type(noisy_signal)

list

In [30]:
# type of noisy_signal should be numpy.ndarray  
noisy_signal = np.array(noisy_signal)

In [31]:

type(noisy_signal)

numpy.ndarray

In [34]:
noisy_signal.shape

(235,)