In [6]:
import librosa
import numpy as np

def _short_time_energy(y, frame_length, hop_length):
    """Return the sum of squared samples of every analysis frame of ``y``.

    Frames start every ``hop_length`` samples and span up to ``frame_length``
    samples; frames near the end of the signal are simply shorter.
    """
    frame_starts = range(0, len(y), hop_length)
    return np.fromiter(
        (
            # Vectorised per frame and accumulated in float64; the Python
            # builtin ``sum`` would iterate sample by sample in the interpreter.
            np.sum(np.square(y[start:start + frame_length], dtype=np.float64))
            for start in frame_starts
        ),
        dtype=np.float64,
        count=len(frame_starts),
    )


def detect_applause(audio_path, threshold=0.3, frame_length=2048, hop_length=512,
                    min_gap=1.0):
    """Detect the start times of loud events (e.g. applause) in an audio file.

    The signal is cut into frames, the energy of each frame is normalised by
    the loudest frame of the whole file, and every frame whose normalised
    energy exceeds ``threshold`` is marked as loud. Loud frames that follow the
    previous loud frame by less than ``min_gap`` seconds are merged into the
    same event.

    Note that the threshold is relative to the single loudest frame of the
    recording, so one very loud spike raises the bar for everything else.

    Parameters
    ----------
    audio_path : str or path-like
        File passed to ``librosa.load`` (loaded with ``sr=None``, i.e. without
        resampling).
    threshold : float, default 0.3
        Fraction of the peak frame energy a frame must exceed to count as loud.
    frame_length : int, default 2048
        Number of samples per analysis frame.
    hop_length : int, default 512
        Number of samples between the starts of consecutive frames.
    min_gap : float, default 1.0
        Minimum silence, in seconds, between two loud frames for the second
        one to open a new event.

    Returns
    -------
    list of float
        Start time, in seconds, of each detected event, in ascending order.
        Empty if the signal is empty, silent, or never exceeds the threshold.

    Raises
    ------
    ValueError
        If ``frame_length`` or ``hop_length`` is not positive.
    """
    if frame_length < 1 or hop_length < 1:
        raise ValueError("frame_length and hop_length must be positive integers")

    y, sr = librosa.load(audio_path, sr=None)

    energy = _short_time_energy(y, frame_length, hop_length)

    # An empty or all-silent signal has no usable peak; normalising would
    # raise (empty) or divide 0/0 (silent). ``not peak > 0`` also covers NaN.
    peak = energy.max() if energy.size else 0.0
    if not peak > 0:
        return []

    loud_frames = np.flatnonzero(energy / peak > threshold)
    if loud_frames.size == 0:
        return []

    # Frame index -> time in seconds of the frame's first sample.
    loud_times = loud_frames * hop_length / sr

    # A loud frame opens a new event when it is the very first one or when it
    # follows the previous loud frame by at least ``min_gap`` seconds.
    opens_event = np.concatenate(([True], np.diff(loud_times) >= min_gap))

    # ``tolist`` yields plain Python floats rather than NumPy scalars.
    return loud_times[opens_event].tolist()

In [3]:
# Usage: run the detector with its default parameters and print the event start times.
audio_path = 'audio.wav'
applause_times = detect_applause(audio_path=audio_path)
print(
    f"Aplausos detectados en los siguientes segundos: {applause_times}"
)

Aplausos detectados en los siguientes segundos: [27.840725623582767, 59.779773242630384, 76.47492063492064, 117.4465306122449, 124.18031746031745, 148.80507936507937, 185.3532879818594, 196.1273469387755, 198.06621315192743, 207.80698412698413, 237.42403628117913, 238.4689342403628, 240.1059410430839, 277.37396825396826, 281.7625396825397, 285.3848526077098, 309.9631746031746, 358.3303401360544, 412.50249433106575, 435.06068027210887, 443.895873015873, 460.69551020408164, 466.45405895691607, 467.4873469387755, 496.8489795918367, 503.44344671201816, 513.1609977324263, 558.347029478458, 565.4523356009071, 604.9726984126984, 615.1314285714286, 631.1067573696145, 640.835918367347, 653.8507029478458, 684.4546031746032, 698.9090249433107, 733.2165079365079, 752.8489795918367, 807.9847619047619, 839.412970521542, 860.9843083900226, 946.9097505668934, 958.5777777777778, 978.8371882086168, 997.4828117913833, 1023.2685714285715, 1027.2972335600907, 1066.8872562358276, 1069.244081632653, 1076.848

In [12]:
# Experiment: threshold 0.5 with back-to-back 1024-sample frames (frame == hop).
audio_path = 'audio.wav'
threshold, frame_length, hop_length = 0.5, 1024, 1024
applause_times = detect_applause(
    audio_path,
    threshold=threshold,
    frame_length=frame_length,
    hop_length=hop_length,
)
print("Cantidad de aplausos detectados:", len(applause_times))
print(f"Aplausos detectados en los siguientes segundos: {applause_times}")

KeyboardInterrupt: 

In [13]:
# Experiment: threshold 0.5 with 2048-sample frames advanced by 512 samples.
audio_path = 'audio.wav'
threshold, frame_length, hop_length = 0.5, 2048, 512
applause_times = detect_applause(
    audio_path,
    threshold=threshold,
    frame_length=frame_length,
    hop_length=hop_length,
)
print("Cantidad de aplausos detectados:", len(applause_times))
print(f"Aplausos detectados en los siguientes segundos: {applause_times}")
# NOTE: a truncated list of times from an earlier run used to be pasted here as
# a comment; it did not match this cell's recorded output, so rely on the output.

Cantidad de aplausos detectados: 479
Aplausos detectados en los siguientes segundos: [27.840725623582767, 59.779773242630384, 76.47492063492064, 117.4465306122449, 124.18031746031745, 148.80507936507937, 185.3532879818594, 196.1273469387755, 198.06621315192743, 207.80698412698413, 237.42403628117913, 238.4689342403628, 240.1059410430839, 277.37396825396826, 281.7625396825397, 285.3848526077098, 309.9631746031746, 358.3303401360544, 412.50249433106575, 435.06068027210887, 443.895873015873, 460.69551020408164, 466.45405895691607, 467.4873469387755, 496.8489795918367, 503.44344671201816, 513.1609977324263, 558.347029478458, 565.4523356009071, 604.9726984126984, 615.1314285714286, 631.1067573696145, 640.835918367347, 653.8507029478458, 684.4546031746032, 698.9090249433107, 733.2165079365079, 752.8489795918367, 807.9847619047619, 839.412970521542, 860.9843083900226, 946.9097505668934, 958.5777777777778, 978.8371882086168, 997.4828117913833, 1023.2685714285715, 1027.2972335600907, 1066.88725

In [14]:
# Experiment: threshold raised to 0.7, 2048-sample frames advanced by 512 samples.
audio_path = 'audio.wav'
threshold, frame_length, hop_length = 0.7, 2048, 512
applause_times = detect_applause(
    audio_path,
    threshold=threshold,
    frame_length=frame_length,
    hop_length=hop_length,
)
print("Cantidad de aplausos detectados:", len(applause_times))
print(f"Aplausos detectados en los siguientes segundos: {applause_times}")

Cantidad de aplausos detectados: 27
Aplausos detectados en los siguientes segundos: [148.81668934240363, 435.0722902494331, 615.1430385487529, 752.860589569161, 2020.8326530612244, 2143.434013605442, 2575.940498866213, 2585.1472108843536, 2890.0948752834465, 2973.22231292517, 3878.533514739229, 3961.846712018141, 4105.53179138322, 4283.744943310658, 4751.000090702948, 4910.927528344671, 4946.314739229025, 5092.8210430839, 6356.926984126984, 7520.478911564626, 8334.558911564625, 8568.04716553288, 8571.28634920635, 10399.50947845805, 10907.573696145124, 10909.733151927438, 11163.794285714286]


In [15]:
# Experiment: threshold 0.7 with back-to-back 1024-sample frames (frame == hop).
audio_path = 'audio.wav'
threshold, frame_length, hop_length = 0.7, 1024, 1024
applause_times = detect_applause(
    audio_path,
    threshold=threshold,
    frame_length=frame_length,
    hop_length=hop_length,
)
print("Cantidad de aplausos detectados:", len(applause_times))
print(f"Aplausos detectados en los siguientes segundos: {applause_times}")

Cantidad de aplausos detectados: 202
Aplausos detectados en los siguientes segundos: [59.791383219954646, 117.46975056689342, 122.92643990929706, 124.203537414966, 148.81668934240363, 198.08943310657597, 207.8185941043084, 237.44725623582767, 238.4689342403628, 309.98639455782313, 358.35356009070296, 412.5257142857143, 435.0955102040816, 466.4888888888889, 503.4782766439909, 506.7522902494331, 565.4755555555555, 615.1430385487529, 640.8475283446712, 684.4778231292516, 752.8838095238095, 860.9959183673469, 946.932970521542, 958.589387755102, 997.5060317460318, 1069.2556916099772, 1076.871836734694, 1285.0387301587302, 1297.925804988662, 1366.8426303854876, 1382.748299319728, 1434.134058956916, 1436.8972335600906, 1483.6157823129251, 1573.6163265306122, 1671.6974149659864, 1762.162358276644, 1852.0468027210884, 1857.4338321995465, 1940.607709750567, 1965.3369614512471, 2020.8326530612244, 2095.6473469387756, 2127.3890249433107, 2143.3179138321993, 2151.6538775510203, 2176.6385487528346, 

In [16]:
# Experiment: threshold 0.7 with 512-sample frames taken every 1024 samples
# (the hop is longer than the frame, so samples between frames are not analysed).
audio_path = 'audio.wav'
threshold, frame_length, hop_length = 0.7, 512, 1024
applause_times = detect_applause(
    audio_path,
    threshold=threshold,
    frame_length=frame_length,
    hop_length=hop_length,
)
print("Cantidad de aplausos detectados:", len(applause_times))
print(f"Aplausos detectados en los siguientes segundos: {applause_times}")

Cantidad de aplausos detectados: 416
Aplausos detectados en los siguientes segundos: [27.86394557823129, 59.791383219954646, 117.46975056689342, 122.92643990929706, 124.203537414966, 198.08943310657597, 237.44725623582767, 238.4689342403628, 240.14077097505668, 250.45043083900228, 269.02639455782315, 285.4196825396825, 309.98639455782313, 316.7434013605442, 328.7713378684807, 358.35356009070296, 412.5257142857143, 435.0955102040816, 436.6512471655329, 466.4888888888889, 467.510566893424, 503.4782766439909, 506.7522902494331, 511.2801814058957, 513.1842176870748, 565.4755555555555, 569.2604081632653, 604.9959183673469, 615.1662585034013, 637.3180952380952, 640.8475283446712, 682.1558276643991, 684.4778231292516, 698.9438548752835, 737.5354195011338, 752.8838095238095, 808.0079818594104, 821.0575963718821, 833.5499319727891, 839.4478004535148, 850.1057596371882, 946.932970521542, 951.1125623582766, 958.589387755102, 964.6033560090702, 997.5060317460318, 1009.8590476190476, 1027.320453514

In [17]:
# Experiment: threshold 0.7 with 512-sample frames taken every 2048 samples
# (the hop is longer than the frame, so samples between frames are not analysed).
audio_path = 'audio.wav'
threshold, frame_length, hop_length = 0.7, 512, 2048
applause_times = detect_applause(
    audio_path,
    threshold=threshold,
    frame_length=frame_length,
    hop_length=hop_length,
)
print("Cantidad de aplausos detectados:", len(applause_times))
print(f"Aplausos detectados en los siguientes segundos: {applause_times}")

Cantidad de aplausos detectados: 200
Aplausos detectados en los siguientes segundos: [27.86394557823129, 122.92643990929706, 237.44725623582767, 238.4689342403628, 240.14077097505668, 250.45043083900228, 269.02639455782315, 285.4196825396825, 309.98639455782313, 412.5257142857143, 435.0955102040816, 466.4888888888889, 467.510566893424, 506.7522902494331, 569.2604081632653, 682.1558276643991, 684.4778231292516, 752.8838095238095, 808.0079818594104, 821.0575963718821, 833.5499319727891, 839.4478004535148, 964.6033560090702, 1030.6873469387756, 1249.28, 1357.995827664399, 1373.1816780045351, 1382.748299319728, 1436.8972335600906, 1537.9504761904761, 1573.6163265306122, 1666.4497052154195, 1758.2614058956917, 1762.162358276644, 1800.382403628118, 1846.590113378685, 1965.3369614512471, 2063.185850340136, 2095.6473469387756, 2143.480453514739, 2151.6538775510203, 2176.6385487528346, 2194.0070748299318, 2268.3573696145127, 2362.351746031746, 2434.0549659863946, 2484.256507936508, 2809.6145124

In [19]:
# Experiment: threshold raised to 0.8 with 512-sample frames taken every 2048 samples.
audio_path = 'audio.wav'
threshold, frame_length, hop_length = 0.8, 512, 2048
applause_times = detect_applause(
    audio_path,
    threshold=threshold,
    frame_length=frame_length,
    hop_length=hop_length,
)
print("Cantidad de aplausos detectados:", len(applause_times))
print(f"Aplausos detectados en los siguientes segundos: {applause_times}")

Cantidad de aplausos detectados: 78
Aplausos detectados en los siguientes segundos: [27.86394557823129, 237.44725623582767, 250.45043083900228, 285.4196825396825, 309.98639455782313, 412.5257142857143, 435.0955102040816, 466.4888888888889, 684.4778231292516, 752.8838095238095, 808.0079818594104, 1030.6873469387756, 1357.995827664399, 1382.748299319728, 1762.162358276644, 1800.382403628118, 1965.3369614512471, 2063.185850340136, 2095.6473469387756, 2151.6538775510203, 2194.0070748299318, 2362.351746031746, 2973.965351473923, 2977.1697052154195, 3114.9104761904764, 3476.538049886621, 3509.835464852608, 3724.4342857142856, 3836.958185941043, 4134.9166439909295, 4283.756553287982, 4894.72, 5092.832653061225, 5227.740589569161, 5416.843900226758, 5594.476553287982, 5625.9628117913835, 5699.570068027211, 5863.502947845805, 6006.770068027211, 6150.687346938776, 6261.9109297052155, 6409.91492063492, 6454.682993197279, 6566.463854875284, 6585.73641723356, 6688.229297052154, 6853.648253968254, 7

In [20]:
# Experiment: threshold 0.8 with back-to-back 512-sample frames (frame == hop).
audio_path = 'audio.wav'
threshold, frame_length, hop_length = 0.8, 512, 512
applause_times = detect_applause(
    audio_path,
    threshold=threshold,
    frame_length=frame_length,
    hop_length=hop_length,
)
print("Cantidad de aplausos detectados:", len(applause_times))
print(f"Aplausos detectados en los siguientes segundos: {applause_times}")

Cantidad de aplausos detectados: 109
Aplausos detectados en los siguientes segundos: [148.8282993197279, 237.44725623582767, 309.98639455782313, 412.5257142857143, 435.0955102040816, 466.4888888888889, 503.4782766439909, 565.4755555555555, 752.8838095238095, 808.0079818594104, 850.1057596371882, 946.932970521542, 958.6009977324263, 1367.1560997732427, 1382.748299319728, 1762.162358276644, 1852.0468027210884, 1940.607709750567, 2020.8442630385487, 2063.185850340136, 2143.3179138321993, 2194.0070748299318, 2236.557641723356, 2472.7626303854877, 2512.515192743764, 2898.0593197278913, 2973.233922902494, 2977.158095238095, 3033.5361451247168, 3036.1367800453513, 3057.8474376417234, 3114.9104761904764, 3476.538049886621, 3509.835464852608, 3593.6827210884353, 3854.210612244898, 3878.5567346938774, 3961.869931972789, 4105.5434013605445, 4262.545124716553, 4536.494149659864, 4573.7389569161, 4705.999818594104, 4751.023310657596, 4765.268752834467, 4894.72, 4910.939138321995, 4927.529795918367,