Note: the original clap-recognition code is adapted from [this Stack Overflow thread](http://stackoverflow.com/questions/4160175/detect-tap-with-pyaudio-from-live-mic).

In [14]:
import pyaudio
import struct
import math
import time
from djitellopy import Tello
import cv2
import time

In [15]:
#defaults
INITIAL_TAP_THRESHOLD = 0.25 #The RMS amplitude threshold to initially classify a sound as a tap.
FORMAT = pyaudio.paInt16 #The audio data format (paInt16 for 16-bit audio).
SHORT_NORMALIZE = (1.0/32768.0) #Used to normalize audio sample values to the range [-1.0, 1.0].
CHANNELS = 2 #Number of audio channels (stereo in this case).
RATE = 44100  #Sampling rate in Hz (samples per second).
INPUT_BLOCK_TIME = 0.05 #Duration of each audio block in seconds (0.05 s).
INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME) #Number of frames per audio block.

# if we get this many noisy blocks in a row, increase the threshold
OVERSENSITIVE = 15.0/INPUT_BLOCK_TIME                    
# if we get this many quiet blocks in a row, decrease the threshold
UNDERSENSITIVE = 120.0/INPUT_BLOCK_TIME 
#Control how the threshold adapts to noise conditions.

# if the noise was longer than this many blocks, it's not a 'tap'
# NOTE: 0.15/0.05 evaluates to 2.9999999999999996 in binary floating point,
# which would make "noisycount <= MAX_TAP_BLOCKS" reject a 3-block (0.15 s)
# burst. Rounding gives the intended whole number of blocks.
MAX_TAP_BLOCKS = round(0.15/INPUT_BLOCK_TIME)
#Maximum duration of consecutive noisy blocks that are considered a tap.

In [16]:
#original code, will be overwritten by new code with counter

def get_rms( block ):
    """Return the RMS amplitude of a block of raw 16-bit audio samples.

    RMS amplitude is the square root of the mean of the squared sample
    values. Each sample is scaled by SHORT_NORMALIZE before squaring.

    Args:
        block: Bytes-like object holding native-order signed 16-bit samples
            (two bytes per sample). A trailing odd byte, if any, is ignored.

    Returns:
        The RMS amplitude as a float, or 0.0 when the block holds no
        complete sample.
    """
    # One short per two bytes; integer division keeps the count an int
    # (true division would yield a float in Python 3).
    count = len(block) // 2
    if count == 0:
        # Nothing to measure; avoids dividing by zero below.
        return 0.0

    # unpack_from reads exactly `count` shorts and tolerates extra bytes.
    shorts = struct.unpack_from( "%dh" % count, block )

    sum_squares = 0.0
    for sample in shorts:
        # Scale the signed short by SHORT_NORMALIZE, then accumulate its square.
        n = sample * SHORT_NORMALIZE
        sum_squares += n*n

    return math.sqrt( sum_squares / count )
#RMS amplitude is a measure of the audio signal's power, which helps identify loud noises.

class TapTester(object):
    """Detect short, loud sounds ("taps") on a live audio input stream.

    Audio is read one block at a time by listen(). A block whose RMS
    amplitude exceeds tap_threshold counts as noisy; a run of 1 to
    MAX_TAP_BLOCKS noisy blocks followed by a quiet block is reported
    through tapDetected().
    """

    def __init__(self):
        """Create the PyAudio instance, open the input stream and reset counters."""
        self.pa = pyaudio.PyAudio() #pa: A PyAudio object for handling audio I/O.
        self.stream = self.open_mic_stream()
        self.tap_threshold = INITIAL_TAP_THRESHOLD
        # Start above MAX_TAP_BLOCKS so a quiet first block is not reported as a tap.
        self.noisycount = MAX_TAP_BLOCKS+1 
        self.quietcount = 0 
        self.errorcount = 0

    def stop(self):
        """Close the audio stream (the PyAudio instance in self.pa is left alive)."""
        self.stream.close()

    def find_input_device(self): 
        """Pick an input device whose name contains "mic" or "input".

        Every device is printed as it is inspected. Devices that report no
        input channels are skipped, because the keyword "mic" would
        otherwise also match output-only devices such as
        "Microsoft Sound Mapper - Output".

        Returns:
            The index of the first matching device, or None when no device
            matches (a message is printed in that case).
        """
        for i in range( self.pa.get_device_count() ):     
            devinfo = self.pa.get_device_info_by_index(i)   
            print( "Device %d: %s"%(i,devinfo["name"]) )

            # A device without the key is treated as usable, which keeps the
            # previous name-only behaviour for such entries.
            if devinfo.get("maxInputChannels", 1) <= 0:
                continue

            name = devinfo["name"].lower()
            if any( keyword in name for keyword in ("mic", "input") ):
                print( "Found an input: device %d - %s"%(i,devinfo["name"]) )
                return i

        print( "No preferred input found; using default input device." )
        return None

    def open_mic_stream( self ):
        """Open and return an input stream on the device chosen by find_input_device()."""
        device_index = self.find_input_device()

        stream = self.pa.open(   format = FORMAT,
                                 channels = CHANNELS,
                                 rate = RATE,
                                 input = True,
                                 input_device_index = device_index,
                                 frames_per_buffer = INPUT_FRAMES_PER_BLOCK)

        return stream

    def tapDetected(self): #DETECTED 
        """Hook called by listen() for each detected tap; prints "tapped"."""
        print ("tapped")

    def listen(self):
        """Read one audio block and update the tap-detection state.

        The block's RMS amplitude is compared with tap_threshold. A noisy
        block increments noisycount. A quiet block reports a tap when it
        follows 1 to MAX_TAP_BLOCKS noisy blocks, then resets noisycount.
        """
        try:
            block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        except Exception as e:
            self.errorcount += 1
            print( "(%d) Error recording: %s"%(self.errorcount,e) )
            # NOTE(review): this makes the failed read count as one noisy
            # block, so a quiet block right after it is reported as a tap.
            self.noisycount = 1
            return

        amplitude = get_rms( block )
        if amplitude > self.tap_threshold:
            # noisy block
            self.quietcount = 0
            self.noisycount += 1
        else:            
            # quiet block: a short enough preceding noisy run was a tap
            if 1 <= self.noisycount <= MAX_TAP_BLOCKS:
                self.tapDetected()
            self.noisycount = 0
            self.quietcount += 1

Tello Clap-Command below:

In [24]:
#with tello command and counter

class TapTester(object):
    """Count claps on a live audio input and turn the count into Tello commands.

    listen() reads one audio block per call. A run of 1 to MAX_TAP_BLOCKS
    noisy blocks followed by a quiet block counts as one clap. Whenever
    more than clap_timeout seconds have passed since the current window
    started, actionTrigger() runs for the number of claps counted and the
    window restarts.

    The drone is reached through the module-level name `tello`, which must
    exist before the first command is triggered.
    """

    def __init__(self, tello_distance = 30, clap_timeout=3): 
        """Open the audio input and initialise the clap-counting state.

        Args:
            tello_distance: Distance passed to tello.move_up / tello.move_down.
            clap_timeout: Length in seconds of the window in which claps
                are counted before the matching action is triggered.
        """
        self.pa = pyaudio.PyAudio() 
        self.stream = self.open_mic_stream()
        self.tap_threshold = INITIAL_TAP_THRESHOLD
        # Start above MAX_TAP_BLOCKS so a quiet first block is not counted as a clap.
        self.noisycount = MAX_TAP_BLOCKS+1 
        self.quietcount = 0 
        self.errorcount = 0
        self.last_clap_time = time.perf_counter()  # start of the current counting window
        # Honour the constructor arguments (they used to be overwritten with
        # the hard-coded values 5 and 30).
        self.clap_timeout = clap_timeout
        self.clap_count = 0
        self.tello_distance = tello_distance
        self.running = True  # cleared by stop(); listen() is a no-op afterwards

    def stop(self): 
        """Close the audio stream and make further listen() calls no-ops.

        Safe to call more than once. The PyAudio instance in self.pa is
        left alive.
        """
        if not self.running:
            return
        self.running = False
        self.stream.close()

    def find_input_device(self): 
        """Pick an input device whose name contains "mic" or "input".

        Every device is printed as it is inspected. Devices that report no
        input channels are skipped, because the keyword "mic" would
        otherwise also match output-only devices such as
        "Microsoft Sound Mapper - Output".

        Returns:
            The index of the first matching device, or None when no device
            matches (a message is printed in that case).
        """
        for i in range( self.pa.get_device_count() ):     
            devinfo = self.pa.get_device_info_by_index(i)   
            print( "Device %d: %s"%(i,devinfo["name"]) )

            # A device without the key is treated as usable, which keeps the
            # previous name-only behaviour for such entries.
            if devinfo.get("maxInputChannels", 1) <= 0:
                continue

            name = devinfo["name"].lower()
            if any( keyword in name for keyword in ("mic", "input") ):
                print( "Found an input: device %d - %s"%(i,devinfo["name"]) )
                return i

        print( "No preferred input found; using default input device." )
        return None

    def open_mic_stream( self ): 
        """Open and return an input stream on the device chosen by find_input_device()."""
        device_index = self.find_input_device()

        stream = self.pa.open(   format = FORMAT,
                                 channels = CHANNELS,
                                 rate = RATE,
                                 input = True,
                                 input_device_index = device_index,
                                 frames_per_buffer = INPUT_FRAMES_PER_BLOCK)

        return stream

    def tapDetected(self): #DETECTED 
        """Record one detected clap and print the running count."""
        self.clap_count += 1
        print(f'Clap count: {self.clap_count}')        

    def listen(self):        
        """Process one audio block and trigger the pending action when due.

        Does nothing once stop() has been called. After a read error the
        call returns without checking the window.
        """
        if not self.running:
            # The stream is closed; reading it would only produce errors.
            return

        try:
            # Drone commands block for a while, during which the input
            # buffer overflows; do not turn that into an exception.
            block = self.stream.read(INPUT_FRAMES_PER_BLOCK,
                                     exception_on_overflow=False)
        except Exception as e:
            self.errorcount += 1
            print( "(%d) Error recording: %s"%(self.errorcount,e) )
            # A failed read is not a clap: with noisycount left at 1 the next
            # quiet block would be counted as one and could launch a command.
            self.noisycount = 0
            return

        amplitude = get_rms( block )
        if amplitude > self.tap_threshold:
            # noisy block
            self.quietcount = 0
            self.noisycount += 1
        else:            
            # quiet block: a short enough preceding noisy run was a clap
            if 1 <= self.noisycount <= MAX_TAP_BLOCKS:
                self.tapDetected()
            self.noisycount = 0
            self.quietcount += 1

        if time.perf_counter() - self.last_clap_time > self.clap_timeout:
            self.actionTrigger()
            self.clap_count = 0
            # Restart the window only after the (blocking) action has
            # finished, so the next window is not already partly used up.
            self.last_clap_time = time.perf_counter()

    def actionTrigger(self):
        """Run the action mapped to the current clap count.

        1: takeoff, 2: move up, 3: move down, 4: flip right, 5: land,
        7: stop listening. Any other count does nothing. Each drone command
        is followed by a one second pause and "Awaiting command.".
        """
        count = self.clap_count
        if count == 7:
            self.stop()
            return

        if count == 1:
            tello.takeoff()
        elif count == 2:
            tello.move_up(self.tello_distance)
        elif count == 3:
            tello.move_down(self.tello_distance)
        elif count == 4:
            tello.flip_right()
        elif count == 5:
            tello.land()
        else:
            return

        time.sleep(1)
        print("Awaiting command.")


In [None]:
# Create the module-level Tello object used by the clap commands, connect
# to the drone, and report its battery level.

print("Create Tello object")
tello = Tello()

print("Connect to Tello Drone")
tello.connect()

battery_level = tello.get_battery()
print("Battery Life Percentage: {}".format(battery_level))

In [19]:
#defaults
INITIAL_TAP_THRESHOLD = 0.25 #The RMS amplitude threshold to initially classify a sound as a tap.
FORMAT = pyaudio.paInt16 #The audio data format (paInt16 for 16-bit audio).
SHORT_NORMALIZE = (1.0/32768.0) #Used to normalize audio sample values to the range [-1.0, 1.0].
CHANNELS = 2 #Number of audio channels (stereo in this case).
RATE = 44100  #Sampling rate in Hz (samples per second).
INPUT_BLOCK_TIME = 0.05 #Duration of each audio block in seconds (0.05 s).
INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME) #Number of frames per audio block.

# if we get this many noisy blocks in a row, increase the threshold
OVERSENSITIVE = 15.0/INPUT_BLOCK_TIME                    
# if we get this many quiet blocks in a row, decrease the threshold
UNDERSENSITIVE = 120.0/INPUT_BLOCK_TIME 
#Control how the threshold adapts to noise conditions.

# if the noise was longer than this many blocks, it's not a 'tap'
# NOTE: 0.15/0.05 evaluates to 2.9999999999999996 in binary floating point,
# which would make "noisycount <= MAX_TAP_BLOCKS" reject a 3-block (0.15 s)
# burst. Rounding gives the intended whole number of blocks.
MAX_TAP_BLOCKS = round(0.15/INPUT_BLOCK_TIME)
#Maximum duration of consecutive noisy blocks that are considered a tap.

In [None]:
#starting code, please wait for the "Awaiting command." before clapping again

# Lower the starting threshold; TapTester reads this value when it is created.
INITIAL_TAP_THRESHOLD = 0.05

if __name__ == "__main__":
    tt = TapTester()
    # Process a fixed number of audio blocks, one listen() call per block.
    remaining_blocks = 1000
    while remaining_blocks > 0:
        tt.listen()
        remaining_blocks -= 1


**Current Modes**

Number of Claps within three seconds:
* 1 - Takeoff
* 2 - Move Up (default 30 cm)
* 3 - Move Down (default 30 cm)
* 4 - Flip Right 
* 5 - Land 
* 7 - Stop the TapTester 

### Configuring Audio Device 

In [None]:
#configure audio devices if needed

# Print "<index>, <name>" for every device PyAudio reports.
device_total = tt.pa.get_device_count()
for device_number in range(device_total):
    info = tt.pa.get_device_info_by_index(device_number)
    print(f'{info["index"]}, {info["name"]}')

Some common solutions to unrecognisable clap issues:

1. Check if your input device has set its input volume to 100
2. Turn on voice enhancement (**voice focus**) if you have it; it is usually available from the Microsoft Windows 11 24H2 update onwards
3. If no voice enhancement is found, try connecting an earphone or headset and set volume to 100.
4. Make sure default audio is turned off (you can turn it on again by going to All Sound Devices found in Troubleshooting Settings under Sound)
5. Try setting the initial tap threshold (`INITIAL_TAP_THRESHOLD`) to a value between 0.05 and 0.1, and adjust it from there.