In [2]:
import numpy as np
import cv2
import torch
import pyaudio

# ------------------------- #
# PyAudio initialization
# ------------------------- #
# Audio settings. `duration` and `sample_rate` are read as globals by
# generate_sound; `frequency` and `pan` are the initial tone defaults.
duration = 0.3        # length of each generated tone, in seconds
sample_rate = 96000   # samples per second
frequency = 440       # tone frequency, in Hz
pan = 0.5             # stereo position (equal weight on both channels)

# Create the PyAudio host object, then open a 2-channel float32 output stream on it
p = pyaudio.PyAudio()
stream = p.open(
    rate=sample_rate,
    channels=2,
    format=pyaudio.paFloat32,
    output=True,
)

# define a function to generate the sound
def generate_sound(frequency, pan, duration=None, sample_rate=None):
    """Generate an interleaved stereo sine tone as float32 samples.

    Parameters
    ----------
    frequency : float
        Tone frequency in Hz.
    pan : float
        Linear stereo position: 0.0 puts the whole tone on the left channel,
        1.0 on the right channel. Values outside [0, 1] are clipped so that
        neither channel ever gets a negative (phase-inverted) gain.
    duration : float, optional
        Tone length in seconds. Defaults to the module-level ``duration``.
    sample_rate : int, optional
        Samples per second. Defaults to the module-level ``sample_rate``.

    Returns
    -------
    numpy.ndarray
        1-D float32 array of length ``2 * int(sample_rate * duration)`` with
        the channels interleaved as ``[L0, R0, L1, R1, ...]``.
    """
    # Fall back to the notebook-level settings so existing two-argument
    # calls keep working; the parameters shadow the globals, hence globals().
    if duration is None:
        duration = globals()["duration"]
    if sample_rate is None:
        sample_rate = globals()["sample_rate"]

    # Keep both channel gains inside [0, 1]
    pan = np.clip(pan, 0.0, 1.0)

    # Generate a sine wave with the given frequency (endpoint excluded)
    n_samples = int(sample_rate * duration)
    t = np.linspace(0, duration, n_samples, False)
    waveform = np.sin(2 * np.pi * frequency * t)

    # Apply linear panning and interleave left/right samples
    stereo_waveform = np.empty(2 * n_samples, dtype=np.float32)
    stereo_waveform[0::2] = waveform * (1 - pan)
    stereo_waveform[1::2] = waveform * pan

    return stereo_waveform

# ------------------------- #
# YOLO execution
# ------------------------- #

# Load YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Detection filter: only class 0 (the "person" entity this loop tracks) with
# at least this confidence is considered for sonification.
PERSON_CLASS_ID = 0
MIN_CONFIDENCE = 0.4

# Start video capture from default webcam
cap = cv2.VideoCapture(0)

# try/finally guarantees that the webcam, the preview window and the audio
# stream are released even if the loop raises or the kernel is interrupted.
try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the default webcam (device index 0)")

    while True:
        # Read a frame from the video stream; stop cleanly if the read fails
        ret, frame = cap.read()
        if not ret or frame is None:
            print("Failed to read a frame from the webcam; stopping.")
            break

        # Perform object detection on the frame. OpenCV delivers BGR frames,
        # while the YOLOv5 hub model expects RGB for numpy input, so convert
        # a copy for inference and keep `frame` (BGR) for drawing/display.
        results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Each row is [x1, y1, x2, y2, confidence, class]
        bounding_boxes = results.xyxy[0].tolist()

        # Draw the bounding boxes on the frame and find largest person entity
        largest_person_box = None
        for box in bounding_boxes:
            x1, y1, x2, y2 = (int(coord) for coord in box[:4])
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Append area at index 6, then center x / center y at indices 7 / 8
            box.append((x2 - x1) * (y2 - y1))
            box.append((x1 + x2) / 2)
            box.append((y1 + y2) / 2)

            # If a larger person is found, the main box is replaced
            is_person = box[4] >= MIN_CONFIDENCE and int(box[5]) == PERSON_CLASS_ID
            if is_person and (largest_person_box is None
                              or box[6] > largest_person_box[6]):
                largest_person_box = box

        print(largest_person_box)

        # ------------------------- #
        # Audio generation
        # ------------------------- #

        if largest_person_box is not None:
            # Pan follows the horizontal center of the person box, normalized
            # by the actual frame width (avoids dividing by a 0 that some
            # capture backends report for CAP_PROP_FRAME_WIDTH).
            panning_factor = largest_person_box[7] / frame.shape[1]
            frequency_factor = 1

            frequency = 440 + frequency_factor
            pan = panning_factor
            print(panning_factor)

            # Generate and play the sound (stream.write blocks until the
            # samples have been handed to the audio device)
            waveform = generate_sound(frequency, pan)
            stream.write(waveform.tobytes())

        # Display the resulting frame
        cv2.imshow('Object detection', frame)

        # Exit on key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release video capture and close all windows
    cap.release()
    cv2.destroyAllWindows()

    # Release audio stream
    stream.stop_stream()
    stream.close()
    p.terminate()

Using cache found in C:\Users\enric/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-2-17 Python-3.8.16 torch-1.13.1 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


[148.427734375, 179.53172302246094, 491.70281982421875, 477.57012939453125, 0.8424650430679321, 0.0, 102214, 319.5, 328.0]
0.49921875
[147.21533203125, 178.66641235351562, 495.046630859375, 477.9825744628906, 0.8868729472160339, 0.0, 104052, 321.0, 327.5]
0.5015625
[144.07080078125, 179.96461486816406, 502.36187744140625, 477.4239501953125, 0.8261628746986389, 0.0, 106684, 323.0, 328.0]
0.5046875
[152.2229766845703, 174.34991455078125, 492.54632568359375, 477.5552978515625, 0.9036419987678528, 0.0, 103020, 322.0, 325.5]
0.503125
[145.85028076171875, 171.36795043945312, 498.59283447265625, 478.0672302246094, 0.87092524766922, 0.0, 108371, 321.5, 324.5]
0.50234375
[134.2798309326172, 162.5980987548828, 516.00830078125, 478.90838623046875, 0.7405898571014404, 0.0, 120712, 325.0, 320.0]
0.5078125
[92.4454345703125, 153.59841918945312, 493.30621337890625, 477.1476745605469, 0.8537678122520447, 0.0, 129924, 292.5, 315.0]
0.45703125
[64.81405639648438, 151.843994140625, 471.6186218261719, 474

[234.6313934326172, 113.82661437988281, 639.6131591796875, 473.9114990234375, 0.8708947896957397, 0.0, 145800, 436.5, 293.0]
0.68203125
[246.04237365722656, 105.63285827636719, 640.0, 474.60308837890625, 0.8147724270820618, 0.0, 145386, 443.0, 289.5]
0.6921875
[267.2703552246094, 79.64312744140625, 637.746826171875, 475.12261962890625, 0.8107644319534302, 0.0, 146520, 452.0, 277.0]
0.70625
[253.8618927001953, 75.634521484375, 640.0, 473.569580078125, 0.7387123107910156, 0.0, 154026, 446.5, 274.0]
0.69765625
[282.17047119140625, 81.02944946289062, 637.8218383789062, 472.7535095214844, 0.7393799424171448, 0.0, 138805, 459.5, 276.5]
0.71796875
[293.51483154296875, 79.81581115722656, 636.9033203125, 476.75897216796875, 0.7449796795845032, 0.0, 136171, 464.5, 277.5]
0.72578125
[304.7452392578125, 80.10096740722656, 638.6445922851562, 474.40460205078125, 0.5641208291053772, 0.0, 131596, 471.0, 277.0]
0.7359375
None
[314.5509338378906, 78.91554260253906, 634.5887451171875, 475.744140625, 0.68

[316.2215270996094, 168.64244079589844, 636.21142578125, 477.12994384765625, 0.8569341897964478, 0.0, 98880, 476.0, 322.5]
0.74375
[308.0099792480469, 169.4022979736328, 637.1436767578125, 477.25543212890625, 0.9161275029182434, 0.0, 101332, 472.5, 323.0]
0.73828125
[295.1522521972656, 168.3730010986328, 632.9608154296875, 476.37469482421875, 0.9060086011886597, 0.0, 103796, 463.5, 322.0]
0.72421875
[267.1111755371094, 167.5547332763672, 601.578125, 478.105224609375, 0.9213868975639343, 0.0, 103874, 434.0, 322.5]
0.678125
[238.59619140625, 169.9345703125, 565.3087768554688, 477.063720703125, 0.91954505443573, 0.0, 100716, 401.5, 323.0]
0.62734375
[193.46253967285156, 172.4056854248047, 522.013671875, 477.8807373046875, 0.9136697053909302, 0.0, 100345, 357.5, 324.5]
0.55859375
[175.18350219726562, 172.65374755859375, 514.3131103515625, 477.26171875, 0.9124571084976196, 0.0, 103395, 344.5, 324.5]
0.53828125
[121.93212890625, 171.33213806152344, 499.292724609375, 478.82867431640625, 0.878

[19.0286865234375, 66.28639221191406, 539.7247924804688, 478.626708984375, 0.7641115784645081, 0.0, 214240, 279.0, 272.0]
0.4359375
[24.51031494140625, 65.93536376953125, 544.8186645507812, 477.9342041015625, 0.73089200258255, 0.0, 214240, 284.0, 271.0]
0.44375
[35.66807556152344, 67.178466796875, 545.703125, 478.02886962890625, 0.8067025542259216, 0.0, 209610, 290.0, 272.5]
0.453125
[16.61138916015625, 66.40858459472656, 547.3365478515625, 478.001953125, 0.6921764016151428, 0.0, 218772, 281.5, 272.0]
0.43984375
[14.49481201171875, 67.14909362792969, 552.1182861328125, 477.862060546875, 0.8049395680427551, 0.0, 220580, 283.0, 272.0]
0.4421875
[15.144866943359375, 66.663330078125, 551.01318359375, 479.22552490234375, 0.7507511973381042, 0.0, 221368, 283.0, 272.5]
0.4421875
[16.298431396484375, 67.69119262695312, 549.83984375, 479.1129455566406, 0.7397193908691406, 0.0, 219596, 282.5, 273.0]
0.44140625
[15.943756103515625, 66.08056640625, 565.07958984375, 477.29498291015625, 0.6566071510

[13.6895751953125, 75.02885437011719, 570.1383666992188, 476.1903076171875, 0.7073431015014648, 0.0, 223357, 291.5, 275.5]
0.45546875
[8.726043701171875, 74.08885192871094, 560.7080078125, 479.58984375, 0.7418425679206848, 0.0, 223560, 284.0, 276.5]
0.44375
[11.780548095703125, 77.44444274902344, 561.149169921875, 475.4801025390625, 0.7361925840377808, 0.0, 218900, 286.0, 276.0]
0.446875
[18.4345703125, 73.02940368652344, 549.676513671875, 476.58856201171875, 0.7284701466560364, 0.0, 213993, 283.5, 274.5]
0.44296875
[27.255126953125, 71.47142028808594, 558.6049194335938, 478.84027099609375, 0.6840621829032898, 0.0, 216117, 292.5, 274.5]
0.45703125
[46.257720947265625, 71.02142333984375, 543.2933349609375, 479.22967529296875, 0.786921501159668, 0.0, 202776, 294.5, 275.0]
0.46015625
[15.073944091796875, 72.43777465820312, 547.1026611328125, 474.4189758300781, 0.7120903134346008, 0.0, 213864, 281.0, 273.0]
0.4390625
[14.72137451171875, 73.45700073242188, 549.135986328125, 474.157135009765

[68.94696044921875, 62.313385009765625, 633.3333129882812, 480.0, 0.7566850185394287, 0.0, 236170, 350.5, 271.0]
0.54765625
[106.12208557128906, 75.73446655273438, 610.126220703125, 480.0, 0.8316690325737, 0.0, 204120, 358.0, 277.5]
0.559375
[39.69073486328125, 68.84188842773438, 633.2509765625, 480.0, 0.6934003233909607, 0.0, 244728, 336.0, 274.0]
0.525
[115.33447265625, 76.277587890625, 623.2908935546875, 479.31036376953125, 0.848516583442688, 0.0, 204724, 369.0, 277.5]
0.5765625
[110.10446166992188, 74.19549560546875, 630.123291015625, 476.609130859375, 0.5837122797966003, 0.0, 209040, 370.0, 275.0]
0.578125
[96.52908325195312, 73.66294860839844, 632.4100341796875, 480.0, 0.5537999272346497, 0.0, 218152, 364.0, 276.5]
0.56875
[144.5242919921875, 85.17929077148438, 630.78662109375, 476.0813903808594, 0.6372213959693909, 0.0, 190026, 387.0, 280.5]
0.6046875
[120.14439392089844, 74.439697265625, 622.1400146484375, 477.43145751953125, 0.5598498582839966, 0.0, 202306, 371.0, 275.5]
0.579

[35.2198486328125, 77.35960388183594, 579.822021484375, 480.0, 0.8703484535217285, 0.0, 219232, 307.0, 278.5]
0.4796875
[35.078338623046875, 77.81423950195312, 580.659423828125, 480.0, 0.853381872177124, 0.0, 219635, 307.5, 278.5]
0.48046875
[32.540191650390625, 78.12582397460938, 575.8414306640625, 480.0, 0.8767719268798828, 0.0, 218286, 303.5, 279.0]
0.47421875
[35.58209228515625, 76.43141174316406, 579.4327392578125, 480.0, 0.8461665511131287, 0.0, 219776, 307.0, 278.0]
0.4796875
[34.24017333984375, 78.45281982421875, 578.6946411132812, 480.0, 0.8955445885658264, 0.0, 218688, 306.0, 279.0]
0.478125
[34.993927001953125, 78.47402954101562, 578.3956298828125, 480.0, 0.8591907024383545, 0.0, 218688, 306.0, 279.0]
0.478125
[36.407318115234375, 79.6038818359375, 583.538818359375, 478.359619140625, 0.8388315439224243, 0.0, 218253, 309.5, 278.5]
0.48359375
[36.41754150390625, 78.49525451660156, 577.966552734375, 480.0, 0.8762525916099548, 0.0, 217482, 306.5, 279.0]
0.47890625
[34.4865417480

[325.14337158203125, 182.15554809570312, 638.6115112304688, 476.0860290527344, 0.9227535724639893, 0.0, 92022, 481.5, 329.0]
0.75234375
[326.176025390625, 182.5256805419922, 637.2101440429688, 476.5162353515625, 0.9155659079551697, 0.0, 91434, 481.5, 329.0]
0.75234375
[330.54815673828125, 181.72775268554688, 636.3240356445312, 475.0710754394531, 0.9171426296234131, 0.0, 89964, 483.0, 328.0]
0.7546875
[329.9878845214844, 182.00662231445312, 636.197021484375, 476.7295227050781, 0.9191113114356995, 0.0, 90258, 482.5, 329.0]
0.75390625
[325.91986083984375, 176.90248107910156, 636.8361206054688, 476.352294921875, 0.8912594318389893, 0.0, 93300, 480.5, 326.0]
0.75078125
[309.30975341796875, 177.30743408203125, 634.0408325195312, 476.45550537109375, 0.9310720562934875, 0.0, 97175, 471.5, 326.5]
0.73671875
[289.99859619140625, 175.15872192382812, 621.0060424804688, 477.6970520019531, 0.917711615562439, 0.0, 100264, 455.0, 326.0]
0.7109375
[255.51898193359375, 172.72210693359375, 589.4486694335

[5.883636474609375, 15.653594970703125, 590.2740478515625, 480.0, 0.7501824498176575, 0.0, 272025, 297.5, 247.5]
0.46484375
[5.268035888671875, 19.257568359375, 592.38720703125, 480.0, 0.7569437623023987, 0.0, 270607, 298.5, 249.5]
0.46640625
[5.716949462890625, 27.395523071289062, 586.6898193359375, 480.0, 0.7558106780052185, 0.0, 263193, 295.5, 253.5]
0.46171875
[0.0, 21.995193481445312, 592.7931518554688, 480.0, 0.7279230952262878, 0.0, 271728, 296.0, 250.5]
0.4625
[11.05877685546875, 21.093826293945312, 594.4573974609375, 480.0, 0.6888989806175232, 0.0, 267597, 302.5, 250.5]
0.47265625
[7.997467041015625, 35.03300476074219, 581.3487548828125, 480.0, 0.7786721587181091, 0.0, 255430, 294.0, 257.5]
0.459375
[11.989593505859375, 32.780517578125, 588.004150390625, 480.0, 0.8314712643623352, 0.0, 258496, 299.5, 256.0]
0.46796875
[0.0, 30.141387939453125, 590.5953369140625, 480.0, 0.8109650015830994, 0.0, 265500, 295.0, 255.0]
0.4609375
[12.709381103515625, 32.274322509765625, 591.5515136

In [None]:
# Get the webcam frame dimensions as (width, height).
# Only the last expression of a cell is displayed, so both values are
# returned together as a tuple instead of on two separate lines.
# NOTE: the detection cell calls cap.release() when its loop ends; run this
# while the capture is still open (or re-open it first) to get real values.
cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT)