# Collegamento a Drive e Installazione librerie

In [None]:
# 1. MOUNT GOOGLE DRIVE
# Mounts the user's Drive at /content/drive; the dataset paths used later in
# this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# 2. INSTALL COMPATIBLE DEPENDENCIES
# 2a. Remove any preinstalled mediapipe/protobuf before pinning versions
!pip uninstall -y mediapipe protobuf

# 2b. Install the pinned protobuf version
!pip install protobuf==3.20.3

# 2c. Install the pinned mediapipe version
!pip install mediapipe==0.10.8

[0mFound existing installation: protobuf 5.29.5
Uninstalling protobuf-5.29.5:
  Successfully uninstalled protobuf-5.29.5
Collecting protobuf==3.20.3
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Downloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ydf 0.12.0 requires protobuf<6.0.0,>=5.29.1, but you have protobuf 3.20.3 which is incompatible.
grpcio-status 1.71.2 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 3.20.3 which is incompatible.
tensorflow-metadata 1.17.2 requires protobuf>=4.25.2; python_version >= "3.11", but you have protobuf 3.20.3 which is incompatible.[0m[31m
[0mSuccessfully installed 

Collecting mediapipe==0.10.8
  Downloading mediapipe-0.10.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting sounddevice>=0.4.4 (from mediapipe==0.10.8)
  Downloading sounddevice-0.5.2-py3-none-any.whl.metadata (1.6 kB)
Downloading mediapipe-0.10.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.5/34.5 MB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.2-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.8 sounddevice-0.5.2


# Elaborazione Depth Map e posizionamento Landmarks

In [None]:
# 3. DOWNLOAD THE HAND LANDMARK DETECTION MODEL
# The file is later loaded by its bare name 'hand_landmarker.task', i.e. from
# the current working directory.
!wget -q https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task

# 4. ESEGUI IL CODICE DI ESTRAZIONE LANDMARK
import os
import cv2
import csv
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy.ndimage import binary_fill_holes

# ---------------- DEPTH MAP FUNZIONI ----------------

def load_raw_depth_map(filename, width=640, height=480):
    """Read a headerless 16-bit depth image from disk.

    Reads up to ``width * height`` unsigned 16-bit values from ``filename``
    and returns them as an array of shape ``(height, width)``.

    Raises:
        ValueError: if the number of values read differs from
            ``width * height``.
    """
    expected_count = width * height
    with open(filename, 'rb') as raw_file:
        samples = np.fromfile(raw_file, dtype=np.uint16, count=expected_count)
    if samples.size != expected_count:
        raise ValueError("Dimensioni del file non corrispondono a quelle attese.")
    return samples.reshape((height, width))

def fill_pixel_depth_image(img, i, j):
    """Overwrite ``img[i, j]`` with the mean of its valid 8-neighbours.

    A neighbour is valid when it lies inside the image and its value is
    strictly between 0 and 65535. When no neighbour qualifies the pixel is
    left untouched. The image is modified in place; nothing is returned.
    """
    rows, cols = img.shape
    offsets = [
        (d_row, d_col)
        for d_row in (-1, 0, 1)
        for d_col in (-1, 0, 1)
        if (d_row, d_col) != (0, 0)
    ]

    valid_values = []
    for d_row, d_col in offsets:
        r, c = i + d_row, j + d_col
        if not (0 <= r < rows and 0 <= c < cols):
            continue  # neighbour falls outside the image
        sample = img[r, c]
        if 0 < sample < 65535:
            valid_values.append(sample)

    if valid_values:
        img[i, j] = np.mean(valid_values)

def process_depth_map(depth_map):
    """Fill enclosed zero-valued holes of a depth map, in place.

    Hole pixels are those that are zero in ``depth_map`` but become set when
    the ``depth_map > 0`` mask is passed through ``binary_fill_holes``. Each
    hole pixel that is not on the outermost rows/columns is handed to
    ``fill_pixel_depth_image``, visiting pixels row by row so that values
    filled earlier can contribute to later ones.

    Returns the same (mutated) ``depth_map`` array.
    """
    valid = depth_map > 0
    holes = binary_fill_holes(valid) & ~valid
    n_rows, n_cols = depth_map.shape

    # Only the interior is scanned; +1 maps interior indices back to the
    # coordinates of the full image. argwhere yields row-major order.
    interior_holes = holes[1:n_rows - 1, 1:n_cols - 1]
    for r, c in np.argwhere(interior_holes) + 1:
        fill_pixel_depth_image(depth_map, r, c)
    return depth_map

# ---------------- PARAMETERS ----------------

dataset_path = '/content/drive/MyDrive/GRUPPO_14/dataset'
image_width, image_height = 640, 480
selected_ids = list(range(21))  # the 21 hand landmarks

# ---------------- INITIALISE MEDIAPIPE ----------------

model_path = 'hand_landmarker.task'
base_options = python.BaseOptions(model_asset_path=model_path)
options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=1)
detector = vision.HandLandmarker.create_from_options(options)

# ---------------- PROCESS IMAGES + MATCHING DEPTH MAP ----------------
# For every colour image under dataset_path: load the matching *_Depth.raw,
# fill its holes, detect one hand and write a per-frame CSV next to the image.
z_depth_list = []

for subdir, dirs, files in os.walk(dataset_path):
    for file in files:
        if file.lower().endswith(('.jpg', '.png', '.jpeg')):
            # Depth values of the current frame only (reset for every image)
            z_depth_list = []
            image_path = os.path.join(subdir, file)
            image_name_wo_ext = os.path.splitext(file)[0]

            # Extract the prefix: everything before "_Color" in the file name
            if "_Color" in image_name_wo_ext:
                prefix = image_name_wo_ext.split("_Color")[0]
            else:
                print(f"Immagine {file} non segue il formato *_Color.*")
                continue

            # Build the name of the matching depth file (same folder, same prefix)
            depth_filename = os.path.join(subdir, prefix + '_Depth.raw')
            if not os.path.exists(depth_filename):
                print(f"Depth map non trovata per {file} → {depth_filename}")
                continue

            # Load the depth map and fill its holes (on a copy of the raw data)
            depth_map = load_raw_depth_map(depth_filename)
            filled_depth_map = process_depth_map(depth_map.copy())

            # Landmark detection
            mp_image = mp.Image.create_from_file(image_path)
            result = detector.detect(mp_image)

            if not result.hand_landmarks:
                print(f"Nessuna mano trovata in {image_path}")
                continue

            # Only the first detected hand is used (num_hands=1 above)
            landmarks = result.hand_landmarks[0]
            output_csv = os.path.join(subdir, prefix + '_landmarks_with_depth.csv')
            with open(output_csv, mode='w', newline='') as f:
                writer = csv.writer(f)
                writer.writerow(['id', 'x_pixel', 'y_pixel', 'z_normalized', 'z_depth'])

                for i in selected_ids:
                    lm = landmarks[i]
                    # Scale lm.x / lm.y to pixel indices using the fixed size above.
                    # NOTE(review): assumes the colour image and the depth map are
                    # both 640x480 and pixel-aligned — confirm against the dataset.
                    x_pixel = round(lm.x * (image_width - 1))
                    y_pixel = round(lm.y * (image_height - 1))

                    if 0 <= y_pixel < filled_depth_map.shape[0] and 0 <= x_pixel < filled_depth_map.shape[1]:
                        z_depth = filled_depth_map[y_pixel, x_pixel]
                    else:
                        z_depth = -1  # landmark falls outside the depth map

                    z_depth_list.append(z_depth)
                    writer.writerow([i, x_pixel, y_pixel, lm.z, z_depth])

                # Warn when every landmark of this frame fell outside the depth map
                # (all z_depth == -1); the CSV is still written.
                if all(z == -1 for z in z_depth_list):
                  print(f"ATTENZIONE: tutti i landmark per il frame {file} sono fuori dalla depth map")



            print(f"Salvato: {output_csv}")


# Creazione Dataset completo con coordinate e calcolo distanze

In [None]:
# 5. MERGE ALL PER-FRAME CSVs INTO A SINGLE FILE

# One DataFrame per frame is collected here
all_data = []

# Walk the dataset again looking for the *_landmarks_with_depth.csv files.
# Folders and files are visited in sorted order: os.walk yields them in
# arbitrary order, and later steps number the frames by their row position in
# the merged file, so a fixed order keeps that numbering reproducible.
for subdir, dirs, files in os.walk(dataset_path):
    dirs.sort()  # in-place edit steers the order in which os.walk descends
    for file in sorted(files):
        if not file.endswith('_landmarks_with_depth.csv'):
            continue
        file_path = os.path.join(subdir, file)

        # Read the per-frame CSV
        df = pd.read_csv(file_path)

        # The gesture class is the name of the folder that contains the CSV
        gesture_class = os.path.basename(os.path.dirname(file_path))
        df['class'] = gesture_class

        all_data.append(df)

# pd.concat([]) would fail with a message that does not point at the cause
if not all_data:
    raise ValueError(
        f"Nessun file *_landmarks_with_depth.csv trovato in {dataset_path}"
    )

# Stack all frames into one DataFrame
final_df = pd.concat(all_data, ignore_index=True)

# Save as a single CSV
final_df.to_csv('/content/drive/MyDrive/GRUPPO_14/dataset/landmarks_all_with_depth.csv', index=False)
print("✅ File unico creato: landmarks_all_with_depth.csv")


Calcolo distanze tra landmarks

In [None]:
import numpy as np
import pandas as pd
import csv
import os

# ----------- CAMERA PARAMETERS (as in the MATLAB code) ------------------
s = dict(
    principal_point=[309.568, 245.154],
    focal_length=[474.346, 474.346],
    distortion_coeffs=[0.139663, 0.0914142, 0.00468509, 0.00220023, 0.0654529],
    depth_scale=0.000125,
)

# ----------- FUNCTION: deprojection with lens-distortion correction ----------------
def rs2_deproject_pixel_to_point(s, pixel, depth):
    """Map a pixel and its depth to a 3D point.

    Args:
        s: mapping with "principal_point" (cx, cy), "focal_length" (fx, fy)
            and "distortion_coeffs" unpacked as (k1, k2, p1, p2, k3).
        pixel: indexable whose items 0 and 1 are the pixel x and y.
        depth: depth of the pixel; the result is expressed in the same unit.

    Returns:
        ``np.array([X, Y, Z])`` where ``Z`` equals ``depth``.
    """
    cx, cy = s["principal_point"]
    fx, fy = s["focal_length"]
    k1, k2, p1, p2, k3 = s["distortion_coeffs"]
    u, v = pixel[0], pixel[1]

    # Coordinates relative to the principal point, in focal-length units
    x = (u - cx) / fx
    y = (v - cy) / fy

    # Radial polynomial in r^2 plus the two tangential terms
    r2 = x * x + y * y
    radial = 1 + k1 * r2 + k2 * r2 ** 2 + k3 * r2 ** 3

    ux = x * radial + 2 * p1 * x * y + p2 * (r2 + 2 * x * x)
    uy = y * radial + 2 * p2 * x * y + p1 * (r2 + 2 * y * y)

    return np.array([depth * ux, depth * uy, depth])

# ----------- FUNCTION: Euclidean distance -------------------------
def dist_3D(s, pixel_start, pixel_end):
    """
    Return the 3D distance between two pixels given as (x, y, raw_depth).

    Each raw depth is multiplied by the depth scale, the pixel is deprojected
    with ``rs2_deproject_pixel_to_point`` (which applies the distortion
    coefficients in ``s``), and the Euclidean distance between the two
    resulting points is returned.

    Args:
        s: camera-parameter mapping. ``s["depth_scale"]`` is used as the
            depth unit; when the key is absent the previous hard-coded value
            0.000125 is used.
        pixel_start: sequence whose first three items are x, y, raw depth.
        pixel_end: same layout as ``pixel_start``.
    """
    # Take the scale from the camera parameters instead of duplicating the
    # constant here, so changing s["depth_scale"] actually has an effect.
    depth_units = s.get("depth_scale", 0.000125)
    z1 = pixel_start[2] * depth_units
    z2 = pixel_end[2] * depth_units

    p1 = rs2_deproject_pixel_to_point(s, pixel_start[:2], z1)
    p2 = rs2_deproject_pixel_to_point(s, pixel_end[:2], z2)

    distance = np.sqrt(np.sum((p2 - p1) ** 2))
    return distance

# ----------- LANDMARK PAIRS (as in the MediaPipe hand diagram) ------------
landmark_pairs = [
    # chain 0-1-2-3-4
    (0, 1), (1, 2), (2, 3), (3, 4),
    # chain 0-5-9-13-17
    (0, 5), (5, 9), (9, 13), (13, 17),
    # chain 5-6-7-8
    (5, 6), (6, 7), (7, 8),
    # chain 9-10-11-12
    (9, 10), (10, 11), (11, 12),
    # chain 13-14-15-16
    (13, 14), (14, 15), (15, 16),
    # chain 17-18-19-20
    (17, 18), (18, 19), (19, 20),
]

import numpy as np
import pandas as pd
import csv

# ----------- CAMERA PARAMETERS (as in the MATLAB code) ------------------
s = dict(
    principal_point=[309.568, 245.154],
    focal_length=[474.346, 474.346],
    distortion_coeffs=[0.139663, 0.0914142, 0.00468509, 0.00220023, 0.0654529],
    depth_scale=0.000125,  # multiplied with raw depth values in dist_3D
)

# ----------- DEPROJECTION FUNCTION --------------------------------
def rs2_deproject_pixel_to_point(s, pixel, depth):
    """Convert a pixel position plus depth into a 3D point.

    ``s`` supplies "principal_point" (cx, cy), "focal_length" (fx, fy) and
    "distortion_coeffs" (k1, k2, p1, p2, k3). ``pixel[0]`` / ``pixel[1]`` are
    the pixel x / y. The returned ``np.array([X, Y, Z])`` has ``Z == depth``
    and shares the unit of ``depth``.
    """
    cx, cy = s["principal_point"]
    fx, fy = s["focal_length"]
    k1, k2, p1, p2, k3 = s["distortion_coeffs"]

    # Offset from the principal point, divided by the focal length
    x_norm = (pixel[0] - cx) / fx
    y_norm = (pixel[1] - cy) / fy

    radius_sq = x_norm * x_norm + y_norm * y_norm
    radial_gain = 1 + k1 * radius_sq + k2 * radius_sq**2 + k3 * radius_sq**3

    x_corr = x_norm * radial_gain + 2 * p1 * x_norm * y_norm + p2 * (radius_sq + 2 * x_norm * x_norm)
    y_corr = y_norm * radial_gain + 2 * p2 * x_norm * y_norm + p1 * (radius_sq + 2 * y_norm * y_norm)

    # Scale the corrected direction by the depth; Z is the depth itself
    return np.array([depth * x_corr, depth * y_corr, depth])

# ----------- 3D DISTANCE FUNCTION --------------------------
def dist_3D(s, pixel_start, pixel_end):
    """Return the Euclidean distance between two (x, y, raw_depth) pixels.

    Each raw depth is multiplied by ``s["depth_scale"]``, both pixels are
    deprojected with ``rs2_deproject_pixel_to_point`` and the norm of the
    difference of the two resulting points is returned.
    """
    scale = s["depth_scale"]
    depth_start, depth_end = pixel_start[2] * scale, pixel_end[2] * scale
    start_point = rs2_deproject_pixel_to_point(s, pixel_start[:2], depth_start)
    end_point = rs2_deproject_pixel_to_point(s, pixel_end[:2], depth_end)
    return np.linalg.norm(end_point - start_point)

# ----------- MEDIAPIPE LANDMARK NAMES -------------------------------
# Index in this list == landmark id (0..20).
landmark_names = ["WRIST", "THUMB_CMC", "THUMB_MCP", "THUMB_IP", "THUMB_TIP"] + [
    f"{finger}_{joint}"
    for finger in ("INDEX", "MIDDLE", "RING", "PINKY")
    for joint in ("MCP", "PIP", "DIP", "TIP")
]

# Each chain lists landmark ids in order; consecutive ids form one pair.
_landmark_chains = [
    [0, 1, 2, 3, 4],      # wrist -> thumb tip
    [0, 5, 9, 13, 17],    # wrist -> index MCP, then across the MCP joints
    [5, 6, 7, 8],         # index finger
    [9, 10, 11, 12],      # middle finger
    [13, 14, 15, 16],     # ring finger
    [17, 18, 19, 20],     # pinky
]
landmark_pairs = [
    (start, end)
    for chain in _landmark_chains
    for start, end in zip(chain, chain[1:])
]

# ----------- LOAD THE LANDMARK CSV -------------------------
csv_path = '/content/drive/MyDrive/GRUPPO_14/dataset/landmarks_all_with_depth.csv'
all_data = pd.read_csv(csv_path)
num_landmarks = 21

# Frames are identified purely by position: each consecutive group of
# num_landmarks rows is one frame. Validate that layout before writing
# anything, otherwise a short or reordered frame would silently shift every
# frame after it (and trailing rows would be dropped by the integer division).
if len(all_data) % num_landmarks != 0:
    raise ValueError(
        f"{csv_path}: {len(all_data)} righe non sono un multiplo di {num_landmarks}"
    )
num_frames = len(all_data) // num_landmarks
frame_ids_matrix = all_data["id"].to_numpy().reshape(num_frames, num_landmarks)
if not (frame_ids_matrix == np.arange(num_landmarks)).all():
    raise ValueError(
        f"{csv_path}: la colonna 'id' non segue la sequenza 0..{num_landmarks - 1} per ogni frame"
    )

# ----------- WRITE A SINGLE CSV FOR ALL FRAMES ---------------------
out_path = '/content/drive/MyDrive/GRUPPO_14/dataset/distanze_tutti_i_frame.csv'
with open(out_path, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["frame_id", "pair", "distance_cm"])  # header

    for frame_id in range(num_frames):
        frame_data = all_data.iloc[frame_id * num_landmarks : (frame_id + 1) * num_landmarks]
        landmarks = frame_data[["x_pixel", "y_pixel", "z_depth"]].values  # shape: (21, 3)

        # One row per landmark pair, labelled "<NAME_I>_<NAME_J>"
        for i, j in landmark_pairs:
            name_i = landmark_names[i]
            name_j = landmark_names[j]
            d = dist_3D(s, landmarks[i], landmarks[j]) * 100  # in cm
            writer.writerow([frame_id, f"{name_i}_{name_j}", d])

print(f"Tutte le distanze sono state salvate in '{out_path}'")

COSTRUZIONE DATASET FINALE (FEATURES + CLASSE)

In [None]:
import pandas as pd

#  1. Load the 3D distances file (one row per frame and landmark pair)
df_dist = pd.read_csv('/content/drive/MyDrive/GRUPPO_14/dataset/distanze_tutti_i_frame.csv')

#  2. Load the landmark file, which carries the class label
df_landmarks = pd.read_csv('/content/drive/MyDrive/GRUPPO_14/dataset/landmarks_all_with_depth.csv')

# Number of frames: every block of 21 consecutive rows is one frame
num_landmarks = 21
num_frames = len(df_landmarks) // num_landmarks

# Take the class of each frame from the first row of its 21-row block
class_per_frame = df_landmarks['class'][::num_landmarks].reset_index(drop=True)

# Build the frame_id → class mapping
frame_class_df = pd.DataFrame({
    'frame_id': range(num_frames),
    'class': class_per_frame
})

# 3. Reshape the distances: one row per frame, one column per landmark pair
pivot_df = df_dist.pivot(index='frame_id', columns='pair', values='distance_cm')
pivot_df = pivot_df.reindex(sorted(pivot_df.columns), axis=1)  # sort columns alphabetically

# 4. Attach the class.
# NOTE(review): after the pivot, 'frame_id' is the index of pivot_df, so this
# merge relies on pandas resolving on='frame_id' against an index level on the
# left and a column on the right — confirm with the installed pandas version.
pivot_df = pivot_df.merge(frame_class_df, on='frame_id')

#  5. Save the aggregated file
pivot_df.to_csv('/content/drive/MyDrive/GRUPPO_14/dataset/distanze_aggregated_con_classe.csv', index=False)
print("File salvato: distanze_aggregated_con_classe.csv")


Suddivisione Dataset

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import random

import os

from sklearn.model_selection import train_test_split

# Base path of the dataset on Drive
base_path = '/content/drive/MyDrive/GRUPPO_14/dataset'

# Load the aggregated features. The aggregation step writes this file under
# base_path, so read it from there; fall back to the working directory, which
# is where this cell originally looked for it.
features_csv = os.path.join(base_path, 'distanze_aggregated_con_classe.csv')
if not os.path.exists(features_csv):
    features_csv = './distanze_aggregated_con_classe.csv'
dataset = pd.read_csv(features_csv)
dataset = dataset.drop(['frame_id'], axis=1)

# Report how many entries of the table are exactly 0
print(f"Amount of 0 values in the features: {((dataset == 0).sum()).sum()}")

X = dataset.drop(['class'], axis=1)  # every column except the class
y = dataset['class']                 # target column

# Split first, then standardise: the scaler must be fitted on the training
# rows only, otherwise the test rows contribute to the mean/std and the test
# score is optimistically biased. Same split parameters as before, so the
# same rows end up in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

scaler = StandardScaler()  # z-score standardisation
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any cell still referring to X_scaled continues to work; it is
# the full table transformed with the training-set statistics.
X_scaled = scaler.transform(X)

# Create the output folders if they do not exist
for folder in ['training', 'validation', 'test']:
    os.makedirs(os.path.join(base_path, folder), exist_ok=True)

# Rebuild DataFrames that carry the class next to the scaled features
train_df = pd.DataFrame(X_train, columns=[f'feat{i}' for i in range(X_train.shape[1])])
train_df['class'] = y_train.reset_index(drop=True)

test_df = pd.DataFrame(X_test, columns=[f'feat{i}' for i in range(X_test.shape[1])])
test_df['class'] = y_test.reset_index(drop=True)

# Save the CSV files
train_df.to_csv(os.path.join(base_path, 'training', 'train.csv'), index=False)
test_df.to_csv(os.path.join(base_path, 'test', 'test.csv'), index=False)

print("Dataset salvato in: training/train.csv, test/test.csv")


In [None]:
# Report the array shape of each split
for label, split_array in (("Shape X_train:", X_train), ("Shape X_test:", X_test)):
    print(label, split_array.shape)

In [None]:
# Number of samples available for each class

class_counts = y.value_counts()
print(class_counts)

Visualizzazione RGB+Landmarks

In [None]:
# Example colour frame and landmark CSV from gesture folder "1".
# This rebinds the csv_path name also used by the distance-computation cell.
# NOTE(review): the extraction step names its output
# '<prefix>_landmarks_with_depth.csv'; confirm that this
# '_Color_landmarks.csv' file actually exists on Drive.
img_path = "/content/drive/MyDrive/GRUPPO_14/dataset/1/ALESSANDRA_1D_Color.png"
csv_path = "/content/drive/MyDrive/GRUPPO_14/dataset/1/ALESSANDRA_1D_Color_landmarks.csv"

def salva_landmark_con_connessioni(img_path, csv_path, output_path):
    """Draw hand landmarks and their connections on an image and save it.

    Args:
        img_path: path of the image to draw on.
        csv_path: CSV with at least the columns 'id', 'x_pixel', 'y_pixel'.
        output_path: where the rendered figure is written.

    Raises:
        FileNotFoundError: if the image at ``img_path`` cannot be read.
    """
    import cv2
    import pandas as pd
    import matplotlib.pyplot as plt

    landmark_pairs = [
        (0, 1), (1, 2), (2, 3), (3, 4), (0, 5),
        (5, 9), (9, 13), (13, 17), (5, 6), (6, 7),
        (7, 8), (9, 10), (10, 11), (11, 12),
        (13, 14), (14, 15), (15, 16),
        (17, 18), (18, 19), (19, 20), (0, 17)
    ]

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable/missing file by returning None;
        # fail here with the path instead of an opaque cvtColor error.
        raise FileNotFoundError(f"Impossibile leggere l'immagine: {img_path}")
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    df = pd.read_csv(csv_path)

    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        ax.imshow(img_rgb)
        ax.axis('off')

        # All landmarks as red dots ('ro' sets markers only, no joining line)
        ax.plot(df['x_pixel'], df['y_pixel'], 'ro', markersize=4)

        # Connections: drawn only when both endpoints are present in the CSV;
        # the first row found for an id is used.
        for i, j in landmark_pairs:
            point_i = df[df['id'] == i]
            point_j = df[df['id'] == j]
            if point_i.empty or point_j.empty:
                continue
            xi, yi = point_i.iloc[0]['x_pixel'], point_i.iloc[0]['y_pixel']
            xj, yj = point_j.iloc[0]['x_pixel'], point_j.iloc[0]['y_pixel']
            ax.plot([xi, xj], [yi, yj], color='yellow', linewidth=1.5)

        fig.savefig(output_path, bbox_inches='tight')
    finally:
        # Release the figure even when drawing or saving fails
        plt.close(fig)
    print(f"Immagine salvata in: {output_path}")

# Render one example frame for each gesture folder "1".."4".
# The landmark CSV name follows what the extraction step writes
# ('<prefix>_landmarks_with_depth.csv', with the prefix being the image name
# up to "_Color"). The '_Color_landmarks.csv' name used previously is kept as
# a fallback for when only such a file is present.
import os

_gesture_root = "/content/drive/MyDrive/GRUPPO_14/dataset"
for gesture_id in (1, 2, 3, 4):
    example_img = f"{_gesture_root}/{gesture_id}/ALESSANDRA_1D_Color.png"
    example_csv = f"{_gesture_root}/{gesture_id}/ALESSANDRA_1D_landmarks_with_depth.csv"
    if not os.path.exists(example_csv):
        legacy_csv = f"{_gesture_root}/{gesture_id}/ALESSANDRA_1D_Color_landmarks.csv"
        if os.path.exists(legacy_csv):
            example_csv = legacy_csv
    salva_landmark_con_connessioni(
        example_img,
        example_csv,
        f"/content/drive/MyDrive/GRUPPO_14/gesto{gesture_id}.png",
    )

# Valutazione riduzione dimensionale

In [None]:
# Reduce dimensionality with PCA
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# The projection is fitted on the training split only and then applied
# unchanged to the test split. Per the scikit-learn PCA documentation, a
# fractional n_components selects the number of components by explained
# variance (here 0.95).
pca = PCA(n_components=0.95)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

for label, projected in (("Shape X_train_pca:", X_train_pca), ("Shape X_test_pca:", X_test_pca)):
    print(label, projected.shape)