In [1]:
import cv2
import mediapipe as mp
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader, random_split
import csv


In [64]:
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

In [8]:
cap = cv2.VideoCapture(0)
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False        
        results = holistic.process(image)
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(0,0,0), thickness=2, circle_radius=1),
                                 mp_drawing.DrawingSpec(color=(0,0,255), thickness=2, circle_radius=1))
        cv2.imshow('Webcam', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()

In [14]:
results.pose_landmarks.landmark[0]

x: 0.5285866856575012
y: 0.6777538061141968
z: -0.9280077815055847
visibility: 0.9999287724494934

In [20]:
num_coords = len(results.pose_landmarks.landmark)
landmarks = ['class']
for val in range(1, num_coords+1):
    landmarks += ['x{}'.format(val), 'y{}'.format(val), 'z{}'.format(val), 'v{}'.format(val)]

with open('data/coordinatess.csv', mode='w', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(landmarks)

In [26]:
class_name = "DBZ"
# class_name = "Breathtaking"
# class_name = "Victory"

In [None]:
cap = cv2.VideoCapture(0)
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False
        results = holistic.process(image)
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                 mp_drawing.DrawingSpec(color=(0,0,0), thickness=2, circle_radius=1),
                                 mp_drawing.DrawingSpec(color=(0,0,255), thickness=2, circle_radius=1))
        try:
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
            pose_row.insert(0, class_name)
            with open('data/coordinatess.csv', mode='a', newline='') as f:
                csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                csv_writer.writerow(pose_row)  
        except:
            pass    
        cv2.imshow('Webcam', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()

In [2]:
data = pd.read_csv('data/coordinates.csv')

In [22]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(0, 500)
        self.fc2 = nn.Linear(500, 250)
        self.fc3 = nn.Linear(250, 100)
        self.fc4 = nn.Linear(100, 10)
 
    def forward(self, x):
        x = x.view(x.shape[0], 0)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = F.softmax(x, dim=1)
        return x

In [34]:
class LoadData(Dataset):
    def __init__(self, data):
        super().__init__()
        self.x = data.drop(['class'], axis=1).values
        self.y = data['class'].values
        self.n_samples = len(self.x)

    def __getitem__(self, index):
        return self.x[index], self.y[index]
        
    def __len__(self):
        return self.n_samples

In [39]:
def load_data():        
    dataset = LoadData(data)
    train_samples = int(len(dataset) * 0.8)
    train_set, test_set = random_split(dataset, [train_samples, len(dataset) - train_samples])
    trainloader = DataLoader(dataset=train_set, batch_size=8, shuffle=True)
    testloader = DataLoader(dataset=test_set, batch_size=8)
    return trainloader, testloader

In [3]:
data.shape

(3031, 133)

In [4]:
labels = data['class'].replace(['DBZ', 'Breathtaking', 'Victory'], [0,1,2])
labels.shape

(3031,)

In [5]:
data.groupby('class').size()

class
Breathtaking    736
DBZ             946
DBZ-Loki        592
Victory         757
dtype: int64

In [52]:
model = Network()
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
trainloader, testloader = load_data()

epochs = 3
best_loss = float(np.inf)

for e in range(epochs):
    loss_train, loss_test = 0, 0
    model.train()
    for x_train, y_train in iter(trainloader):
        print(x_train.dtype)
        x_train, y_train = x_train.float(), y_train.float()

        optimizer.zero_grad()
        outputs = model(x_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
        loss_train += loss.item()
    
    total, correct = 0, 0
    model.eval()
    with torch.no_grad():
        for x_test, y_test in iter(testloader):
            x_test, y_test = x_test.float(), y_test.float()

            output = model(x_test)
            loss = criterion(output, y_test)
            loss_test += loss.item()
            _, predicted = torch.max(output.data, 1)
            total += y_test.size(0)
            correct += (predicted == y_test).sum().item()

    print(f'Training Loss: {loss_train}, Test Loss: {loss_test}, Epoch: {e}, Accuracy: {correct/total}')

    if loss_train < best_loss:
        best_loss = loss_train
        torch.save(model.state_dict(), 'models/poses_model.pth')



torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.float64
torch.

In [None]:
cap = cv2.VideoCapture(0)
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False
        results = holistic.process(image)
        image.flags.writeable = True   
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                                mp_drawing.DrawingSpec(color=(0,0,0), thickness=2, circle_radius=1),
                                mp_drawing.DrawingSpec(color=(0,0,255), thickness=2, circle_radius=1))
        try:
            pose = results.pose_landmarks.landmark
            pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())
            
            x = pd.DataFrame([pose_row])
            data_set = LoadData(x)
            newloader = DataLoader(dataset=data_set, batch_size=8)
            if __name__ == '__main__':
                model.load_state_dict(torch.load('models/poses_model.pth'))
                model.eval()
                total, correct = 0, 0
                with torch.no_grad():
                    for x, y in iter(newloader):
                        x, y = x.float(), y.float()
                        output = model(x)
                        _, predicted = torch.max(output.data, 1)
                        total += y.size(0)
                        correct += (predicted == y).sum().item()

                        cv2.rectangle(image, (0,0), (250, 60), (245, 117, 16), -1)
                        cv2.putText(image, 'CLASS', (95,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                        cv2.putText(image, y.split(' ')[0], (90,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                        cv2.putText(image, 'PROBABILITY', (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                        cv2.putText(image, str(round(predicted[np.argmax(predicted)],2)), (10,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            
                    print(f'Accuracy: {correct/total}')
        except:
            pass    
        cv2.imshow('Webcam', image)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()