# Data Preparation

## About Data
Video data from across the web and self-recorded videos is used to create the input dataset. 
Training data consists of 6 different poses as follows:
- Padamasan (Lotus Pose)
- Bhujangasana (Snake Pose)
- Shavasana (Corpse Pose) 
- Tadasana (Mountain Pose)
- Trikonasana (Triangle Pose)
- Vrikshasana (Tree Pose)

## Processing
The video data is preprocessed to create time series data consisting of angles at the body joints frame by frame while performing the yoga. The video input is passed through Mediapipe's Blazepose to get the coordinates(33 points on the body) of which the coordinates of wrist,  elbow, shoulder,  hip,  knee,  ankle  for both right and left are used to calculate 8 angle(including left and right both side) for the following groups: 
- Wrist, Elbow, Shoulder 
- Elbow, Shoulder, Hip 
- Shoulder, Hip, Knee 
- Hip, Knee, Ankle 

The angles are calculated for 30 equidistant data points through out the user pose.


In [54]:
import cv2
import mediapipe as mp
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import euclidean
# from fastdtw import fastdtw
import numpy as np
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

lst1 = []
singleList = []

def calculateAngle(x):
    '''
    Given the coordinated(x,y) calculate the angel formed by them
    
    Parameters:
        x(List[(x,y)]) : List of 3 pairs of x and y coordinates 
    
    Returns:
        angle(float): Angle made by the 3 coordinates(c1, c2, c3) at c2
    '''
    a = np.array(x[0])
    b = np.array(x[1])
    c = np.array(x[2])
    
    radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
    angle = np.abs(radians*180.0/np.pi)
    
    if angle>180.0:
        angle = 360-angle
    return angle

def calculate_angles(path):
        '''
    Given the path of the video calculate angle time series during the pose
    
    Parameters:
        path : Path to the video file
    
    Returns:
        df(Pandas.dataframe):Dataframe consisting of 8 angles for each frame
    '''
    cap = cv2.VideoCapture(path)
    lst = []
    
    angledict = {'leftWES':[], 'leftESH':[],'leftSHK':[], 'leftHKA':[], 'rightWES':[], 'rightESH':[], 'rightSHK':[],'rightHKA':[]}
    with mp_pose.Pose(
        smooth_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as pose:
        while cap.isOpened() :
            success, image = cap.read()
            if not success:
                break
            # Flip the image horizontally for a later selfie-view display, and convert
            # the BGR image to RGB.    
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#             frame_height, frame_width = image.shape[:2]
            
            # To improve performance, optionally mark the image as not writeable to
            # pass by reference.
            # image.flags.writeable = False
            results = pose.process(image)
            
            # Declaring aliases
            landmarks = results.pose_landmarks.landmark
            setOfJoints = mp_pose.PoseLandmark
            
            # Storing the coordinates as a pair of x and y values
            leftWrist = [landmarks[setOfJoints.LEFT_WRIST].x, landmarks[setOfJoints.LEFT_WRIST].y]
            leftElbow = [landmarks[setOfJoints.LEFT_ELBOW].x, landmarks[setOfJoints.LEFT_ELBOW].y]
            leftShoulder = [landmarks[setOfJoints.LEFT_SHOULDER].x, landmarks[setOfJoints.LEFT_SHOULDER].y]
            leftHip = [landmarks[setOfJoints.LEFT_HIP].x, landmarks[setOfJoints.LEFT_HIP].y]
            leftKnee = [landmarks[setOfJoints.LEFT_KNEE].x, landmarks[setOfJoints.LEFT_KNEE].y]
            leftAnkle = [landmarks[setOfJoints.LEFT_ANKLE].x, landmarks[setOfJoints.LEFT_ANKLE].y]
            rightWrist = [landmarks[setOfJoints.RIGHT_WRIST].x, landmarks[setOfJoints.RIGHT_WRIST].y]
            rightElbow = [landmarks[setOfJoints.RIGHT_ELBOW].x, landmarks[setOfJoints.RIGHT_ELBOW].y]
            rightShoulder = [landmarks[setOfJoints.RIGHT_SHOULDER].x, landmarks[setOfJoints.RIGHT_SHOULDER].y]
            rightHip = [landmarks[setOfJoints.RIGHT_HIP].x, landmarks[setOfJoints.RIGHT_HIP].y]
            rightKnee = [landmarks[setOfJoints.RIGHT_KNEE].x, landmarks[setOfJoints.RIGHT_KNEE].y]
            rightAnkle = [landmarks[setOfJoints.RIGHT_ANKLE].x, landmarks[setOfJoints.RIGHT_ANKLE].y]
            
            #  Creating a dictionary to calculate different angles
            angleDict = {'leftWES': (leftWrist, leftElbow, leftShoulder), 
                         'leftESH': (leftElbow, leftShoulder, leftHip), 
                         'leftSHK': (leftShoulder, leftHip, leftKnee), 
                         'leftHKA': (leftHip, leftKnee, leftAnkle), 
                         'rightWES': (rightWrist, rightElbow, rightShoulder), 
                         'rightESH': (rightElbow, rightShoulder, rightHip), 
                         'rightSHK': (rightShoulder, rightHip, rightKnee), 
                         'rightHKA': (rightHip, rightKnee, rightAnkle)}
           
            
            # Processing the angle dict to calculate different angles
            for angle in angleDict:
                x=calculateAngle(angleDict[angle])
                angledict[angle].append(x)

    cap.release()
    cv2.destroyAllWindows()
    df = pd.DataFrame(angledict)
    # print(df.head())
    return df

In [54]:
# Calculating the angle time series 
dfins = calculate_angles('yoga_min.mp4')

# Storing the dataframe to a csv
dfins.to_csv('yoga3.csv', index=False)

In [20]:
df = pd.read_csv('yoga3.csv')

In [21]:
df

Unnamed: 0,leftWES,leftESH,leftSHK,leftHKA,rightWES,rightESH,rightSHK,rightHKA
0,172.768991,25.432565,120.074887,31.466746,169.035355,25.560993,115.471929,33.662575
1,172.168818,26.081544,121.832135,30.830461,169.151667,27.481450,113.628079,31.221266
2,170.429261,28.899776,122.535209,30.829244,165.946378,28.784643,114.215600,31.166409
3,168.264156,31.548129,123.391039,31.765528,162.150498,30.724038,114.957871,32.568336
4,166.630244,33.817120,124.282068,34.303610,160.926532,33.989186,114.271365,33.210670
...,...,...,...,...,...,...,...,...
170,167.030613,11.224975,120.366534,24.749314,160.164557,8.136002,116.546473,34.272147
171,167.094827,11.324279,120.997462,25.147070,160.659154,8.203846,116.029822,34.299070
172,166.984537,11.075818,121.176234,24.901472,160.861050,8.490985,115.830933,34.237287
173,166.786962,11.424646,119.918815,23.801803,161.058076,8.380051,116.207768,34.009205


In [68]:
# import os
# ls =  ['Bhuj',   'Padam',  'Shav',  'Tada',  'Trik',  'Vriksh']
# for i in ls:
#     for j in poses[i]:
#         path = 'Yoga_Vid_Collected/'+i+"/"+j
#         if os.path.exists(path):
#             print('exists:',path)
#         else:
#             print("Not found",j)

In [69]:
# converting all the videos in to angle timeseries and storing them in csv files organized folderwise
import os
ls =   ['Bhuj',   'Padam',  'Shav',  'Tada',  'Trik',  'Vriksh']
for i in ls:
    k=0
    for j in poses[i]:
        path = 'Yoga_Vid_Collected/'+i+"/"+j
        if os.path.exists(path):
            print('exists:',path)
            dfins = calculate_angles(path)
            dst_path = 'Yoga_data_processed/'+i+"/"+str(k)+".csv"
            k = k+1
            dfins.to_csv(dst_path, index=False)
        else:
            print("Not found",j)

exists: Yoga_Vid_Collected/Trik/Pranshul_Trik.mp4
exists: Yoga_Vid_Collected/Trik/Sarthak_Trik.mp4
exists: Yoga_Vid_Collected/Trik/deepa_trikon.mp4
exists: Yoga_Vid_Collected/Trik/Kaustuk_Trik.mp4
exists: Yoga_Vid_Collected/Trik/Rakesh_Trik.mp4
exists: Yoga_Vid_Collected/Trik/Shiva_Trik.mp4
exists: Yoga_Vid_Collected/Vriksh/Abhay_Vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh/Dristi_Vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh/Piyush_Vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh/Sarthak_Vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh/Ameya_Vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh/Harshav_Vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh/Pranshul_Vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh/Shiva_Vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh/Bhumi_Vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh/Kaustuk_Vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh/Rakesh_Vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh/veena_vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh/deepa_vriksh.mp4
exists: Yoga_Vid_Collected/Vriksh

# manipulating data

In [6]:
# load data
import pandas as pd
data  = pd.read_csv('Yoga_data_processed/Bhuj/0.csv')

data    

Unnamed: 0,leftWES,leftESH,leftSHK,leftHKA,rightWES,rightESH,rightSHK,rightHKA
0,144.387282,2.554389,151.056204,55.368664,138.072808,21.307567,148.315448,49.962746
1,139.830499,4.947921,156.982230,52.912876,137.930976,19.659540,156.022275,53.351936
2,135.073300,7.062356,157.555271,53.267891,136.210445,20.046551,157.085526,54.448806
3,139.365008,5.805856,157.881883,51.263553,136.839882,19.889886,157.304753,52.440584
4,140.791029,5.163977,159.141742,53.796227,137.657115,20.763851,159.098236,56.708318
...,...,...,...,...,...,...,...,...
927,126.222222,11.905343,158.623367,153.409432,125.099555,10.332126,165.578688,152.652570
928,126.496315,12.182822,158.586649,153.307024,125.054811,10.355728,166.430834,150.802258
929,125.567883,12.284131,158.958605,153.177119,125.219592,11.228920,166.490638,152.426236
930,126.204589,14.134332,159.672121,152.046548,124.695864,11.878251,167.156380,151.521428


In [22]:
type(data.iloc[0])

pandas.core.series.Series

In [41]:
data.shape

(932, 8)

In [25]:
pd.DataFrame()


pandas.core.frame.DataFrame

In [87]:
9//2

4

## Creating training data
The time series for the user pose is processes to get multiple equidistance time series each containing 30 data points 

In [None]:
# testing the processing code
num  = 932//30
list_df = [pd.DataFrame() for i in range(num)]

print(num)
k=0
for i in range(num):
    for j in range(30):
        list_df[j] = pd.concat([list_df[j],data.iloc[k+j].to_frame().T],ignore_index=True)
    k=k+30

In [40]:
!ls Yoga_data_processed/

Bhuj  Padam  Shav  Tada  Trik  Vriksh


In [99]:
"""
    For each of the yoga pose time series for the user pose is processes to get multiple equidistance time series
    each containing 30 data points.
"""

# all the poses 
list = {'Bhuj': 14 ,'Padam':13,  'Shav':12 , 'Tada':14 , 'Trik':10  ,'Vriksh':14}

import os
for pose in list:
    for ls in range(list[pose]):
        data = pd.read_csv('Yoga_data_processed/'+pose+"/"+str(ls)+".csv")
    
        num  = data.shape[0]//30
    
        list_df = [pd.DataFrame() for i in range(num-1)]

        k=0
        for i in range(num-1):
            for j in range(30):
                list_df[i] = pd.concat([list_df[i],data.iloc[k+j].to_frame().T],ignore_index=True)
            k=k+30
        
        for n,file in enumerate(list_df):
            path = 'Yoga_data_large/'+pose+"/"+str(ls)+"."+str(n)+".csv"
            if not os.path.exists(path):
                print('exists:',path)
                file.to_csv(path, index=False)
            else:
                print("Not found",path)

exists: Yoga_data_large/Bhuj/0.0.csv
exists: Yoga_data_large/Bhuj/0.1.csv
exists: Yoga_data_large/Bhuj/0.2.csv
exists: Yoga_data_large/Bhuj/0.3.csv
exists: Yoga_data_large/Bhuj/0.4.csv
exists: Yoga_data_large/Bhuj/0.5.csv
exists: Yoga_data_large/Bhuj/0.6.csv
exists: Yoga_data_large/Bhuj/0.7.csv
exists: Yoga_data_large/Bhuj/0.8.csv
exists: Yoga_data_large/Bhuj/0.9.csv
exists: Yoga_data_large/Bhuj/0.10.csv
exists: Yoga_data_large/Bhuj/0.11.csv
exists: Yoga_data_large/Bhuj/0.12.csv
exists: Yoga_data_large/Bhuj/0.13.csv
exists: Yoga_data_large/Bhuj/0.14.csv
exists: Yoga_data_large/Bhuj/0.15.csv
exists: Yoga_data_large/Bhuj/0.16.csv
exists: Yoga_data_large/Bhuj/0.17.csv
exists: Yoga_data_large/Bhuj/0.18.csv
exists: Yoga_data_large/Bhuj/0.19.csv
exists: Yoga_data_large/Bhuj/0.20.csv
exists: Yoga_data_large/Bhuj/0.21.csv
exists: Yoga_data_large/Bhuj/0.22.csv
exists: Yoga_data_large/Bhuj/0.23.csv
exists: Yoga_data_large/Bhuj/0.24.csv
exists: Yoga_data_large/Bhuj/0.25.csv
exists: Yoga_data_larg

In [110]:
dir_list = os.listdir('Yoga_data_large/Bhuj')

In [111]:
dir_list

['5.39.csv',
 '4.13.csv',
 '4.43.csv',
 '6.4.csv',
 '4.34.csv',
 '13.7.csv',
 '10.2.csv',
 '7.17.csv',
 '8.9.csv',
 '10.5.csv',
 '9.40.csv',
 '7.1.csv',
 '7.24.csv',
 '11.51.csv',
 '7.19.csv',
 '7.31.csv',
 '10.47.csv',
 '10.55.csv',
 '9.19.csv',
 '7.13.csv',
 '4.3.csv',
 '10.46.csv',
 '1.9.csv',
 '5.5.csv',
 '10.48.csv',
 '0.2.csv',
 '8.37.csv',
 '1.22.csv',
 '5.12.csv',
 '.ipynb_checkpoints',
 '8.25.csv',
 '12.16.csv',
 '1.10.csv',
 '10.11.csv',
 '11.16.csv',
 '2.24.csv',
 '7.25.csv',
 '11.14.csv',
 '7.22.csv',
 '3.9.csv',
 '11.19.csv',
 '11.44.csv',
 '6.8.csv',
 '12.11.csv',
 '8.15.csv',
 '11.54.csv',
 '9.14.csv',
 '11.55.csv',
 '2.37.csv',
 '3.17.csv',
 '0.10.csv',
 '12.26.csv',
 '4.28.csv',
 '11.48.csv',
 '7.26.csv',
 '0.1.csv',
 '0.15.csv',
 '11.42.csv',
 '10.14.csv',
 '11.6.csv',
 '12.33.csv',
 '2.12.csv',
 '6.20.csv',
 '11.43.csv',
 '13.2.csv',
 '9.23.csv',
 '4.38.csv',
 '0.29.csv',
 '6.19.csv',
 '3.15.csv',
 '1.20.csv',
 '11.24.csv',
 '11.49.csv',
 '10.27.csv',
 '5.21.csv',
 '

In [104]:
data =  pd.read_csv('Yoga_data_large/Bhuj/0.0.csv')

## Adding Target coloumn to each of the time series
The target label(y) is added to each of the time series csv's

In [105]:
lst = [1]*data.shape[0]
print(lst)
df = pd.DataFrame(lst,columns =['yoga'])
print(df)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    yoga
0      1
1      1
2      1
3      1
4      1
5      1
6      1
7      1
8      1
9      1
10     1
11     1
12     1
13     1
14     1
15     1
16     1
17     1
18     1
19     1
20     1
21     1
22     1
23     1
24     1
25     1
26     1
27     1
28     1
29     1


In [106]:
pd.concat([data,df],axis = 1)


Unnamed: 0,leftWES,leftESH,leftSHK,leftHKA,rightWES,rightESH,rightSHK,rightHKA,yoga
0,144.387282,2.554389,151.056204,55.368664,138.072808,21.307567,148.315448,49.962746,1
1,139.830499,4.947921,156.98223,52.912876,137.930976,19.65954,156.022275,53.351936,1
2,135.0733,7.062356,157.555271,53.267891,136.210445,20.046551,157.085526,54.448806,1
3,139.365008,5.805856,157.881883,51.263553,136.839882,19.889886,157.304753,52.440584,1
4,140.791029,5.163977,159.141742,53.796227,137.657115,20.763851,159.098236,56.708318,1
5,142.411489,4.572217,157.624198,51.101114,137.550629,18.972859,159.409471,53.98248,1
6,140.481162,4.916581,157.723117,50.241518,136.098258,19.471313,159.013825,54.739413,1
7,143.246762,2.423797,156.873435,53.10585,141.538789,16.621847,157.455851,57.066727,1
8,144.674631,1.528235,156.881435,51.098861,144.959755,13.871608,157.653073,55.511931,1
9,147.692028,0.147333,154.998147,51.46317,145.551094,13.397915,156.0983,52.382888,1


In [122]:
#{'Bhuj': 1 ,'Padam':2,  'Shav':3 , 'Tada':4 , 'Trik':5  ,'Vriksh':6}
dir_list =  os.listdir('Yoga_data_large/Vriksh')
dir_list.remove('.ipynb_checkpoints')
# print(dir_list)
for i in dir_list:
    print(i)
    data =  pd.read_csv('Yoga_data_large/Vriksh/'+i)
    lst = [6]*data.shape[0]
    df = pd.DataFrame(lst,columns =['yoga'])
    df = pd.concat([data,df],axis = 1)
    df.to_csv('Yoga_data_large_labeled/Vriksh/'+i,index=False)

5.39.csv
4.13.csv
7.59.csv
4.43.csv
6.4.csv
4.34.csv
13.7.csv
10.2.csv
7.17.csv
8.9.csv
10.5.csv
3.44.csv
3.50.csv
9.40.csv
7.1.csv
7.24.csv
3.27.csv
7.48.csv
7.19.csv
0.36.csv
7.31.csv
10.47.csv
7.43.csv
13.19.csv
0.53.csv
9.19.csv
3.39.csv
7.13.csv
4.3.csv
10.46.csv
1.9.csv
5.5.csv
10.48.csv
0.2.csv
8.37.csv
1.22.csv
5.12.csv
8.25.csv
12.16.csv
1.10.csv
10.11.csv
11.16.csv
2.24.csv
13.30.csv
7.25.csv
11.14.csv
7.22.csv
3.9.csv
11.19.csv
6.8.csv
12.11.csv
8.15.csv
0.40.csv
9.14.csv
2.37.csv
3.17.csv
0.10.csv
12.26.csv
4.28.csv
7.26.csv
0.1.csv
0.15.csv
10.14.csv
4.46.csv
11.6.csv
12.33.csv
2.12.csv
6.20.csv
13.2.csv
7.54.csv
3.38.csv
9.23.csv
4.38.csv
0.29.csv
6.19.csv
3.15.csv
1.20.csv
7.58.csv
11.24.csv
3.40.csv
10.27.csv
5.21.csv
3.29.csv
1.44.csv
1.36.csv
4.0.csv
4.35.csv
0.39.csv
5.27.csv
12.31.csv
1.8.csv
4.37.csv
11.25.csv
3.2.csv
1.7.csv
12.7.csv
13.20.csv
9.5.csv
10.37.csv
6.16.csv
1.47.csv
9.3.csv
3.28.csv
5.42.csv
10.42.csv
3.33.csv
13.38.csv
11.17.csv
13.8.csv
5.14.csv
10.