# YogaGuru: Data Collection

## Import required libraries


In [None]:
import math
import pickle

import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

## Capture the landmarks from an image/video

In [None]:
# MediaPipe helpers: drawing utilities and the pose-estimation solution.
mpDraw = mp.solutions.drawing_utils
mpPose = mp.solutions.pose
pose = mpPose.Pose()

# Enter your video location to collect the landmarks
cap = cv2.VideoCapture('C:/Users/mi/YogaGuru/Tree/10.jpg')

# One single-item list, [pose_landmarks], is stored for every frame read.
landmark_frames_data = []
while True:
    success, img = cap.read()
    if not success:
        # Nothing (more) could be read from the source: stop capturing.
        break
    # h and w stay bound at module level after the loop; make_dict reads them
    # to scale landmark coordinates.
    h, w, c = img.shape
    # The frame is converted from BGR to RGB before it is handed to MediaPipe.
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pose.process(imgRGB)
    landmark_frames_data.append([results.pose_landmarks])

print("All landmarks Captured in landmark_frames_data ")
cap.release()

## landmark_frames_data holds the relative x, y, z coordinates and the visibility of each body landmark predicted by MediaPipe

In [None]:
# Display the raw capture results (one single-item list per frame read).
landmark_frames_data

## List of attributes (columns) for making the dataset

In [None]:
# Column names of the dataset: twelve joint angles followed by the pose
# label and its accuracy score.
column_list = [
    # arms and shoulders
    'right_elbow_angle',
    'right_shoulder_angle',
    'left_shoulder_angle',
    'left_elbow_angle',
    # hips and knees
    'right_hip_angle',
    'left_hip_angle',
    'right_knee_angle',
    'left_knee_angle',
    # ankles and shoulder-to-nose
    'right_ankle_angle',
    'left_ankle_angle',
    'right_shoulder_wrt_nose_angle',
    'left_shoulder_wrt_nose_angle',
    # label columns
    'PoseName',
    'PoseAccurracy',
]

In [None]:
# Create an empty DataFrame whose columns are the names in column_list.
data=pd.DataFrame(columns=column_list)

##  getAngle: a  function to find the angle between 3 points

In [None]:

def getAngle(a, b, c):
    """Return the angle at vertex ``b`` between rays b->a and b->c, in degrees.

    Only the first two items (x, y) of each point are used; any further
    items are ignored. The raw difference of the two ray directions can be
    negative, in which case 360 is added so the result is non-negative.

    Args:
        a: Point on the first ray, indexable as ``a[0]`` (x) and ``a[1]`` (y).
        b: Vertex of the angle, same layout.
        c: Point on the second ray, same layout.

    Returns:
        The angle from ray b->a to ray b->c in degrees.
    """
    direction_to_c = math.atan2(c[1] - b[1], c[0] - b[0])
    direction_to_a = math.atan2(a[1] - b[1], a[0] - b[0])
    degrees = math.degrees(direction_to_c - direction_to_a)
    if degrees < 0:
        return degrees + 360
    return degrees

# Example:
print(getAngle([0.23182656, 0.86334693, 0.00919855, 0.98291081], [0.2782656, 0.668, 0.00919855, 0.98291081], (0, 5)))

## make_dict: a function that returns a dict mapping each column attribute to its value

In [None]:
def make_dict(data,pose_name):
    """Build one dataset row (as a dict) from a single frame's pose landmarks.

    Landmark x/y values are multiplied by the frame width/height before the
    joint angles are computed with ``getAngle``.

    Args:
        data: Sequence whose first item is a pose-landmark result exposing a
            ``.landmark`` iterable of objects with ``x`` and ``y`` attributes.
            It may optionally carry the frame width and height as its second
            and third items. When it does not, the module-level ``w`` and
            ``h`` are used, so those must describe the frame the landmarks
            came from.
        pose_name: Label stored under ``'PoseName'``.

    Returns:
        dict with the twelve joint angles (degrees), ``'PoseName'`` and
        ``'PoseAccurracy'`` (always 0 here; it is filled in later).
    """
    # Prefer a frame size that travels with the landmarks: relying on the
    # module-level w/h gives skewed angles whenever frames differ in size.
    if len(data) >= 3:
        frame_w, frame_h = data[1], data[2]
    else:
        frame_w, frame_h = w, h

    # getAngle only reads x and y, so z and visibility are not carried along.
    points = [(lm.x * frame_w, lm.y * frame_h) for lm in data[0].landmark]

    # Landmark indices as (first point, vertex, last point) for every angle.
    joint_triples = {
        'right_elbow_angle': (16, 14, 12),
        'right_shoulder_angle': (14, 12, 24),
        'left_shoulder_angle': (13, 11, 23),
        'left_elbow_angle': (15, 13, 11),
        'right_hip_angle': (12, 24, 26),
        'left_hip_angle': (11, 23, 25),
        'right_knee_angle': (24, 26, 28),
        'left_knee_angle': (23, 25, 27),
        'right_ankle_angle': (26, 28, 32),
        'left_ankle_angle': (25, 27, 31),
        'right_shoulder_wrt_nose_angle': (0, 12, 11),
        'left_shoulder_wrt_nose_angle': (0, 11, 12),
    }

    angles = {
        name: getAngle(points[first], points[vertex], points[last])
        for name, (first, vertex, last) in joint_triples.items()
    }
    angles['PoseName'] = pose_name
    angles['PoseAccurracy'] = 0
    return angles

## get_angles_dict: a function that builds a list of dicts containing the attributes and their values
This list is what gets added to the DataFrame.

In [None]:
# Module-level accumulator: get_angles_dict appends one dict per usable frame.
m=[]
def get_angles_dict(landmark_frames_data,pose_name):
    """Append an angle dict to the module-level list ``m`` for every frame
    that has landmarks.

    Args:
        landmark_frames_data: Iterable of per-frame entries whose first item
            is the pose-landmark result, or ``None`` when there is none.
        pose_name: Label passed through to ``make_dict``.

    Returns:
        None. Results are accumulated in ``m``, which is never cleared here,
        so repeated calls keep adding to it.
    """
    m.extend(
        make_dict(frame, pose_name)
        for frame in landmark_frames_data
        # Identity test: "no landmarks" is exactly None, and `!= None` would
        # defer to whatever equality the landmark object implements.
        if frame[0] is not None
    )

In [None]:
# Convert every captured frame that has landmarks into an angle dict labelled
# 'Tree'; the dicts are appended to the module-level list m.
get_angles_dict(landmark_frames_data,'Tree')

## The result of get_angles_dict is stored in a list named 'm'


In [None]:
# Display the angle dicts collected so far.
m

## Creating Dataframe to store the body joint angles

In [None]:
# DataFrame.append was removed in pandas 2.0, so build the new rows as a frame
# with the same columns and concatenate instead.
new_rows = pd.DataFrame(m, columns=data.columns)
# While `data` is still empty the new rows simply become the frame; this keeps
# the inferred numeric dtypes and avoids concatenating an empty frame.
data = new_rows if data.empty else pd.concat([data, new_rows], ignore_index=True)

In [None]:
# Display the DataFrame of joint angles.
data

## Let's integrate the different functions together

In [None]:
## Integrating all the functions and creating one point of integration
def Generate_Data(list_of_resources,pose_name):
    """Extract joint angles from every readable frame of the given resources.

    Each resource is opened with ``cv2.VideoCapture`` and its frames are run
    through MediaPipe Pose. Frames with landmarks are converted to a row with
    ``make_dict``; frames without landmarks are skipped.

    Args:
        list_of_resources: Iterable of image/video locations accepted by
            ``cv2.VideoCapture``. May be empty.
        pose_name: Label stored in the ``'PoseName'`` column of every row.

    Returns:
        pandas.DataFrame with the twelve angle columns plus ``'PoseName'``
        and ``'PoseAccurracy'``; it has no rows when nothing was detected.
    """
    column_list=['right_elbow_angle',
                'right_shoulder_angle',
                'left_shoulder_angle',
                'left_elbow_angle',
                'right_hip_angle',
                'left_hip_angle',
                'right_knee_angle',
                'left_knee_angle',
                'right_ankle_angle',
                'left_ankle_angle',
                'right_shoulder_wrt_nose_angle',
                'left_shoulder_wrt_nose_angle',
                'PoseName',
                'PoseAccurracy']

    landmark_frames_data = []
    # The context manager closes the Pose solution when capturing is done.
    with mp.solutions.pose.Pose() as pose:
        for resource_location in list_of_resources:
            cap = cv2.VideoCapture(resource_location)
            try:
                while True:
                    success, img = cap.read()
                    if not success:
                        break
                    h, w, c = img.shape
                    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    results = pose.process(imgRGB)
                    # Keep the frame size next to the landmarks: the landmark
                    # coordinates are relative to the frame, so its size is
                    # needed to turn them back into pixels.
                    landmark_frames_data.append([results.pose_landmarks, w, h])
            finally:
                # Release every capture, not only the last one opened.
                cap.release()
    print("All landmarks Captured in landmark_frames_data ")

    rows = [
        make_dict(frame, pose_name)
        for frame in landmark_frames_data
        if frame[0] is not None
    ]
    # DataFrame.append was removed in pandas 2.0; build the frame in one go.
    return pd.DataFrame(rows, columns=column_list)


## Call the Generate_Data function to get the DataFrame by passing the locations of the videos/photos of the yoga poses
Generate_Data returns a DataFrame; list_of_resources contains the local drive locations of the images/videos from which the data points are collected.

In [None]:
# Demo input: the same two sample images, repeated three times.
list_of_resources = [
    'C:/Users/mi/YogaGuru/Tree/1.jpg',
    'C:/Users/mi/YogaGuru/Tree/10.jpg',
] * 3
# Last expression of the cell, so the returned DataFrame is displayed.
Generate_Data(list_of_resources, 'demo')

#### We need some photos/videos that show the correct pose; we will treat them as our 100% accurate data
For better accuracy we use at least 10 images from different tutors, take the mean of every column attribute and treat those means as the standard values. Averaging minimises the error in the data points of the perfect pose.

In [None]:
# Reference images of the pose performed correctly.
list_of_perfect_res = [
    'C:/Users/mi/YogaGuru/Tree/1.jpg',
    'C:/Users/mi/YogaGuru/Tree/2.jpg',
    'C:/Users/mi/YogaGuru/Tree/3.jpg',
    'C:/Users/mi/YogaGuru/Tree/4.jpg',
    'C:/Users/mi/YogaGuru/Tree/5.jpg',
    'C:/Users/mi/YogaGuru/Tree/6.jpg',
    'C:/Users/mi/YogaGuru/Tree/7.jpg',
    'C:/Users/mi/YogaGuru/Tree/8.jpg',
    'C:/Users/mi/YogaGuru/Tree/9.png',
    'C:/Users/mi/YogaGuru/Tree/10.jpg',
]

In [None]:
# Build the reference DataFrame from the images in list_of_perfect_res,
# labelling every row 'Tree'.
dummydata=Generate_Data(list_of_perfect_res,'Tree')

#### To create the datapoints with 100% accuracy

In [None]:
# Average only the angle columns (everything except the trailing 'PoseName'
# and 'PoseAccurracy'). Calling mean() on the whole frame raises on
# pandas >= 2.0 because 'PoseName' holds strings.
temp_list = list(dummydata.iloc[:, :-2].astype(float).mean())
# Complete the row: pose label, then an accuracy of 100 for the reference.
temp_list += ['Tree', 100]

In [None]:
# Store the reference values in the first row; the sample that was in row 0
# is overwritten.
dummydata.iloc[0]=temp_list

#### This is the data points of 100% Accurate yoga pose

In [None]:
# Display the reference row.
dummydata.iloc[0]

#### Now we compare all other available data points with these reference points and calculate the accuracy using our get_error function

In [None]:
def get_error(ideal,practice,n_angles=12):
    """Return an accuracy score: 100 minus the mean relative error in percent.

    For each of the first ``n_angles`` positions the relative deviation
    ``|ideal - practice| / |ideal|`` is computed; the score is 100 minus the
    mean of those deviations expressed as a percentage. The score is not
    clamped, so deviations above 100% on average give a negative value.

    Args:
        ideal: Indexable sequence of reference values.
        practice: Indexable sequence of measured values, same layout.
        n_angles: Number of leading positions to compare (default 12, the
            number of joint-angle columns). Must be positive.

    Returns:
        The score as a number; 100 means the sequences agree exactly.
    """
    total = 0.0
    for i in range(n_angles):
        reference = ideal[i]
        deviation = abs(reference - practice[i])
        if reference == 0:
            # A relative error is undefined for a zero reference. Count an
            # exact match as no error and anything else as a full (100%)
            # error instead of dividing by zero.
            total += 0.0 if deviation == 0 else 1.0
        else:
            # abs() keeps the term non-negative even for a negative reference.
            total += deviation / abs(reference)
    return 100 - (total * 100 / n_angles)

In [None]:
# Row 0 holds the reference angles and is not modified by the loop, so it is
# read once instead of on every iteration.
ideal_angles = list(dummydata.loc[0, :])[:-2]
# The scores are fractional; make the column float up front rather than
# relying on pandas to upcast an integer column on assignment.
dummydata['PoseAccurracy'] = dummydata['PoseAccurracy'].astype(float)
# Start at 1: row 0 is the reference itself.
for i in range(1, len(dummydata)):
    dummydata.iloc[i, 13] = get_error(ideal_angles, list(dummydata.loc[i, :])[:-2])
    

In [None]:
# Round the values to zero decimal places.
dummydata=dummydata.round(0)

In [None]:
# Display the reference data with its accuracy scores.
dummydata

## Now, start collecting data from video
Note: the video/image should show a tutor/teacher/person. Frames in which no human body is visible yield no landmarks and are skipped; when a video/image has no more frames, processing moves on to the next one.

In [None]:
# Extract the joint angles from the frames of both videos, labelling every
# row 'Tree'.
tree=Generate_Data(['C:/Users/mi/YogaGuru/Tree/video1.mp4','C:/Users/mi/YogaGuru/Tree/video2.mp4'],'Tree')

In [None]:
# Round the values to zero decimal places.
tree=tree.round(0)

In [None]:
# Display the rows extracted from the videos.
tree

## Enter the accuracy in the DataFrame

In [None]:
# The reference angles come from row 0 of dummydata and do not change inside
# the loop, so they are read once.
ideal_angles = list(dummydata.loc[0, :])[:-2]
# The scores are fractional; make the column float up front rather than
# relying on pandas to upcast an integer column on assignment.
tree['PoseAccurracy'] = tree['PoseAccurracy'].astype(float)
# Every row of `tree` is a real sample (the reference lives in dummydata), so
# scoring starts at row 0; starting at 1 left the first frame with accuracy 0.
for i in range(len(tree)):
    tree.iloc[i, 13] = get_error(ideal_angles, list(tree.loc[i, :])[:-2])

In [None]:
# Display the video rows together with their accuracy scores.
tree

## Repeat the process for other poses and create a big dataset to build a good prediction model
Try to make the dataset less biased by adding images/videos such that the accuracy for each pose covers the range from 0 to 100.
(This dataset has some bias and therefore the results are not that great, but you can improve it.)

In [None]:
# Step 1: Encode the pose names as integer category codes and separate the
# features (X) from the target (y).
# NOTE(review): `data` is the frame filled by the earlier demo cell, not
# `tree` or `dummydata`; presumably a combined dataset of all poses is meant
# to be used here. Confirm.
# NOTE(review): 'PoseAccurracy' is not dropped, so it is part of X.
data['PoseLabel'] = data['PoseName'].astype('category').cat.codes
X = data.drop(columns=['PoseName', 'PoseLabel'], errors='ignore')
y = data['PoseLabel']

# Step 2: Split into training and testing sets
# (test_size=0.2 requests a 20% test share; random_state is fixed at 42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Step 3: Fit a random-forest classifier (n_estimators=100, random_state=42)
# on the training split.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
# Step 4: Evaluate the Model
# Predict labels for the held-out rows and report the accuracy score as a
# percentage with two decimals.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


In [None]:
# Step 5: Save the trained model to 'yoga_pose_model.pkl' with pickle.
# NOTE: unpickling can execute arbitrary code, so only load this file back
# from a trusted source.
with open("yoga_pose_model.pkl", "wb") as f:
    pickle.dump(model, f)
print("Model saved as 'yoga_pose_model.pkl'")