# Human Pose Classification Using Transformers

In [None]:
import warnings
# Install a global "ignore" filter so no Python warnings are shown for the
# rest of the session (presumably to keep the notebook output compact).
warnings.filterwarnings('ignore')


In [None]:
from datasets import load_dataset

# Load only the 'train' split of the Hub dataset; a held-out set is carved
# out of it in a later cell.
dataset = load_dataset("Bingsu/Human_Action_Recognition", split='train')


In [None]:
# Hold out 20% of the examples for evaluation. train_test_split shuffles
# before splitting by default, so a separate .shuffle() call is redundant;
# fixing the seed makes the split (and every metric computed on it)
# reproducible across runs.
dataset = dataset.train_test_split(test_size=0.2, seed=42)


In [None]:
# Display the first training example (all of its columns).
dataset['train'][0]


In [None]:
# Display only the 'image' field of the first training example.
dataset['train'][0]['image']


In [None]:
# Class names come from the 'labels' feature; a name's position in the list
# is its integer class id.
labels = dataset['train'].features['labels'].names

# Two lookup tables over the same enumeration: name -> id and id -> name.
label2id = {name: idx for idx, name in enumerate(labels)}
id2label = {idx: name for idx, name in enumerate(labels)}

print(label2id)
print(id2label)


# Image Preprocessing

In [None]:
from transformers import AutoImageProcessor #-> like Tokenizer

# Hub checkpoint id shared by the image processor and the classifier that
# are loaded in later cells.
model_ckpt = "google/vit-base-patch16-224-in21k"
# Alternative checkpoint, currently disabled:
# model_ckpt = "microsoft/swinv2-tiny-patch4-window16-256"


In [None]:
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

# Load the image processor that belongs to the checkpoint; its mean/std and
# size settings drive the torchvision pipeline built below.
image_processor = AutoImageProcessor.from_pretrained(model_ckpt, use_fast=True)
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# The processor describes its input size either by a single 'shortest_edge'
# value or by an explicit height/width pair.
if 'shortest_edge' in image_processor.size:
    size = image_processor.size['shortest_edge']
else:
    size = (image_processor.size['height'], image_processor.size['width'])

# Random crop resized to `size`, tensor conversion, then normalization.
_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])

def transforms(batch):
    """Turn a batch of images into tensors, modifying ``batch`` in place.

    Every entry of ``batch['image']`` is converted to RGB and run through
    ``_transforms``; the results are stored under ``batch['pixel_values']``
    and the ``'image'`` entry is then removed. The same ``batch`` object is
    returned.
    """
    pixel_values = []
    for picture in batch['image']:
        pixel_values.append(_transforms(picture.convert('RGB')))
    batch['pixel_values'] = pixel_values

    # Only the tensor form is kept once the conversion has succeeded.
    del batch['image']
    return batch


In [None]:
from torchvision.transforms import CenterCrop, Resize

# RandomResizedCrop is a training-time augmentation. Applying it to the
# held-out split as well makes accuracy, the classification report and the
# confusion matrix change from run to run, so evaluation gets its own
# deterministic resize + center-crop pipeline (same size and normalization).
_eval_transforms = Compose([Resize(size), CenterCrop(size), ToTensor(), normalize])


def eval_transforms(batch):
    """Deterministic counterpart of ``transforms`` for evaluation batches.

    Converts every entry of ``batch['image']`` to RGB, runs it through
    ``_eval_transforms``, stores the results under ``batch['pixel_values']``,
    removes the ``'image'`` entry and returns the same ``batch``.
    """
    batch['pixel_values'] = [_eval_transforms(img.convert('RGB')) for img in batch['image']]
    del batch['image']
    return batch


# with_transform is applied lazily on access, so each split can carry its
# own pipeline: random augmentation for training, fixed preprocessing for test.
dataset['train'] = dataset['train'].with_transform(transforms)
dataset['test'] = dataset['test'].with_transform(eval_transforms)


# Evaluate the model

In [None]:
# !pip install evaluate

import evaluate
import numpy as np

# Accuracy metric loaded once through `evaluate` and reused by compute_metrics.
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    ``eval_pred`` unpacks into ``(predictions, labels)``. The predicted class
    of each row is the arg-max of ``predictions`` along axis 1. Returns the
    result of ``accuracy.compute`` for those predictions and references.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy.compute(predictions=predicted_ids, references=references)



# Vision Transformer (ViT) Fine-Tuning for Image Classification

In [None]:
from transformers import AutoModelForImageClassification
import torch


In [None]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the classifier from the checkpoint, sized to the dataset's classes
# and carrying both label lookup tables.
model = AutoModelForImageClassification.from_pretrained(
    model_ckpt,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)
model = model.to(device)


In [None]:
# Display the number of classes (the value passed as num_labels to the model).
len(labels)


# Model Training

In [None]:
import inspect

from transformers import TrainingArguments, Trainer


# Evaluate and checkpoint once per epoch so that the checkpoint with the
# highest accuracy can be restored when training ends.
# remove_unused_columns=False keeps the 'image' column available to the
# on-the-fly transform that produces 'pixel_values'.
args = TrainingArguments(
    output_dir="train_dir",
    remove_unused_columns=False,
    eval_strategy='epoch',
    save_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    load_best_model_at_end=True,
    metric_for_best_model='accuracy'
)

# Newer transformers releases take the processor through `processing_class`
# and deprecate `tokenizer`; older releases only know `tokenizer`. Pick the
# keyword the installed Trainer actually accepts so the cell runs on both.
_trainer_params = inspect.signature(Trainer.__init__).parameters
_processor_kwarg = 'processing_class' if 'processing_class' in _trainer_params else 'tokenizer'

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    compute_metrics=compute_metrics,
    **{_processor_kwarg: image_processor},
)


In [None]:
# Run fine-tuning with the arguments configured above (5 epochs, with
# evaluation and checkpointing at the end of each epoch).
trainer.train()


In [None]:
# Score the trainer's current model on the eval_dataset it was given
# (dataset['test']) using compute_metrics.
trainer.evaluate()


In [None]:
# Write the fine-tuned model to a local directory; later cells load it from
# this path and upload the directory to S3.
trainer.save_model('vit-human-pose-classification')

# Classification Report and Confusion Matrix

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
# Run inference over the held-out split. Despite its name, `logits` holds the
# whole object returned by trainer.predict here; the next cell pulls
# `.predictions` out of it.
logits = trainer.predict(dataset['test'])


In [None]:
# `logits` is still the output object returned by trainer.predict at this
# point, so read the ground-truth ids from it before the name is rebound to
# the raw score array. The report and confusion-matrix cells need both
# y_true and y_pred, which were previously never defined.
y_true = logits.label_ids
logits = logits.predictions
# Predicted class = index of the highest score per example (the same rule
# compute_metrics applies).
y_pred = np.argmax(logits, axis=1)


In [None]:
# Print the per-class report, with rows named after `labels`.
# Expects y_true / y_pred (class ids) to have been defined by an earlier cell.
print(classification_report(y_true, y_pred, target_names=labels))



In [None]:
# Rows are the true classes, columns the predicted ones.
cm = confusion_matrix(y_true, y_pred)

# Draw the integer counts on an explicit Axes, with class names on both axes.
fig, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Reds',
    cbar=False,
    xticklabels=label2id.keys(),
    yticklabels=label2id.keys(),
    ax=ax,
)
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()


# Prediction on Real Image

In [None]:
from transformers import AutoImageProcessor, pipeline

# Preprocessing is rebuilt from the base checkpoint; the classifier weights
# come from the locally saved fine-tuned model directory.
model_ckpt = "google/vit-base-patch16-224-in21k"
image_processor = AutoImageProcessor.from_pretrained(model_ckpt, use_fast=True)

# Sample photo fetched over HTTP by the pipeline.
url = "https://images.pexels.com/photos/1755385/pexels-photo-1755385.jpeg"

pipe = pipeline(
    task='image-classification',
    model='vit-human-pose-classification',
    image_processor=image_processor,
)

output = pipe(url)
output


In [None]:
# Store the processor configuration in the same directory as the saved model
# so the directory can be loaded on its own (as the next cell does).
image_processor.save_pretrained('vit-human-pose-classification')


In [None]:
# Load the fine-tuned classifier and its image processor from the local directory
from transformers import AutoImageProcessor, AutoModel, AutoModelForImageClassification

local_directory = 'vit-human-pose-classification'
image_processor = AutoImageProcessor.from_pretrained(local_directory, use_fast=True)
# AutoModel would instantiate only the bare backbone and discard the trained
# classification head stored in this directory; the task-specific auto class
# restores the full classifier (including id2label / label2id from its config).
model = AutoModelForImageClassification.from_pretrained(local_directory, local_files_only=True)


# Push Model to AWS S3

In [None]:
# import boto3

# s3 = boto3.client('s3')

# bucket_name = 'mlopssentimentanalysis-8989'

# def create_bucket(bucket_name):
#     response = s3.list_buckets()
#     buckets = [buck['Name'] for buck in response['Buckets']]
#     if bucket_name not in buckets:
#         s3.create_bucket(Bucket=bucket_name)
#         print("Bucket is created")

#     else:
#         print("Bucket already exists in your account!!! Feel free to use it.")

# create_bucket(bucket_name)

In [None]:
# Upload the saved model folder to S3 under the key prefix
# ml-models/vit-human-pose-classification
import os
import boto3

# Shared S3 client and destination bucket used by upload_directory below.
s3 = boto3.client('s3')
bucket_name = 'mlops-44448888'

def upload_directory(directory_path, s3_prefix, *, bucket=None, client=None):
    """Upload every file under ``directory_path`` to S3 below ``s3_prefix``.

    The directory tree is walked recursively and each file is stored under
    ``<s3_prefix>/<path relative to directory_path>``, always with forward
    slashes regardless of the local path separator.

    Args:
        directory_path: Local directory to upload.
        s3_prefix: Key prefix prepended to every uploaded object.
        bucket: Destination bucket name; defaults to the module-level
            ``bucket_name``.
        client: Object exposing ``upload_file(filename, bucket, key)``;
            defaults to the module-level ``s3`` client.

    Raises:
        NotADirectoryError: If ``directory_path`` is not an existing
            directory. ``os.walk`` would otherwise yield nothing and the
            call would silently upload no files.
    """
    import posixpath

    if not os.path.isdir(directory_path):
        raise NotADirectoryError(f"Not a directory: {directory_path!r}")

    target_bucket = bucket_name if bucket is None else bucket
    s3_client = s3 if client is None else client

    for root, _dirs, files in os.walk(directory_path):
        for file in files:
            # Leave the local path untouched: it must name the real file on
            # disk, and a backslash is a legal filename character on POSIX.
            file_path = os.path.join(root, file)
            relpath = os.path.relpath(file_path, directory_path)
            # S3 keys always use '/', so split on the local separator only.
            s3_key = posixpath.join(s3_prefix, *relpath.split(os.sep))

            s3_client.upload_file(file_path, target_bucket, s3_key)


# Push every file of the local model directory to the bucket under the
# ml-models/vit-human-pose-classification key prefix.
upload_directory('vit-human-pose-classification', 'ml-models/vit-human-pose-classification')