# Testing JS 5

In [1]:
import os
import sys

from dotenv import load_dotenv

# Populate the process environment from a .env file before any os.getenv() call below.
load_dotenv()

# Make project-local modules importable (presumably `utils` lives under SRC_DIR — confirm).
src_dir = os.getenv('SRC_DIR')

# Guard against two problems with a bare append:
#   * an unset SRC_DIR would put None on sys.path (non-string entries are ignored by import),
#   * re-running this cell would add the same directory again on every execution.
if src_dir is not None and src_dir not in sys.path:
    sys.path.append(src_dir)

# Must come after the sys.path update above.
from utils.system import display_system_info

display_system_info(markdown=True)


**Last Updated**: 2025-04-13 17:15:11

**Python Version**: 3.11.5  
**OS**: Windows 10.0.26100  
**Architecture**: 64bit  
**Hostname**: ShenLaptop  
**Processor**: Intel64 Family 6 Model 186 Stepping 3, GenuineIntel  
**RAM Size**: 15.65 GB  
  
        

In [2]:
def get_ground_truth(path: str) -> list[tuple[int, float, float, float, float]]:
    """Parse a label file into ``(cls, x_n, y_n, w_n, h_n)`` tuples.

    Every non-blank line must contain exactly five whitespace-separated
    fields: an integer class id followed by four floats (presumably a
    YOLO-style normalised centre-x, centre-y, width, height — confirm
    against the dataset).

    Args:
        path: Path of the label text file to read.

    Returns:
        One tuple per non-blank line, in file order. An empty file yields
        an empty list.

    Raises:
        ValueError: If a non-blank line does not have exactly five fields
            or a field cannot be converted to ``int``/``float``.
    """
    cls_xywhns = []

    with open(path, 'r') as f:
        for line in f:
            # split() with no separator collapses runs of spaces/tabs and
            # drops the trailing newline, unlike split(' ').
            values = line.split()

            # Blank lines (e.g. a trailing empty line) carry no annotation.
            if not values:
                continue

            cls = int(values[0])
            x_n, y_n, w_n, h_n = map(float, values[1:])

            cls_xywhns.append((cls, x_n, y_n, w_n, h_n))

    return cls_xywhns
    
def xywhn_to_xyxy(image, x_n, y_n, w_n, h_n):
    """Convert a normalised centre/size box into pixel corner coordinates.

    Args:
        image: Object exposing ``.shape`` whose first two entries are
            ``(height, width)``; any further axes (e.g. channels) are ignored.
        x_n: Box centre x as a fraction of the image width.
        y_n: Box centre y as a fraction of the image height.
        w_n: Box width as a fraction of the image width.
        h_n: Box height as a fraction of the image height.

    Returns:
        ``(x1, y1, x2, y2)`` as ints, where ``(x1, y1)`` is the top-left and
        ``(x2, y2)`` the bottom-right corner in pixels. Values are truncated
        with ``int()`` and are not clipped to the image bounds.
    """
    # cv2 images are HWC; slicing the shape also accepts 2-D (H, W) grayscale
    # arrays, which a three-way unpack would reject.
    height, width = image.shape[:2]

    x1 = int((x_n - w_n / 2) * width)
    y1 = int((y_n - h_n / 2) * height)
    x2 = int((x_n + w_n / 2) * width)
    y2 = int((y_n + h_n / 2) * height)

    return x1, y1, x2, y2

In [4]:
import pandas as pd

# Directory of training label files; its three path components are read from the environment.
train_labels_dir = os.path.join(os.getenv('RAW_DATA_DIR'), os.getenv('LABEL_DIR'), os.getenv('TRAIN_DIR'))

# Flat list of (cls, x_n, y_n, w_n, h_n) tuples gathered from every label file.
classes = []

# os.listdir() returns entries in arbitrary order; sorting keeps the DataFrame
# row order reproducible across runs and machines.
for filename in sorted(os.listdir(train_labels_dir)):
    label_path = os.path.join(train_labels_dir, filename)

    # Skip sub-directories (e.g. tool caches), which open() cannot read.
    if not os.path.isfile(label_path):
        continue

    classes.extend(get_ground_truth(label_path))

df_ground_truth = pd.DataFrame(classes, columns=['Class', 'x_n', 'y_n', 'w_n', 'h_n'])

df_ground_truth

Unnamed: 0,Class,x_n,y_n,w_n,h_n
0,1,0.711310,0.508929,0.090030,0.176587
1,0,0.328497,0.545635,0.115327,0.095238
2,0,0.339658,0.678902,0.217262,0.081349
3,0,0.317336,0.628307,0.289435,0.128307
4,0,0.890067,0.335317,0.164807,0.103175
...,...,...,...,...,...
10229,0,0.899182,0.633598,0.116815,0.039683
10230,0,0.554688,0.648148,0.179315,0.134921
10231,2,0.434896,0.472884,0.122768,0.173280
10232,1,0.625744,0.595899,0.075893,0.149471


## Training Data Class Distribution

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Each DataFrame row is one annotation, so the row count is the denominator
# for the per-class percentages drawn on the bars.
total_counts = df_ground_truth['Class'].size

fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(data=df_ground_truth, x='Class', hue='Class', palette='viridis', ax=ax)

# Write each bar's share of the total at the bar's centre.
for bar in ax.patches:
    bar_height = bar.get_height()

    # Only label patches with a positive height.
    if not (bar_height > 0):
        continue

    share = f'{bar_height / total_counts * 100:.1f}%'
    centre_x = bar.get_x() + bar.get_width() / 2
    ax.text(
        centre_x,
        bar_height / 2,
        share,
        ha='center',
        va='center',
        fontsize=24,
        color='white',
        fontweight='bold',
    )

ax.grid(axis='y', linestyle=':', alpha=0.7)

# Rebuild the legend outside the plot area, to the right of the axes.
legend_handles, legend_labels = ax.get_legend_handles_labels()
ax.legend(
    legend_handles,
    legend_labels,
    title='Classes',
    fontsize=12,
    title_fontsize=13,
    loc='center left',
    bbox_to_anchor=(1.02, 0.5),
)

ax.set_xlabel('Class Labels', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.set_title(f'Class Distribution in Training Data (Total Samples: {total_counts})', fontsize=14)

plt.show()