<a href="https://colab.research.google.com/github/anushkad07/Deep-Learning-Project/blob/main/MakeDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import json
import cv2
import pandas as pd
from tqdm import tqdm
import numpy as np

1. Density - Anushka

In [None]:
def calculate_area(bounds):
    """Return the area of a rectangle described by four edge coordinates.

    Args:
        bounds: Indexable of four numbers; indices 0/2 are the horizontal
            edges and indices 1/3 the vertical edges.

    Returns:
        ``(bounds[2] - bounds[0]) * (bounds[3] - bounds[1])``.
    """
    width = bounds[2] - bounds[0]
    height = bounds[3] - bounds[1]
    return width * height

def calculate_density_measure(data):
    """Return the density measure of a screen from its annotation tree.

    The measure is ``1 - 2 * |0.5 - covered / frame|`` where ``covered`` is
    the summed area of the root's *direct* children (nested descendants are
    not visited) and ``frame`` is the area of the root's own bounds. It is 1
    when exactly half of the frame is covered. Overlapping children are
    counted twice, so the ratio can exceed 1 and the result can be negative.

    Args:
        data: Root annotation node. ``'bounds'`` and ``'children'`` are both
            optional; children without ``'bounds'`` are ignored.

    Returns:
        The density measure, or 0 when either area is not positive (no
        children, no usable bounds, or a missing/empty frame).
    """
    total_area = sum(
        calculate_area(child['bounds'])
        # A leaf screen may have no 'children' key (or a null one).
        for child in data.get('children') or []
        if child.get('bounds')
    )

    # Default to 0 so a root without bounds falls through to the "0" result
    # instead of leaving frame_area unbound.
    frame_bounds = data.get('bounds')
    frame_area = calculate_area(frame_bounds) if frame_bounds else 0

    if total_area > 0 and frame_area > 0:
        return 1 - 2 * abs(0.5 - total_area / frame_area)
    return 0


2. Colour - Yash

In [None]:
def calculate_colorfulness(image):
    """Return the colourfulness of an image in OpenCV (BGR) channel order.

    Implements the metric the original comment calls M(3):
    ``sqrt(std_rg^2 + std_yb^2) + 0.3 * sqrt(mean_rg^2 + mean_yb^2)`` with
    the opponent channels ``rg = R - G`` and ``yb = (R + G) / 2 - B``.

    Args:
        image: Array of shape ``(height, width, channels)`` with at least
            three channels, ordered blue, green, red (what ``cv2.imread``
            returns). Any numeric dtype is accepted.

    Returns:
        The colourfulness as a NumPy float; 0 for a uniformly grey image.
    """
    # Convert to float before any arithmetic: on uint8 data "R - G" wraps
    # around for negative differences and "R + G" overflows above 255.
    pixels = np.asarray(image, dtype=np.float64)

    # The input is BGR, so the channels are picked by index directly rather
    # than by first building an RGB copy of the image.
    blue = pixels[:, :, 0]
    green = pixels[:, :, 1]
    red = pixels[:, :, 2]

    # Opponent colour channels (red-green and yellow-blue).
    rg = red - green
    yb = (red + green) / 2 - blue

    std_rg, std_yb = np.std(rg), np.std(yb)
    mean_rg, mean_yb = np.mean(rg), np.mean(yb)

    spread = np.sqrt(std_rg ** 2 + std_yb ** 2)
    offset = np.sqrt(mean_rg ** 2 + mean_yb ** 2)
    return spread + 0.3 * offset

3. Proportion - Devesh

4. Symmetry - Mann

In [None]:
def determine_quadrant(element_center_x, element_center_y, center_x, center_y):
    """Name the screen quadrant that contains an element's centre point.

    Points exactly on the vertical centre line count as right, and points
    exactly on the horizontal centre line count as lower.

    Returns:
        ``'UL'``, ``'UR'``, ``'LL'`` or ``'LR'``; ``None`` if no comparison
        holds (e.g. a NaN coordinate).
    """
    is_left = element_center_x < center_x
    is_right = element_center_x >= center_x
    is_upper = element_center_y < center_y
    is_lower = element_center_y >= center_y

    candidates = (
        ('UL', is_left and is_upper),
        ('UR', is_right and is_upper),
        ('LL', is_left and is_lower),
        ('LR', is_right and is_lower),
    )
    for label, matches in candidates:
        if matches:
            return label
    return None

def calculate_element_properties(bounds):
    """Return ``(center_x, center_y, width, height)`` for a bounds rectangle.

    Args:
        bounds: Indexable of four numbers; indices 0/1 are the starting
            corner and indices 2/3 the opposite corner.
    """
    start_x, start_y = bounds[0], bounds[1]
    width = bounds[2] - start_x
    height = bounds[3] - start_y
    return (start_x + width / 2, start_y + height / 2, width, height)

def extract_ui_elements(data, parent_bounds=None):
    """Collect the leaf nodes of an annotation tree in depth-first order.

    A node without its own ``'bounds'`` inherits the effective bounds of its
    parent. Nodes with a missing or empty ``'children'`` entry are leaves.

    Args:
        data: Annotation node (dict); leaves must have a ``'class'`` key.
        parent_bounds: Bounds to fall back on when ``data`` has none.

    Returns:
        A list of ``{'class': ..., 'bounds': ...}`` dicts, one per leaf.
    """
    effective_bounds = data.get('bounds', parent_bounds)
    children = data.get('children')
    if not children:
        return [{'class': data['class'], 'bounds': effective_bounds}]
    return [
        leaf
        for child in children
        for leaf in extract_ui_elements(child, effective_bounds)
    ]

def calculate_symmetry(data):
    """Score the symmetry of a UI annotation tree, clamped at zero.

    Leaf elements (from ``extract_ui_elements``) are assigned to the four
    screen quadrants by centre point. Each non-empty quadrant is summarised
    by the mean offset of its elements from the screen centre, their mean
    width/height, and a radial term. Quadrant pairs are then compared:

    * vertical:   UL vs UR and LL vs LR, x offsets scaled by summed widths
    * horizontal: UL vs LL and UR vs LR, y offsets scaled by summed heights
    * radial:     sum of the four radial terms / 12, only when every
                  quadrant contains at least one element

    Args:
        data: Root annotation node; must contain ``'bounds'``.

    Returns:
        ``max(0, 1 - asymmetry)`` where asymmetry is the vertical and
        horizontal terms (averaged when both are non-zero, summed otherwise)
        plus the radial term.
    """
    frame = data['bounds']
    screen_center_x = (frame[2] + frame[0]) / 2
    screen_center_y = (frame[3] + frame[1]) / 2

    quadrants = {'UL': [], 'UR': [], 'LL': [], 'LR': []}
    for element in extract_ui_elements(data):
        center_x, center_y, width, height = calculate_element_properties(element['bounds'])
        quadrant = determine_quadrant(center_x, center_y, screen_center_x, screen_center_y)
        quadrants[quadrant].append({
            'center_x': center_x,
            'center_y': center_y,
            'width': width,
            'height': height,
        })

    normalized_values = {}
    for quadrant, elements in quadrants.items():
        count = len(elements)
        if count == 0:
            normalized_values[quadrant] = None
            continue

        squared_distance_sum = sum(
            (element['center_x'] - screen_center_x) ** 2
            + (element['center_y'] - screen_center_y) ** 2
            for element in elements
        )
        normalized_values[quadrant] = {
            'x': sum(element['center_x'] for element in elements) / count - screen_center_x,
            'y': sum(element['center_y'] for element in elements) / count - screen_center_y,
            'h': sum(element['height'] for element in elements) / count,
            'w': sum(element['width'] for element in elements) / count,
            # Computed for completeness; not used by the score below.
            'theta': sum(abs(element['center_y'] - screen_center_y) for element in elements) / count,
            'r': squared_distance_sum ** 0.5 / count,
        }

    def pair_asymmetry(first, second, offset_key, size_key):
        """Offset difference of two quadrants scaled by their summed size."""
        a, b = normalized_values[first], normalized_values[second]
        if not a or not b:
            return 0
        scale = a[size_key] + b[size_key]
        if scale == 0:
            # Only zero-sized elements on both sides: nothing to scale by.
            return 0
        # NOTE(review): offsets are signed, so mirrored quadrants (-d vs +d)
        # yield 2d rather than 0 -- confirm whether absolute distances from
        # the centre were intended here.
        return abs(a[offset_key] - b[offset_key]) / scale

    SYM_vertical = (pair_asymmetry('UL', 'UR', 'x', 'w')
                    + pair_asymmetry('LL', 'LR', 'x', 'w'))
    SYM_horizontal = (pair_asymmetry('UL', 'LL', 'y', 'h')
                      + pair_asymmetry('UR', 'LR', 'y', 'h'))

    SYM_radial = 0
    if all(normalized_values[quadrant] is not None for quadrant in ('UL', 'UR', 'LL', 'LR')):
        # NOTE(review): 'r' is in coordinate units and is not normalised, so
        # this term can dominate the score -- confirm the intended scaling.
        SYM_radial = sum(normalized_values[quadrant]['r'] for quadrant in ('UL', 'UR', 'LL', 'LR')) / 12

    # Average the two axis terms only when both contributed something.
    div = 2 if (SYM_vertical and SYM_horizontal) else 1
    overall_symmetry = 1 - ((SYM_vertical + SYM_horizontal) / div + SYM_radial)

    return max(0, overall_symmetry)

def update_json_file(file_path, symmetry_score):
    """Store ``symmetry_score`` in a JSON file under the ``'symmetry_score'`` key.

    The file is read, the key is added (or overwritten) on the top-level
    object, and the whole document is written back with 4-space indentation.

    Args:
        file_path: Path of an existing JSON file whose top level is an object.
        symmetry_score: JSON-serialisable value to store.
    """
    # Explicit UTF-8, matching how the main loop reads the annotation files;
    # the platform default encoding is not guaranteed to be UTF-8.
    with open(file_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)
    data['symmetry_score'] = symmetry_score
    with open(file_path, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)

def symmetry_score(directory_path):
    """Compute and persist the symmetry score of every JSON file in a folder.

    Each ``*.json`` file directly inside ``directory_path`` is loaded, scored
    with ``calculate_symmetry`` and rewritten via ``update_json_file`` so it
    carries the score.

    Args:
        directory_path: Folder containing the annotation files.

    Returns:
        Dict mapping file name to its symmetry score.
    """
    scores = {}
    # Sorted so the processing order (and dict order) is reproducible;
    # os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith('.json'):
            continue
        file_path = os.path.join(directory_path, filename)
        with open(file_path, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)
        # Named "score" so it does not shadow this function's own name.
        score = calculate_symmetry(data)
        scores[filename] = score
        update_json_file(file_path, score)
    return scores

5. Balance - Vedika

In [None]:
def screen_bounds(json_data):
    """Return ``(width, height)`` of the node's ``'bounds'`` rectangle.

    Args:
        json_data: Dict whose ``'bounds'`` entry unpacks into exactly
            ``left, top, right, bottom``.
    """
    x_min, y_min, x_max, y_max = json_data['bounds']
    return (x_max - x_min, y_max - y_min)

def parse_annotations(json_data):
    """Flatten every bounded descendant of the root into a list of boxes.

    The tree below ``json_data['children']`` is walked depth-first, parents
    before their children. Nodes without ``'bounds'`` contribute no box but
    their children are still visited. The root node itself is not included.

    Returns:
        List of ``{'left', 'top', 'width', 'height'}`` dicts.
    """
    def walk(nodes):
        for node in nodes:
            if 'bounds' in node:
                x_min, y_min, x_max, y_max = node['bounds']
                yield {
                    'left': x_min,
                    'top': y_min,
                    'width': x_max - x_min,
                    'height': y_max - y_min,
                }
            if 'children' in node:
                yield from walk(node['children'])

    return list(walk(json_data['children']))

def _normalized_difference(first_weight, second_weight):
    """Return ``|a - b| / max(a, b)``, or 0 when both weights are zero."""
    heaviest = max(first_weight, second_weight)
    if heaviest == 0:
        return 0.0
    return abs(first_weight - second_weight) / heaviest


def compute_balance_scores(objects, json_data=None):
    """Return a balance score in ``[0, 1]`` for a set of boxes on a screen.

    Every box is assigned to the left or right half, and to the top or
    bottom half, according to where its centre lies relative to the screen
    centre. The weight of a half is the sum of ``area * distance`` over its
    boxes, where distance is measured from the box centre to the dividing
    axis. The score is ``1 - (|L - R| / max(L, R) + |T - B| / max(T, B)) / 2``.

    Args:
        objects: Iterable of dicts with ``'left'``, ``'top'``, ``'width'``
            and ``'height'`` (as produced by ``parse_annotations``).
        json_data: Optional root annotation node; its ``'bounds'`` define the
            screen. When omitted, the screen is taken to span from the origin
            to the furthest right/bottom edge found in ``objects``.

    Returns:
        1.0 for perfectly balanced content (including no content at all),
        0.0 when all weight sits on one side of both axes.
    """
    objects = list(objects)

    if json_data is not None:
        screen_width, screen_height = screen_bounds(json_data)
        origin_x, origin_y = json_data['bounds'][0], json_data['bounds'][1]
    else:
        origin_x = origin_y = 0
        screen_width = max((obj['left'] + obj['width'] for obj in objects), default=0)
        screen_height = max((obj['top'] + obj['height'] for obj in objects), default=0)

    # Sides are split at the screen centre, not at its far edge.
    center_x = origin_x + screen_width / 2
    center_y = origin_y + screen_height / 2

    left_weight = right_weight = top_weight = bottom_weight = 0
    for obj in objects:
        area = obj['width'] * obj['height']
        obj_center_x = obj['left'] + obj['width'] / 2
        obj_center_y = obj['top'] + obj['height'] / 2

        horizontal_moment = area * abs(obj_center_x - center_x)
        vertical_moment = area * abs(obj_center_y - center_y)

        if obj_center_x < center_x:
            left_weight += horizontal_moment
        else:
            right_weight += horizontal_moment

        if obj_center_y < center_y:
            top_weight += vertical_moment
        else:
            bottom_weight += vertical_moment

    # An empty pair of sides counts as balanced instead of dividing by zero.
    vertical_balance = _normalized_difference(left_weight, right_weight)
    horizontal_balance = _normalized_difference(top_weight, bottom_weight)

    return 1 - (vertical_balance + horizontal_balance) / 2


def balance_score(json_data):
    """Compute, print and return the balance score of one annotation tree.

    Args:
        json_data: Root annotation node with ``'bounds'`` and ``'children'``.

    Returns:
        The value of ``compute_balance_scores`` for the node's descendants.
    """
    objects = parse_annotations(json_data)
    # The screen geometry is passed explicitly; the scorer must not depend on
    # a global variable that happens to be called json_data.
    score = compute_balance_scores(objects, json_data)
    print("Balance Score:", score)
    return score

**Calculate Everything**

In [None]:
output_csv = '.../measures.csv'
json_folder = '.../semantic_annotations'
image_folder = '...path/to/image/files'

page_names = []
balance_measures = []
colour_measures = []
symmetry_measures = []
proportion_measures = []
density_measures = []

# Only annotation files are processed, so only they count towards the
# progress total. Sorted for a reproducible row order in the CSV.
json_filenames = sorted(
    name for name in os.listdir(json_folder) if name.endswith('.json')
)

# NOTE(review): no proportion measure is defined in the visible part of this
# notebook. Use it when it exists; otherwise leave the column empty rather
# than aborting the whole run with a NameError.
try:
    proportion_fn = calculate_proportion
except NameError:
    proportion_fn = None
    print("calculate_proportion is not defined; the Proportion column will be empty")

with tqdm(total=len(json_filenames), desc="Processing JSON files") as progress_bar:
    for filename in json_filenames:
        progress_bar.update(1)  # Update progress bar
        page_number = filename.split('.')[0]
        page_names.append(page_number)

        with open(os.path.join(json_folder, filename), 'r', encoding='utf-8') as f:
            data = json.load(f)

        balance_measures.append(balance_score(data))

        # Every measure list must grow by one entry per page, otherwise the
        # DataFrame below cannot be built.
        symmetry_measures.append(calculate_symmetry(data))

        proportion_measures.append(
            proportion_fn(data) if proportion_fn is not None else np.nan
        )

        density_measures.append(calculate_density_measure(data))

        # Load the corresponding image for the colour calculation. The colour
        # metric works on pixel data, so the file has to be decoded first;
        # cv2.imread returns None when the file cannot be read.
        image_path = os.path.join(image_folder, page_number + '.jpg')
        image = cv2.imread(image_path) if os.path.exists(image_path) else None
        if image is not None:
            colour_measures.append(calculate_colorfulness(image))
        else:
            # NaN (not None) keeps the column numeric for the mean below.
            colour_measures.append(np.nan)

        # Log progress every 1000 pages
        if len(page_names) % 1000 == 0:
            print(f"Processed {len(page_names)} pages")

# Create a DataFrame to store the results
results_df = pd.DataFrame({
    'Page': page_names,
    'Balance': balance_measures,
    'Colour': colour_measures,
    'Symmetry': symmetry_measures,
    'Proportion': proportion_measures,
    'Density': density_measures
})

# Calculate final score (average of all measures; missing values are skipped).
# NOTE(review): Colour is not on a 0-1 scale like the other measures, so it
# can dominate this average -- confirm whether it should be normalised first.
results_df['Final Score'] = results_df[['Balance', 'Colour', 'Symmetry', 'Proportion', 'Density']].mean(axis=1)

# Save results to CSV
results_df.to_csv(output_csv, index=False)

print("Results saved to", output_csv)