# Notebook to clean and adjust data

In [1]:
import pandas as pd
import os
from PIL import Image
import data_functions as func

## Constants

In [2]:
# Expert annotation CSV files, one per image acquisition type.
EXPERT_BOOM_DATA_PATH = '/exchange/dspro2/M-AI-ZE/data/annotations_expert/annotations_boom.csv'
EXPERT_DRONE_DATA_PATH = '/exchange/dspro2/M-AI-ZE/data/annotations_expert/annotations_drone.csv'
EXPERT_HANDHELD_DATA_PATH = '/exchange/dspro2/M-AI-ZE/data/annotations_expert/annotations_handheld.csv'
# Backward-compatible alias: this constant was originally misspelled
# (EXPORT instead of EXPERT) and is still referenced under the old name.
EXPORT_HANDHELD_DATA_PATH = EXPERT_HANDHELD_DATA_PATH

# Directories holding the original images of each type.
BOOM_IMAGE_PATH = '/exchange/dspro2/M-AI-ZE/data/images/images_boom'
DRONE_IMAGE_PATH = '/exchange/dspro2/M-AI-ZE/data/images/images_drone'
HANDHELD_IMAGE_PATH = '/exchange/dspro2/M-AI-ZE/data/images/images_handheld'

# Directories for the resized images of each type.
BOOM_IMAGE_RESIZE_PATH = '/exchange/dspro2/M-AI-ZE/data/images_resized/images_boom'
DRONE_IMAGE_RESIZE_PATH = '/exchange/dspro2/M-AI-ZE/data/images_resized/images_drone'
HANDHELD_IMAGE_RESIZE_PATH = '/exchange/dspro2/M-AI-ZE/data/images_resized/images_handheld'

# Destination of the combined, cleaned annotation CSV.
EXPORT_PATH = '/exchange/dspro2/M-AI-ZE/data/adjusted/1.1/expert_data_1.1.csv'

## Import data

In [3]:
# Load the three expert annotation tables (boom, drone, handheld) with the
# same CSV parsing options: comma-separated, double-quote as quote character.
expert_boom_data, expert_drone_data, expert_handheld_data = (
    pd.read_csv(csv_path, delimiter=",", quotechar='"')
    for csv_path in (
        EXPERT_BOOM_DATA_PATH,
        EXPERT_DRONE_DATA_PATH,
        EXPORT_HANDHELD_DATA_PATH,
    )
)

## Unify Suffix of the Image Files

In [9]:
# Run the suffix unification over each image directory in turn
# (boom, then drone, then handheld).
for image_dir in (BOOM_IMAGE_PATH, DRONE_IMAGE_PATH, HANDHELD_IMAGE_PATH):
    func.unify_img_suffix(image_dir)

## Clean CSV Data

In [23]:
# First cleaning step: pass each raw annotation table through
# func.remove_duplicates, keeping the boom / drone / handheld order.
clean_boom, clean_drone, clean_handheld = map(
    func.remove_duplicates,
    (expert_boom_data, expert_drone_data, expert_handheld_data),
)

In [24]:
# Second cleaning step: apply func.order_coordinates to every table and
# rebind the cleaned frames to the same names.
clean_boom, clean_drone, clean_handheld = (
    func.order_coordinates(annotations)
    for annotations in (clean_boom, clean_drone, clean_handheld)
)

In [25]:
# Third cleaning step: apply func.remove_no_area_boxes to every table.
clean_boom, clean_drone, clean_handheld = [
    func.remove_no_area_boxes(annotations)
    for annotations in (clean_boom, clean_drone, clean_handheld)
]

In [26]:
# Unify the image-name suffix inside the drone and handheld annotation tables.
clean_drone = func.unify_img_suffix_df(clean_drone)
clean_handheld = func.unify_img_suffix_df(clean_handheld)

# Add .jpg suffix for boom images.
# Names that already end in '.jpg' are left untouched so that re-running this
# cell does not produce '.jpg.jpg'. assign() builds a new frame instead of
# writing into a possibly filtered one, which avoids SettingWithCopyWarning.
boom_image_names = clean_boom['image']
already_suffixed = boom_image_names.str.endswith('.jpg', na=False)
clean_boom = clean_boom.assign(
    image=boom_image_names.where(already_suffixed, boom_image_names + '.jpg')
)

In [27]:
# Only the boom data contains coordinates with negative values, so this step
# is applied to the boom annotations alone.
# NOTE(review): presumably clamps negative coordinates to 0 — confirm in
# data_functions.clip_negative_coord_values.
clean_boom = func.clip_negative_coord_values(clean_boom)

In [28]:
# Drop annotation rows that refer to images which do not exist.
missing_boom_images = ['DSC06208_3.jpg', 'DSC06209_0.jpg']
refers_to_missing_image = clean_boom['image'].isin(missing_boom_images)
clean_boom = clean_boom[~refers_to_missing_image]

## Combine the Data of the Three Image Types and Export

In [18]:
# Tag every row with the image type it came from.
# assign() returns a new frame instead of writing into a frame that may be the
# result of an earlier boolean filter, which avoids SettingWithCopyWarning.
clean_boom = clean_boom.assign(type='boom')
clean_drone = clean_drone.assign(type='drone')
clean_handheld = clean_handheld.assign(type='handheld')

# Stack the three tables into one; ignore_index=True renumbers the rows 0..n-1.
expert_data_combined = pd.concat(
    [clean_boom, clean_drone, clean_handheld],
    ignore_index=True,
)

In [19]:
# Write the combined annotations to EXPORT_PATH without the index column.
# The target directory is created first because to_csv raises an error when
# asked to write into a directory that does not exist.
export_dir = os.path.dirname(EXPORT_PATH)
if export_dir:
    os.makedirs(export_dir, exist_ok=True)
expert_data_combined.to_csv(EXPORT_PATH, index=False)