# Notebook details

In [None]:
def setup_notebook(fix_python_path=True, reduce_margins=True, plot_inline=True):
    """Prepare the Jupyter session used by this notebook.

    Args:
        fix_python_path: when true, put the parent of the current working
            directory on ``sys.path`` (skipped if it is already listed).
        reduce_margins: when true, inject CSS that stretches the notebook
            container to the full browser width.
        plot_inline: when true, switch matplotlib to the ``inline`` backend.

    Side effects:
        Always sets the module-level ``__file__`` to ``'Notebook'``.
    """
    if reduce_margins:
        # Reduce side margins of the notebook.
        # IPython.display is the public home of these helpers; importing
        # `display` from IPython.core.display is deprecated.
        from IPython.display import display, HTML
        display(HTML("<style>.container { width:100% !important; }</style>"))

    if fix_python_path:
        # Add the parent directory (presumably where the egosocial package
        # lives) to the python path.
        import os, sys
        parent_dir = os.path.dirname(os.path.abspath('.'))
        # Re-running the cell must not keep appending the same entry.
        if parent_dir not in sys.path:
            sys.path.append(parent_dir)

    if plot_inline:
        # Plots inside cells. This is the programmatic form of the
        # `%matplotlib inline` line magic, which keeps the function valid
        # plain Python (a bare `%` magic is only understood by IPython's
        # input transformer).
        from IPython import get_ipython
        get_ipython().run_line_magic('matplotlib', 'inline')

    # Give the notebook a module-level __file__ so code that reads it finds a value.
    global __file__
    __file__ = 'Notebook'

# Run the setup with every option at its default (all three are enabled).
setup_notebook()

# Imports and Constants Definition

In [None]:
# !/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import json
import logging
import os

import IPython
import ipywidgets as widgets

import cv2

import egosocial
from egosocial import config
from egosocial.core.types import FaceClustering
from egosocial.utils.filesystem import check_directory 
from egosocial.utils.filesystem import create_directory
from egosocial.utils.filesystem import list_files_in_segment
from egosocial.utils.filesystem import list_segments
from egosocial.utils.logging import setup_logging
from egosocial.utils.parser import FACE_DETECTION
from egosocial.utils.parser import load_faces_from_file

# Social domains; DOMAINS[i] is the domain of every relation listed in RELATIONS[i].
# NOTE(review): the unusual spellings ('Attachent', 'Heirarchical Power',
# 'colleages') presumably mirror the strings stored in the label files, so they
# are matched verbatim and must not be "corrected" here -- confirm against the data.
DOMAINS = ['Attachent', 'Reciprocity', 'Mating', 'Heirarchical Power', 'Coalitional Group']
RELATIONS = [
    ['father-child', 'mother-child', 'grandpa-grandchild', 'grandma-grandchild'],
    ['friends', 'siblings', 'classmates'],
    ['lovers/spouses'],
    ['presenter-audience', 'teacher-student', 'trainer-trainee', 'leader-subordinate', 'customer-staff'],
    ['band members', 'dance team members', 'sport team members', 'colleages'],
]


def relation_to_domain(rel_label):
    """Translate a relation label into the name of its social domain.

    Lookup order:
      1. an exact match against the relation names in RELATIONS;
      2. otherwise the first entry of DOMAINS that occurs as a substring of
         ``rel_label`` (e.g. a label such as 'Attachent others');
      3. otherwise ``rel_label`` is returned unchanged.
    """
    # Step 1: exact relation name.
    for domain_name, members in zip(DOMAINS, RELATIONS):
        if any(rel_label == member for member in members):
            return domain_name

    # Step 2: label that embeds a domain name.
    containing = next((name for name in DOMAINS if name in rel_label), None)
    if containing is not None:
        return containing

    # Step 3: nothing matched, so hand the label back as-is.
    return rel_label

In [None]:
# Make sure the scratch directory exists. makedirs(exist_ok=True) is a single
# idempotent call: it also creates missing parent directories and avoids the
# check-then-create race of `isdir` followed by `mkdir`.
os.makedirs(egosocial.config.TMP_DIR, exist_ok=True)

# Hand the project's logging configuration and log directory to setup_logging.
setup_logging(egosocial.config.LOGGING_CONFIG,
              log_dir=egosocial.config.LOGS_DIR)

In [None]:
#!pip install pandas
import pandas as pd

def load_labels(labels_dir, labels_file_name):
    """Collect the relation labels of every segment into one DataFrame.

    Segments come from ``list_segments(labels_dir)`` and are visited in
    numeric order, so their ids must be convertible with ``int``. A segment
    whose ``labels_file_name`` file does not exist is skipped.

    Each labels file is read as JSON and iterated as a sequence of groups,
    every group providing a ``'group_id'`` and a ``'labels'`` entry. Groups are
    processed in numeric ``group_id`` order; a group with no labels contributes
    a single ``'unknown'`` label.

    Args:
        labels_dir: directory holding one sub-directory per segment.
        labels_file_name: name of the JSON file inside each segment directory.

    Returns:
        A DataFrame with one row per (segment, group, label) and the columns
        ``segment_id``, ``group_id``, ``label`` and ``domain_label``; the last
        one is ``relation_to_domain(label)``.
    """
    def group_number(group):
        return int(group['group_id'])

    rows = []
    for segment in sorted(list_segments(labels_dir), key=int):
        segment_dir = os.path.join(labels_dir, segment)
        # presumably validates the segment directory -- see egosocial.utils.filesystem
        check_directory(segment_dir, 'Labels')

        json_path = os.path.join(segment_dir, labels_file_name)
        if os.path.exists(json_path):
            with open(json_path, 'r') as handle:
                groups = json.load(handle)

            for group in sorted(groups, key=group_number):
                # An empty label list still yields one row, tagged 'unknown'.
                names = group['labels'] or ['unknown']
                gid = group_number(group)
                rows.extend(
                    (int(segment), gid, name, relation_to_domain(name))
                    for name in names
                )

    return pd.DataFrame(rows, columns=['segment_id', 'group_id', 'label', 'domain_label'])

In [None]:
# Root of the dataset tree, kept under the project's temporary directory.
BASE_DIR = os.path.join(egosocial.config.TMP_DIR, 'egocentric', 'datasets')
# Disabled alternative: a machine-specific absolute location of the labels.
#base_labels_dir = '/media/emasa/OS/Users/Emanuel/Downloads/NO_SYNC/Social Segments/labels/'

# Labels are stored per split ('train', 'test', 'extended') below this
# directory, each segment carrying a file named `labels_file_name`.
base_labels_dir = os.path.join(BASE_DIR, 'labels')
labels_file_name = 'labels.json'

# Load the labels of the 'train' split.
train_labels_dir = os.path.join(base_labels_dir, 'train')
train_labels_df = load_labels(train_labels_dir, labels_file_name)

In [None]:
# Horizontal bar chart of how often each relation label occurs in the 'train' split.
# (Swap `.plot.barh` for `.plot.pie` to get the pie-chart view instead.)
train_labels_df['label'].value_counts().plot.barh(figsize=(10, 10))

In [None]:
# Load the labels of the 'test' split, using the same file name as for 'train'.
test_labels_dir = os.path.join(base_labels_dir, 'test')

test_labels_df = load_labels(test_labels_dir, labels_file_name)

In [None]:
# Pie chart of how often each relation label occurs in the 'test' split.
# (Swap `.plot.pie` for `.plot.barh` to get the horizontal-bar view instead.)
test_labels_df['label'].value_counts().plot.pie(figsize=(10, 10))

In [None]:
# Load the labels of the 'extended' split, using the same file name as for 'train'.
extended_labels_dir = os.path.join(base_labels_dir, 'extended')
extended_labels_df = load_labels(extended_labels_dir, labels_file_name)

In [None]:
# Horizontal bar chart of how often each relation label occurs in the 'extended' split.
# (Swap `.plot.barh` for `.plot.pie` to get the pie-chart view instead.)
extended_labels_df['label'].value_counts().plot.barh(figsize=(10, 10))

In [None]:
# Stack the three splits into one frame. ignore_index=True gives the result a
# fresh 0..n-1 index; without it every split keeps its own row numbers, so the
# combined frame would carry duplicate index labels.
all_labels_df = pd.concat(
    [train_labels_df, test_labels_df, extended_labels_df],
    ignore_index=True,
)

In [None]:
import matplotlib.pyplot as plt
# Font size applied to the tick labels of both axes in the charts that follow.
FONT_SIZE = 25

# A tuple of rc groups sets `<group>.labelsize` for each group in one call.
plt.rc(('xtick', 'ytick'), labelsize=FONT_SIZE)

In [None]:
# Normalise the misspelled 'colleages' label. One .loc assignment writes into
# all_labels_df itself; the chained form `df.label[mask] = ...` can act on a
# temporary object (SettingWithCopyWarning) and changes nothing under pandas
# Copy-on-Write. The domain_label column is untouched: it was derived from the
# original spelling when the labels were loaded.
all_labels_df.loc[all_labels_df.label == 'colleages', 'label'] = 'colleagues'

# Labels left out of the charts below.
# NOTE(review): the reason for excluding 'siblings' and 'teacher-student' is
# not visible here -- presumably too few samples; confirm.
mask_filter = ~all_labels_df.label.isin(['Attachent others', 'unknown', 'siblings', 'teacher-student'])
included_labels_df = all_labels_df[mask_filter]

# Horizontal bar chart of the number of rows per remaining relation label.
relation_plot = included_labels_df.label.value_counts().plot(kind='barh', figsize=(10, 10))

# Print each bar's count just beyond its right-hand end.
for p in relation_plot.patches:
    relation_plot.annotate(str(p.get_width()), (p.get_width() * 1.01, p.get_y() * 1.03), fontsize=18)

In [None]:
# Horizontal bar chart of the number of rows per social domain (filtered labels only).
domain_plot = included_labels_df['domain_label'].value_counts().plot.barh(figsize=(10, 10))

# Print each bar's count just beyond its right-hand end.
for bar in domain_plot.patches:
    bar_width = bar.get_width()
    domain_plot.annotate(str(bar_width), (bar_width * 1.01, bar.get_y() * 1.05), fontsize=20)

# Last expression of the cell: the axes object is the cell's output.
domain_plot

In [None]:
# Relation labels of ALL rows (the exclusion filter used for the charts is not applied here).
relations = all_labels_df.label
# Number of rows per relation label, most frequent first.
counts = relations.value_counts()
# Cell output: the absolute counts and each label's share of the non-null labels.
counts, counts / relations.count()

In [None]:
# Domain labels of ALL rows (the exclusion filter used for the charts is not applied here).
domains = all_labels_df.domain_label
# Number of rows per domain; note that this rebinds the `counts` name used by the relation summary.
counts = domains.value_counts()
# Cell output: the per-domain counts.
counts