# Imports & Function Definitions

In [None]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
import kaleido as kal
import seaborn as sns

from matplotlib.lines import Line2D 
from io import StringIO

In [None]:
# collects the distinct "; "-separated entries of a list, sorted ascending
def unique(lst, _print = False):
    """Return the sorted distinct entries found in ``lst``.

    Every element is converted with ``str()`` and split on ``"; "``; each
    fragment is stripped of surrounding whitespace before de-duplication.

    Args:
        lst: Iterable of cell values of any type.
        _print: When true, print the distinct entries in first-seen order
            (i.e. before sorting).

    Returns:
        A new list with the distinct stripped fragments in ascending order.
    """
    # dict keys preserve first-seen order, which is what the optional print shows
    first_seen = dict.fromkeys(
        fragment.strip()
        for elem in lst
        for fragment in str(elem).split("; ")
    )
    unique_list = list(first_seen)

    if _print:
        print(unique_list)

    return sorted(unique_list)

In [None]:
# maps every term of a list to its position in that list
def map_idx(lst):
    """Build a ``term -> index`` dictionary for ``lst``.

    Args:
        lst: Iterable of hashable terms.

    Returns:
        A dict mapping each term to its zero-based position. If a term
        occurs more than once, the position of its last occurrence wins.
    """
    return {term: position for position, term in enumerate(lst)}

In [None]:
# builds a co-occurrence table for the values found in two columns of `data`
def relate_columns(col_name, row_name):
    """Count how often values of two columns appear in the same row of ``data``.

    Cells are converted with ``str()`` and split on ``"; "``, so a cell may
    contribute several values. For each row of the global ``data`` frame,
    every (row value, column value) pair increments its counter by one.

    Args:
        col_name: Column of ``data`` whose distinct values label the columns
            of the result.
        row_name: Column of ``data`` whose distinct values label the rows
            (index) of the result.

    Returns:
        A ``pandas.DataFrame`` of co-occurrence counts.
    """
    columns = unique(data.loc[:, col_name])
    rows = unique(data.loc[:, row_name])

    column_idx = map_idx(columns)
    row_idx = map_idx(rows)

    # one counter list per row label, one slot per column label
    counts = [[0] * len(columns) for _ in rows]

    for _, record in data.iterrows():
        col_terms = str(record[col_name]).split("; ")
        row_terms = str(record[row_name]).split("; ")

        for col_term in col_terms:
            for row_term in row_terms:
                counts[row_idx[row_term.strip()]][column_idx[col_term.strip()]] += 1

    return pd.DataFrame(data=np.array(counts), index=rows, columns=columns)

In [None]:
# counts the occurrences of the entities for the given column
def count_entries(name, thresh=-1, asc=True):
    """Count how often each entity occurs in column ``name`` of ``data``.

    Each cell of the global ``data`` frame is converted with ``str()`` and
    split on ``"; "``; every stripped fragment counts as one occurrence.

    Args:
        name: Column of ``data`` to analyse.
        thresh: When ``>= 0``, only entities occurring at least ``thresh``
            times are kept. The default ``-1`` disables filtering.
        asc: Sort by ascending count when true, descending otherwise.

    Returns:
        A dict ``entity -> count`` whose insertion order follows the
        requested sort order. Counts are NumPy integer scalars. Entities
        with equal counts keep the order in which they were first seen.
    """
    d = dict()

    for _, row in data.iterrows():
        for e in str(row[name]).split("; "):
            e_s = e.strip()
            d[e_s] = d.get(e_s, 0) + 1

    if thresh >= 0:
        d = {k: v for k, v in d.items() if v >= thresh}

    keys = list(d)
    values = np.array(list(d.values()))
    # a stable sort makes the order of ties deterministic (first seen first)
    sorted_value_index = np.argsort(values if asc else -values, kind="stable")

    return {keys[i]: values[i] for i in sorted_value_index}

In [None]:
# counts the occurrences of the entities per row for the given column
def count_row_entries(name, thresh = -1, asc=True):
    """Count the ``"; "``-separated entities per row in column ``name``.

    For every row of the global ``data`` frame, the cell in ``name`` is
    converted with ``str()`` and split on ``"; "``. The number of fragments
    is stored under the stringified value of the row's ``"Key"`` column;
    rows sharing a key overwrite each other.

    Args:
        name: Column of ``data`` to analyse.
        thresh: When ``>= 0``, only rows with at least ``thresh`` entities
            are kept. The default ``-1`` disables filtering.
        asc: Sort by ascending count when true, descending otherwise.

    Returns:
        A dict ``key -> entity count`` (plain ``int`` values) whose
        insertion order follows the requested sort order; ties keep the
        order in which the rows were encountered.
    """
    d = dict()

    for _, row in data.iterrows():
        _cell = str(row[name])
        d[str(row["Key"])] = len(_cell.split("; "))

    if thresh >= 0:
        d = {k: v for k, v in d.items() if v >= thresh}

    keys = list(d)
    values = list(d.values())
    # negation needs an ndarray: applying unary minus to a list raises TypeError
    counts = np.asarray(values)
    sorted_value_index = np.argsort(counts if asc else -counts, kind="stable")

    return {keys[i]: values[i] for i in sorted_value_index}

# Data Analysis

In [None]:
# load the publication table; missing cells are replaced by the string "None"
data = pd.read_csv('./Data/Final Publications.csv').fillna("None")

In [None]:
# Relate two columns by counting the number of publications with overlapping values.
# col_name sets the x-axis (table columns), row_name the y-axis (table rows).
relate_columns(col_name="Use Case", row_name="Domain")

In [None]:
# For every value of the first column, print the highest co-occurrence count
# and then the value of the second column at which that maximum is reached
df = relate_columns(col_name="Use Case", row_name="CSK Source")
for summarize in (df.max, df.idxmax):
    print(summarize())

In [None]:
# Count how often each entry occurs in the given column
count_entries(name='Evaluation DS')

In [None]:
# find titles that occur both in Zech2019 and in our publications (case-insensitive)
data_zech = pd.read_csv('./Data/Publications Zech2019.csv')

# case-fold our titles once instead of once per comparison
our_titles = [str(title).casefold() for title in data["Title"]]

# a Zech2019 title is listed once per matching publication of ours
overlap = [
    zech_title
    for zech_title in map(str, data_zech["Title"])
    for our_title in our_titles
    if our_title == zech_title.casefold()
]

print(overlap)

# Visualization

In [None]:
# Duplicates Heat Map
# the first (unnamed) CSV column holds the row labels
data_dup = pd.read_csv('./Data/Duplicates.csv').set_index('Unnamed: 0')

fig = px.imshow(
    data_dup,
    text_auto=True,
    aspect="auto",
    labels={"x": "Source", "y": "Source", "color": "# Duplicates"},
    color_continuous_scale=px.colors.sequential.RdBu,
    height=700,
    width=900,
)
# alpha 0 -> transparent plot background
fig.update_layout(plot_bgcolor="rgba(1, 0, 0, 0)")

# export as PDF through the kaleido engine (use fig.show() for an inline preview)
fig.write_image('./Images/Duplicates.pdf', format='pdf', engine='kaleido')

In [None]:
# CSK Source Usage bar chart (vertical)

# lst[i] = number of publications that list i CSK sources;
# lst[0] collects publications whose only entry is 'Not mentioned'
usage = count_row_entries('CSK Source')
# keep the six buckets (0-5) and grow if a publication lists more than five
# sources, instead of failing with an IndexError
lst = [0] * max(6, max(usage.values(), default=0) + 1)
# count_row_entries stringifies the keys, so compare against stringified keys
keys_as_text = data['Key'].astype(str)

for k, v in usage.items():
    pos = v
    if v == 1:
        # a single entry is either one real source or the placeholder
        row = data.loc[keys_as_text == k]
        pos = 0 if row['CSK Source'].iloc[0] == 'Not mentioned' else 1
    lst[pos] += 1

df = pd.DataFrame(lst)
ax = df.plot(kind='bar', stacked=False, ylabel='# Publications', xlabel='Used Sources', rot=0, legend=False)
ax.bar_label(ax.containers[0])
fig = ax.get_figure()
fig.savefig('./Images/CSK Source Usage.pdf', dpi=300, format='pdf', bbox_inches='tight')

In [None]:
# CSK Source Type bar chart (vertical)

# hand-collected table: per source type, the number of sources and of publications
s = StringIO("""Type;Sources;Publications
Human;3;18
Unstructured;10;17
Semi-Structured;6;11
Structured;10;26""")

df = pd.read_csv(s, index_col=0, sep=';', skipinitialspace=True)
ax = df.plot.bar(stacked=False, ylabel='Amount', xlabel='Type', rot=0)
# one container per plotted column; annotate every bar with its value
for bars in ax.containers:
    ax.bar_label(bars)
fig = ax.get_figure()
fig.savefig('./Images/CSK Source Type.pdf', dpi=300, format='pdf', bbox_inches='tight')

In [None]:
# Application Domains

# publications per domain, most frequent first
domains = count_entries('Domain', asc=False)
df = pd.DataFrame(list(domains.values()), index=list(domains))
ax = df.plot.bar(stacked=False, ylabel='# Publications', xlabel='Domain', rot=0, legend=False)
ax.bar_label(ax.containers[0])
fig = ax.get_figure()
fig.savefig('./Images/Domains.pdf', dpi=300, format='pdf', bbox_inches='tight')

In [None]:
# Use Cases

# hand-collected table: per use case, the number of publications in each role
s = StringIO("""use case;Focus;Proof-of-Concept;Neither
Object Localization;3;5;0
Object Delivery;2;5;0
Environment Exploration;4;1;0
Tool Substitution;5;0;0
Object Recognition;3;1;0
Intention Inference;2;1;0
Pick & Place;0;3;0
Cooking;0;2;0
Navigation;1;1;0
Table Setting;1;1;0
Tidy Up;2;0;0
Warehousing;0;2;0
Hole Digging;0;1;0
Location Detection;0;1;0
Reminiscence Therapy;1;0;0
None;0;0;4""")

df = pd.read_csv(s, index_col=0, delimiter=';', skipinitialspace=True)
# aggfunc is given by name: passing np.sum is deprecated in recent pandas
# releases and emits a FutureWarning; sort=False keeps the row order of the table
dfp = df.pivot_table(values=['Focus', 'Proof-of-Concept', 'Neither'], index='use case', aggfunc='sum', sort=False)
fig = dfp.plot(kind='barh', stacked=True, ylabel='', xlabel='# Publications', rot=0).get_figure()

# write each segment's value into its centre; empty segments get no label
ax = fig.gca()
for bar in ax.patches:
    width = bar.get_width()
    height = bar.get_height()
    x = bar.get_x()
    y = bar.get_y()
    if width > 0:
        ax.text(x + width/2, y + height/2, f'{width:.0f}', ha='center', va='center', color='black', fontsize=10)

fig.savefig('./Images/Use Cases.pdf', dpi=300, format='pdf', bbox_inches='tight')

In [None]:
# Year

years = count_entries('Year')
# show 2017 as an empty bar if it has no publications; setdefault never
# overwrites a real count, unlike update()
years.setdefault("2017", 0)
df = pd.DataFrame(years.values(), index=years.keys())
# the keys are strings, so this orders the year labels lexicographically
df.sort_index(inplace=True)

ax = df.plot(kind='bar', stacked=False, ylabel='# Publications', xlabel='Year', rot=0, legend=False)
ax.bar_label(ax.containers[0])
fig = ax.get_figure()
fig.savefig('./Images/Year.pdf', dpi=300, format='pdf', bbox_inches='tight')

In [None]:
# Venue

# publications per venue type, most frequent first
venue = count_entries('Type', asc=False)
df = pd.DataFrame(list(venue.values()), index=list(venue))
ax = df.plot.bar(stacked=False, ylabel='# Publications', xlabel='Venue Type', rot=0, legend=False)
ax.bar_label(ax.containers[0])
fig = ax.get_figure()
fig.savefig('./Images/Venue Type.pdf', dpi=300, format='pdf', bbox_inches='tight')

In [None]:
# Questions (Old)

# per line: question; number of publications answering it; category
s = StringIO("""use case;amount;cat
What is the expected location for an object?;22;a) Objects
What affordances does an object have?;17;a) Objects
Which tools can be used for a certain task?;11;a) Objects
Which objects are similar to the given object?;9;a) Objects
How can I interact with an object / container / etc.?;8;a) Objects
Can I accomplish the given task or do I need help?;7;b) Interaction
What are the physical properties of an object (e.g. size, shape, color)?;7;a) Objects
Where to place objects (on a table)?;7;a) Objects
How can an object be transported / grasped?;6;a) Objects
How can I react to an incomplete command by a human?;4;b) Interaction
What are the spatial relations of this object?;4;a) Objects
What materials make up the object?;4;a) Objects
Which objects in the environment need to be avoided?;4;a) Objects
What are the intentions a human could have with a certain object?;3;b) Interaction
What is the outcome of my current action?;3;c) Causality
Where are specific humans located?;3;b) Interaction
Does my new knowledge contradict my knowledge base?;2;c) Causality
What activity / event do I perceive?;2;c) Causality
What is the location where certain objects are currently located?;2;a) Objects
What parts does the object consist of?;2;a) Objects
Which brand produced the object?;2;a) Objects
What aspects of my environment are changing?;1;c) Causality
What is the (current) functional state of the object?;1;a) Objects
What is the (shortest) distance to my current goal?;1;c) Causality
What is the sentiment of a concept (positive / negative)?;1;c) Causality""")

# bar colour per category
colors = {'a) Objects':'tab:blue', 'b) Interaction':'tab:orange', 'c) Causality':'tab:green'}
df = pd.read_csv(s, index_col=0, sep=';', skipinitialspace=True)

# single-axes figure
fig, ax = plt.subplots()
width = 0.5

# one horizontal bar per question, coloured by its category
bar_colors = df['cat'].map(colors)
df['amount'].plot.barh(color=bar_colors, ax=ax, width=width, position=0.5, rot=0)
ax.bar_label(ax.containers[0], padding=3)

# legend: one round marker per category
handles = [
    Line2D([0], [0], marker='o', color='w', markerfacecolor=tone, label=category, markersize=8)
    for category, tone in colors.items()
]
ax.legend(title='Category', handles=handles, bbox_to_anchor=(0.75, 0.75), loc='center')

ax.set_xlabel('# Publications providing an answer')
ax.set_ylabel('Questions')
fig.savefig('./Images/QuestionsOld.pdf', dpi=300, format='pdf', bbox_inches='tight')

In [None]:
# Questions (New)

# per line: question; number of publications answering it; comma-separated categories
s = StringIO("""use case;amount;cat
What is the expected location for an object?;22;Object-Centred Reasoning
What affordances does an object have?;17;Object-Centred Reasoning
Which tools can be used for a certain task?;11;Object-Centred Reasoning
Which objects are similar to the given object?;9;Object-Centred Reasoning
How can I interact with an object / container / etc.?;8;Object-Centred Reasoning,Task-Specific Knowledge
Can I accomplish the given task or do I need help?;7;Intuitive Psychology,Task-Specific Knowledge
What are the physical properties of an object (e.g. size, shape, color)?;7;Object-Centred Reasoning
Where to place objects (on a table)?;7;Task-Specific Knowledge
How can an object be transported / grasped?;6;Object-Centred Reasoning,Intuitive Physics
How can I react to an incomplete command by a human?;4;Intuitive Psychology
What are the spatial relations of this object?;4;Object-Centred Reasoning,Intuitive Physics
What materials make up the object?;4;Object-Centred Reasoning
Which objects in the environment need to be avoided?;4;Intuitive Physics
What are the intentions a human could have with a certain object?;3;Intuitive Psychology
What is the outcome of my current action?;3;Intuitive Physics
Where are specific humans located?;3;Intuitive Psychology
Does my new knowledge contradict my knowledge base?;2;Intuitive Psychology
What activity / event do I perceive?;2;Intuitive Physics,Task-Specific Knowledge
What is the location where certain objects are currently located?;2;Object-Centred Reasoning
What parts does the object consist of?;2;Object-Centred Reasoning
Which brand produced the object?;2;Object-Centred Reasoning
What aspects of my environment are changing?;1;Intuitive Physics
What is the (current) functional state of the object?;1;Object-Centred Reasoning,Intuitive Physics
What is the (shortest) distance to my current goal?;1;Intuitive Physics
What is the sentiment of a concept (positive / negative)?;1;Intuitive Psychology""")

# bar colour per category (every entry also appears in the legend)
colors = {'Object-Centred Reasoning':'tab:blue', 'Task-Specific Knowledge':'tab:orange', 'Intuitive Psychology':'tab:green', 
          'Intuitive Physics':'tab:purple','Temporal Reasoning': 'tab:brown'}

# Parse the table defined in this cell. Without this step the loop below would
# read whatever `df` an earlier cell left behind, whose categories are not in `colors`.
# Fields are ';'-separated, so the commas inside `cat` stay part of the value.
df = pd.read_csv(s, index_col=0, delimiter=';', skipinitialspace=True)

# Split multi-category questions
fig, ax = plt.subplots(figsize=(7,8))
width = 0.4

for i, (idx, row) in enumerate(df.iterrows()):
    cats = [c.strip() for c in row['cat'].split(',')]
    # the bar is divided into equal segments, one per category
    amount_per_cat = row['amount'] / len(cats)
    
    left = 0
    for cat in cats:
        ax.barh(i, amount_per_cat, left=left, color=colors[cat], height=width)
        left += amount_per_cat
    # total amount, written just right of the completed bar
    ax.text(left + 0.2, i, str(row['amount']), va='center', ha='left')

# Labels
ax.set_yticks(range(len(df)))
ax.set_yticklabels(df.index)
ax.set_xlabel('# Publications providing an answer')
ax.set_ylabel('Questions')

# Add legend
handles = [Line2D([0], [0], marker='o', color='w', markerfacecolor=v, label=k, markersize=8) for k, v in colors.items()]
ax.legend(title='Category', handles=handles, bbox_to_anchor=(0.75, 0.75), loc='center')

plt.savefig('./Images/QuestionsNew.pdf', dpi=300, format='pdf', bbox_inches='tight')

In [None]:
# Question Domains

# hand-collected table: per ECSK domain, the number of questions and of papers
s = StringIO("""ECSK Domain;# Questions;# Papers
Object-Centred Reasoning;12;43
Task-Specific Knowledge;4;17
Intuitive Physics;8;18
Intuitive Psychology;6;17""")

df = pd.read_csv(s, index_col=0, sep=';', skipinitialspace=True)
ax = df.plot.barh(stacked=False, ylabel='ECSK Domain', xlabel='Amount', rot=0)
# one container per plotted column; annotate every bar with its value
for bars in ax.containers:
    ax.bar_label(bars)
fig = ax.get_figure()
fig.savefig('./Images/ECSK Domains.pdf', dpi=300, format='pdf', bbox_inches='tight')

In [None]:
# Evaluation Setting

# hand-collected table: publications per evaluation setting and environment
s = StringIO("""Setting;Approaches;Simulated;Real-World
None;2;0;0
Questionnaire;1;0;0
Motivating Example;5;0;0
Model Evaluation;7;0;0
Case Study;0;6;7
Experiment;0;20;12""")

df = pd.read_csv(s, index_col=0, delimiter=';', skipinitialspace=True)
# aggfunc is given by name: passing np.sum is deprecated in recent pandas
# releases and emits a FutureWarning; sort=False keeps the row order of the table
dfp = df.pivot_table(values=['Approaches', 'Simulated', 'Real-World'], index='Setting', aggfunc='sum', sort=False)
fig = dfp.plot(kind='barh', stacked=True, ylabel='', xlabel='# Publications', rot=0).get_figure()

# write each segment's value into its centre; empty segments get no label
ax = fig.gca()
for bar in ax.patches:
    width = bar.get_width()
    height = bar.get_height()
    x = bar.get_x()
    y = bar.get_y()
    if width > 0:
        ax.text(x + width/2, y + height/2, f'{width:.0f}', ha='center', va='center', color='black', fontsize=10)

fig.savefig('./Images/Evaluation Setting.pdf', dpi=300, format='pdf', bbox_inches='tight')

In [None]:
# Main CSK Sources

sources = count_entries('CSK Source', asc=False)
# sources that occur exactly once are merged into a single 'Others' bar
small_sources = [name for name, count in sources.items() if count == 1]
sources = {name: count for name, count in sources.items() if count != 1}
sources['Others'] = len(small_sources)

df = pd.DataFrame(list(sources.values()), index=list(sources))
ax = df.plot.bar(stacked=False, ylabel='# Publications', xlabel='', rot=90, legend=False)
ax.bar_label(ax.containers[0])
# slanted, right-aligned tick labels
ax.set_xticklabels(df.index, rotation=45, ha='right')
fig = ax.get_figure()
fig.savefig('./Images/CSK Main Sources.pdf', dpi=300, format='pdf', bbox_inches='tight')