In [1]:
# -*- coding: utf-8 -*-
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
# Raise the pandas display limits so long result tables are shown without truncation.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 20


import numpy as np

from scipy import stats

from bokeh.io import show, output_file
from bokeh.plotting import figure

 
# Authorize a gspread client with the service-account key file that sits next to
# the notebook ('client_secret.json').
# The Drive scope is requested together with the Sheets feed scope: the notebook
# later calls client.openall(), and current gspread releases list spreadsheets
# through the Drive API, which rejects tokens that only carry the feed scope.
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]
creds = ServiceAccountCredentials.from_json_keyfile_name('client_secret.json', scope)
client = gspread.authorize(creds)

from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.transform import factor_cmap
from bokeh.palettes import Category20b
from bokeh.models import LabelSet
import math

# Route Bokeh output to the notebook so every show(p) call renders inline.
output_notebook()

In [2]:
# List every spreadsheet visible to the authorized client as "<title> - <id>";
# the id is what open_by_key() expects.
print("The following sheets are available")
available_workbooks = client.openall()
for workbook in available_workbooks:
    print("{} - {}".format(workbook.title, workbook.id))

The following sheets are available
SLR. Software Architectures HCI/HMI - 1msTveEyKp-IHVVQfsSpNjJquzKmvfUj5J6wloXoYME0
OEEU Opinión Cuestionario A (Responses) - 1_1ztA2Yu0s1EZqxXqd1PmNpOBWAUKaQhDHEt70aI2VA
OEEU Opinión Cuestionario B (Responses) - 1dAFnuMtqo8_cknCYjHeq7PHdQilqDv0l39e0wdx_jdk
OEEU Opinión Cuestionario C (Responses) - 1ybqILnJrf0EWCHOzIGDElqfK-a08GrkoEpNrLVtl1wo


In [3]:
# Key of the "SLR. Software Architectures HCI/HMI" spreadsheet (see the listing above).
SLR_SPREADSHEET_KEY = "1msTveEyKp-IHVVQfsSpNjJquzKmvfUj5J6wloXoYME0"
# Worksheet indexes are zero-based, so 3 selects the fourth tab.
FINAL_RESULTS_TAB_INDEX = 3

spreadsheet = client.open_by_key(SLR_SPREADSHEET_KEY)
sheetFinalResults = spreadsheet.get_worksheet(FINAL_RESULTS_TAB_INDEX)

In [4]:
# Pull every row of the final-results worksheet (one dict per row, keyed by the
# header row) and load it into a DataFrame.
final_result_rows = sheetFinalResults.get_all_records()
data = pd.DataFrame(final_result_rows)

print("Number of papers resulting the SLR: ", len(data.index))

Number of papers resulting the SLR:  39


In [5]:
# Preview the first five papers to check the column layout.
display(data.iloc[:5])

Unnamed: 0,Authors,Conference or journal name,DOI,"HCI, HMI or both?",Keywords,Source (name of publication),Title,"Type of publication (Journal, Book, Conference proceedings, etc)",Year
0,"Vega-Barbas, Mario;Pau, Ivan;Martin-Ruiz, Mari...",SENSORS,10.3390/s150407294,,Telemedicine sensor software architecture;Assi...,SENSORS,Adaptive Software Architecture Based on Confid...,Article,2015
1,"Chaczko, Zenon;Alenazy, Wael; Chan, Cheuk Yan",IBIMA Conference,,,Software architecture;Smart learning environme...,INNOVATION MANAGEMENT AND SUSTAINABLE ECONOMIC...,Middleware-based Software Architecture For Int...,Conference paper,2015
2,"Alonzo, Rommel;Cremer, Sven;Mirza, Fahad;Gowda...",Next-Generation Robotics II; and Machine Intel...,10.1117/12.2177641,,Multimodal;Multi-user;Multi-sensor framework,"NEXT-GENERATION ROBOTICS II, AND MACHINE INTEL...",Multi-Modal Sensor and HMI Integration with Ap...,Conference paper,2015
3,"Mackin, Michael A.;Gonia, Phillip T.;Lombay-Go...",IEEE AEROSPACE CONFERENCE,10.1109/AERO.2012.6187352,,,2012 IEEE AEROSPACE CONFERENCE,An Information System Prototype for Analysis o...,Conference paper,2012
4,"Biel, Bettina;Grill, Thomas;Gruhn, Volker",JOURNAL OF SYSTEMS AND SOFTWARE,10.1016/j.jss.2010.03.079,,Human–computer interaction;Software architectu...,JOURNAL OF SYSTEMS AND SOFTWARE,Exploring the benefits of the combination of a...,Article,2010


In [6]:
# Each "Keywords" cell holds a ';'-separated list of terms. Flatten all cells
# into one list of stripped, non-empty terms.
# str() guards against cells that the sheet reader has turned into numbers,
# which would otherwise fail on .split().
keywords = data["Keywords"].values
keywords_list = [
    term.strip()
    for cell in keywords
    for term in str(cell).split(';')
    if term.strip()
]
display(keywords_list)

# One row per keyword occurrence; the single column is named in a later cell.
dfKeywords = pd.DataFrame(keywords_list)

['Telemedicine sensor software architecture',
 'Assistive services',
 'Digital home',
 'Activity centered design',
 'Confidence',
 'Software architecture',
 'Smart learning environment',
 'Middleware',
 'HMI',
 'Multimodal',
 'Multi-user',
 'Multi-sensor framework',
 'Human–computer interaction',
 'Software architecture analysis',
 'Usability evaluation',
 'Software engineering',
 'Ubiquitous computing',
 'Multimodality',
 'Mobility',
 'Smart spaces',
 'Middleware architecture',
 'Multi-agent systems',
 'Context-awareness',
 'Ubiquitous services',
 'Usability',
 'User interface design patterns',
 'Software architecture',
 'Usability factors',
 'Software quality metrics',
 'HCI design principles',
 'Ubiquitous Environment',
 'User Profile',
 'Ubiquitous Accessibility',
 'Context-Aware',
 'Adaptive Interface',
 'Raspberry PI',
 'Cognitive workload',
 'Usability testing workflow',
 'Speech',
 'Eye tracking',
 'Brain Computer Interfaces (BCIs)',
 'Tablet',
 'Home appliances',
 'Communicati

In [7]:
# Show the flattened keyword table (one row per keyword occurrence).
display(dfKeywords)

Unnamed: 0,0
0,Telemedicine sensor software architecture
1,Assistive services
2,Digital home
3,Activity centered design
4,Confidence
5,Software architecture
6,Smart learning environment
7,Middleware
8,HMI
9,Multimodal


In [8]:
# Name the single keyword column 'Term'; the counting step that follows reads
# the column under that name.
dfKeywords.columns = ['Term']

In [9]:
# Occurrences of each keyword, most frequent first; the terms form the index.
term_frequencies = dfKeywords['Term'].value_counts()
resultsKeywords = term_frequencies.to_frame()
display(resultsKeywords)

Unnamed: 0,Term
Software architecture,6
Multimodality,3
Context-awareness,3
Middleware,2
Mobility,2
Human-robot interaction,2
Human–computer interaction,2
Ubiquitous computing,2
Usability,2
robot,1


In [10]:
# Build the flat (Term, Count) table straight from dfKeywords instead of
# mutating resultsKeywords in place. The in-place reset_index() added one more
# column on every re-run, so running this cell twice failed on the column
# assignment. Recomputing keeps the cell idempotent.
resultsKeywords = (
    dfKeywords['Term']
    .value_counts()          # most frequent term first
    .rename_axis('Term')     # name the index so it becomes the 'Term' column
    .reset_index(name='Count')
)

display(resultsKeywords)

Unnamed: 0,Term,Count
0,Software architecture,6
1,Multimodality,3
2,Context-awareness,3
3,Middleware,2
4,Mobility,2
5,Human-robot interaction,2
6,Human–computer interaction,2
7,Ubiquitous computing,2
8,Usability,2
9,robot,1


In [11]:
# Bar chart of keyword frequencies, one bar per distinct term.
keyword_terms = resultsKeywords['Term'].tolist()
keyword_counts = resultsKeywords['Count'].tolist()

p = figure(x_range=keyword_terms, plot_width=1250, toolbar_location=None, title="Keywords Counts")
p.vbar(x=keyword_terms, top=keyword_counts, width=0.3)
p.xgrid.grid_line_color = None
p.y_range.start = 0
# Keep the original fixed headroom of 9, but grow the axis if a count ever
# exceeds it so that no bar is clipped.
p.y_range.end = max(keyword_counts + [9])
# No legend is configured: the glyph has no legend label, so setting
# p.legend.* had no effect.
p.xaxis.major_label_orientation = math.pi/2  # vertical labels; terms are long

show(p)

In [12]:
# Each "Authors" cell holds a ';'-separated list of names. Flatten all cells
# into one list of stripped, non-empty names.
# str() guards against cells that are not strings, which would fail on .split().
authors = data["Authors"].values
authors_list = [
    name.strip()
    for cell in authors
    for name in str(cell).split(';')
    if name.strip()
]

# One row per (paper, author) occurrence. Passing the column name at
# construction also works when no author was found, where assigning
# .columns afterwards would raise.
dfAuthors = pd.DataFrame(authors_list, columns=['Name'])
display(dfAuthors)

Unnamed: 0,Name
0,"Vega-Barbas, Mario"
1,"Pau, Ivan"
2,"Martin-Ruiz, Maria Luisa"
3,"Seoane, Fernando"
4,"Chaczko, Zenon"
5,"Alenazy, Wael"
6,"Chan, Cheuk Yan"
7,"Alonzo, Rommel"
8,"Cremer, Sven"
9,"Mirza, Fahad"


In [28]:
# Papers per author as a flat (Name, Count) table, ordered by count (highest
# first) and then alphabetically by name.
author_frequencies = dfAuthors['Name'].value_counts()
resultsAuthors = (
    author_frequencies
    .rename_axis('Name')
    .reset_index(name='Count')
    .sort_values(by=['Count', 'Name'], ascending=[False, True])
)
display(resultsAuthors)

Unnamed: 0,Name,Count
1,"Bellik, Yacine",2
0,"Bourda, Yolaine",2
2,"Jacquet, Christophe",2
45,"Abran, Alain",1
116,"Alenazy, Wael",1
109,"Almeida, AT",1
147,"Alonzo, Rommel",1
120,"Arkin, C.",1
126,"Barreto, Guilherme A.",1
137,"Bass, Len",1


In [14]:
# Bar chart of the number of papers per author.
author_names = resultsAuthors['Name'].tolist()
author_counts = resultsAuthors['Count'].tolist()

p = figure(x_range=author_names, plot_width=1250, toolbar_location=None, title="Authors Counts")
p.vbar(x=author_names, top=author_counts, width=0.3)
p.xgrid.grid_line_color = None
p.y_range.start = 0
# Keep the original fixed headroom of 4, but grow the axis if an author ever
# has more papers so that no bar is clipped.
p.y_range.end = max(author_counts + [4])
# No legend is configured: the glyph has no legend label, so setting
# p.legend.* had no effect.
p.xaxis.major_label_orientation = math.pi/2  # vertical labels; names are long

show(p)

In [15]:
# Papers per publication year as a flat (Year, Count) table, newest year first.
year_frequencies = data['Year'].value_counts()
resultsYears = (
    year_frequencies
    .rename_axis('Year')
    .reset_index(name='Count')
    .sort_values(by=['Year', 'Count'], ascending=[False, False])
)
display(resultsYears)

Unnamed: 0,Year,Count
10,2017,1
0,2015,6
1,2014,6
3,2013,3
2,2012,5
4,2011,3
5,2010,3
6,2009,2
7,2008,2
8,2007,2


In [16]:
# Bar chart of papers per year. Years are cast to strings because they are used
# as categorical factors on the x axis.
year_labels = resultsYears['Year'].astype('str').tolist()
paper_counts = resultsYears['Count'].astype('int').tolist()

p = figure(x_range=year_labels, plot_width=600, toolbar_location=None, title="Papers per year")
p.vbar(x=year_labels, top=paper_counts, width=0.9)
p.xgrid.grid_line_color = None
# Anchor the bars on the x axis, as the keyword and author charts do.
p.y_range.start = 0
# No legend is configured: the glyph has no legend label, so setting
# p.legend.* had no effect.
p.xaxis.major_label_orientation = math.pi/2

show(p)

In [17]:
# Papers per publication type as a flat (Type, Count) table, sorted by type
# name in reverse alphabetical order.
PUBLICATION_TYPE_COLUMN = 'Type of publication (Journal, Book, Conference proceedings, etc)'
type_frequencies = data[PUBLICATION_TYPE_COLUMN].value_counts()
resultsType = (
    type_frequencies
    .rename_axis('Type')
    .reset_index(name='Count')
    .sort_values(by=['Type', 'Count'], ascending=[False, False])
)
display(resultsType)

Unnamed: 0,Type,Count
0,Conference paper,22
2,Chapter,2
1,Article,15


In [29]:
# Bar chart of papers per publication type.
publication_types = resultsType['Type'].tolist()
type_counts = resultsType['Count'].tolist()

p = figure(x_range=publication_types, title="Publication type")
p.vbar(x=publication_types, top=type_counts, width=0.9)
p.xgrid.grid_line_color = None
# Anchor the bars on the x axis, as the keyword and author charts do.
p.y_range.start = 0
# No legend is configured: the glyph has no legend label, so setting
# p.legend.* had no effect.

show(p)

In [30]:
# Papers per conference or journal as a flat (Name, Count) table, sorted
# alphabetically by venue name.
venue_frequencies = data['Conference or journal name'].value_counts()
resultsSource = (
    venue_frequencies
    .rename_axis('Name')
    .reset_index(name='Count')
    .sort_values(by=['Name', 'Count'], ascending=[True, False])
)
display(resultsSource)

Unnamed: 0,Name,Count
27,ACM International Conference on Intelligent Us...,1
32,ACM International Conference on Multimodal Int...,1
34,ACM International Joint Conference on Pervasiv...,1
5,ADVANCED INTELLIGENT ENVIRONMENTS,1
12,Ambient Intelligence,1
26,"Automation, Communication and Cybernetics in S...",1
29,Autonomous Robots,1
7,Conference on Human Factors in Computing Systems,1
20,Frontiers in Neural Circuits,1
4,Human-Computer Interaction – INTERACT,1


In [20]:
# Bar chart of papers per conference or journal. Names and counts are cast
# explicitly so the categorical axis gets strings and the bar heights get ints.
venue_names = resultsSource['Name'].astype('str').tolist()
venue_counts = resultsSource['Count'].astype('int').tolist()

p = figure(x_range=venue_names, plot_height=300, title="Source (name of the publication)")
p.vbar(x=venue_names, top=venue_counts, width=0.5)
p.xgrid.grid_line_color = None
# Anchor the bars on the x axis, as the keyword and author charts do.
p.y_range.start = 0
# No legend is configured: the glyph has no legend label, so setting
# p.legend.* had no effect.


show(p)