# PQE Research Papers Progress Report 

In [1]:
import pandas as pd
import numpy
import csv
from os import listdir
import re

## Loading the pre-trained GloVe model

In [2]:
from gensim.scripts.glove2word2vec import glove2word2vec
from gensim.models import KeyedVectors
# Read the Stanford GloVe vectors into a gensim KeyedVectors object.
# The file is parsed as text in word2vec layout (binary=False); its name
# suggests the 840B-token, 300-dimensional release.
filename = 'D:/RA Maher/Glove Trained Vectors/glove.840B.300d.txt.word2vec'
model = KeyedVectors.load_word2vec_format(fname=filename, binary=False)



## Human-centered Computing (HCC) Tags/Keywords from ACM

In [3]:
import pickle
from anytree import Node, RenderTree

# Load the pickled tree of HCC concepts (rendered below with anytree).
# NOTE: pickle.load can execute arbitrary code stored in the file; only
# open pickles that come from a trusted source.
with open('hcc_tree.pickle', 'rb') as tree_file:
    b = pickle.load(tree_file)

print("Source: https://dl.acm.org/ccs/ccs.cfm?id=10003120&lid=0.10003120\n")
# RenderTree yields (prefix, fill, node) triples; the drawing prefix followed
# by the node name gives one line of the ASCII tree.
for pre, _fill, node in RenderTree(b):
    print(pre, node.name, sep="")

Source: https://dl.acm.org/ccs/ccs.cfm?id=10003120&lid=0.10003120

Human-centered computing
├── Human computer interaction (HCI)
│   ├── HCI design and evaluation methods
│   │   ├── User models
│   │   ├── User studies
│   │   ├── Usability testing
│   │   ├── Heuristic evaluations
│   │   ├── Walkthrough evaluations
│   │   ├── Laboratory experiments
│   │   └── Field studies
│   ├── Interaction paradigms
│   │   ├── Hypertext / hypermedia
│   │   ├── Mixed / augmented reality
│   │   ├── Command line interfaces
│   │   ├── Graphical user interfaces
│   │   ├── Virtual reality
│   │   ├── Web-based interaction
│   │   ├── Natural language interfaces
│   │   └── Collaborative interaction
│   ├── Interaction devices
│   │   ├── Graphics input devices
│   │   ├── Displays and imagers
│   │   ├── Sound-based input / output
│   │   ├── Keyboards
│   │   ├── Pointing devices
│   │   ├── Touch screens
│   │   └── Haptic devices
│   ├── HCI theory, concepts and models
│   ├── Interaction tec

In [4]:
# Read the HCC tag file: `raw` keeps the lines exactly as read, `hcc_tags`
# holds the same entries with surrounding whitespace removed.
with open("D:/RA Maher/Human-centered computing.txt", 'r') as f:
    raw = list(f)
hcc_tags = list(map(str.strip, raw))

In [5]:
# Flatten the HCC tags into lower-cased words:
#   entry -> '||'-separated tags -> runs of word characters/apostrophes,
# keeping only tokens longer than one character.
hcc_words = [
    token.lower()
    for entry in hcc_tags
    for phrase in entry.split('||')
    for token in re.findall(r"[\w']+", phrase)
    if len(token) > 1
]

In [6]:
# Deduplicate the vocabulary. Sorting keeps the order identical on every run:
# iterating a set of strings directly can yield a different order after each
# kernel restart because str hashes are randomized per process.
hcc_words = sorted(set(hcc_words))

In [7]:
# Word -> embedding vector lookup for the HCC tag vocabulary (starts empty).
tag_words_vectors = dict()

In [8]:
# Look up the embedding of every unique HCC tag word.
# get_vector raises KeyError for a word that is not in the embedding
# vocabulary, which would abort the loop half-way; such words are skipped
# and collected so they can be inspected instead.
hcc_words_missing = []
for word in hcc_words:
    if word in model:
        tag_words_vectors[word] = model.get_vector(word)
    else:
        hcc_words_missing.append(word)

if hcc_words_missing:
    print("Skipped words without an embedding:", hcc_words_missing)

## Loading tags from scraped metadata

In [9]:
# Folder holding the scraped metadata files. The trailing slash matters:
# file names are appended to this string by plain concatenation.
input_path = 'D:/RA Maher/Scraped_meta - Copy/'

In [10]:
# Field names of the scraped paper metadata, in file order.
cols = [
    "id", "title", "issn", "location", "note",
    "publisher", "address", "acmid", "journal", "url",
    "volume", "issue_date", "doi", "number", "month",
    "year", "pages", "tags", "refs",
]

In [11]:
# Read every file found in input_path into its own DataFrame, using the
# "id" column as the index and decoding the text as latin-1.
frames = [
    pd.read_csv(input_path + file, index_col="id", encoding='latin-1')
    for file in listdir(input_path)
]

## All Papers

In [12]:
# Stack the per-file frames, drop rows that are exact duplicates and
# renumber the index from 0.
full_df = (
    pd.concat(frames)
    .drop_duplicates()
    .reset_index(drop=True)
)
full_df.shape

(108271, 19)

## HCC Papers

In [13]:
# Accumulator for the acmids of papers that carry an HCC tag.
hcc_papers = []
# Lower-cased HCC tag entries, compared against individual paper tags below.
# NOTE(review): entries are kept whole here, while the word-extraction cell
# splits each entry on '||' -- if any line of the tag file joins several tags
# with '||', that entry can never equal a single paper tag. Confirm.
hcc_tags_lowered = list(map(str.lower, hcc_tags))

In [14]:
# Collect the acmid of every paper that carries at least one HCC tag.
# A set gives constant-time membership tests; scanning the hcc_tags_lowered
# list once per tag of every paper is needlessly slow on the full corpus.
hcc_tag_lookup = set(hcc_tags_lowered)
papers_hcc_words = []
for index, row in full_df.iterrows():
    current_tags = row['tags'].split('||')
    # A paper is recorded once, however many of its tags match.
    if not hcc_tag_lookup.isdisjoint(current_tags):
        hcc_papers.append(row["acmid"])

In [15]:
# Collapse repeated ids, then show how many distinct HCC papers were found.
hcc_papers = [*set(hcc_papers)]
len(hcc_papers)

12911

In [16]:
# Keep only the rows whose acmid was flagged as an HCC paper.
is_hcc_paper = full_df['acmid'].isin(hcc_papers)
hcc_papers_df = full_df.loc[is_hcc_paper]

In [21]:
# Display the HCC papers with every column except the first one.
all_but_first_column = slice(1, None)
hcc_papers_df.iloc[:, all_but_first_column]

Unnamed: 0,title,issn,location,note,publisher,address,acmid,journal,url,volume,issue_date,doi,number,month,year,pages,tags,refs
161,Using Mixture Models for Collaborative Filtering,Not Found,"Chicago, IL, USA",Not Found,ACM,"New York, NY, USA",1007439,Not Found,http://doi.acm.org/10.1145/1007352.1007439,Not Found,Not Found,10.1145/1007352.1007439,Not Found,Not Found,2004,569--578,algorithms||clustering||collaborative filterin...,
184,Automating Commutativity Analysis at the Desig...,Not Found,"Boston, Massachusetts, USA",Not Found,ACM,"New York, NY, USA",1007535,Not Found,http://doi.acm.org/10.1145/1007512.1007535,Not Found,Not Found,10.1145/1007512.1007535,Not Found,Not Found,2004,165--174,alloy||case study||commutativity||concurrency|...,
196,Improving Partial Parsing Based on Error-patte...,1530-0226,Not Found,Not Found,ACM,"New York, NY, USA",1007552,Not Found,http://doi.acm.org/10.1145/1007551.1007552,2,Dec-03,10.1145/1007551.1007552,4,December,2003,301--323,error-pattern analysis||korean grammar checker...,
372,Accommodating Field-dependence: A Cross-over S...,Not Found,"Leeds, United Kingdom",Not Found,ACM,"New York, NY, USA",1008018,Not Found,http://doi.acm.org/10.1145/1007996.1008018,Not Found,Not Found,10.1145/1007996.1008018,Not Found,Not Found,2004,72--76,accessibility||cognitive styles||cross-over de...,
373,Generation As Method for Explorative Learning ...,Not Found,"Leeds, United Kingdom",Not Found,ACM,"New York, NY, USA",1008019,Not Found,http://doi.acm.org/10.1145/1007996.1008019,Not Found,Not Found,10.1145/1007996.1008019,Not Found,Not Found,2004,77--81,algorithms||animation||computer science educat...,
384,"Extending e-Books with Annotation, Online Supp...",Not Found,"Leeds, United Kingdom",Not Found,ACM,"New York, NY, USA",1008032,Not Found,http://doi.acm.org/10.1145/1007996.1008032,Not Found,Not Found,10.1145/1007996.1008032,Not Found,Not Found,2004,132--136,annotation||collaborative and social computing...,
385,Automated Assessment of GUI Programs Using JEWL,Not Found,"Leeds, United Kingdom",Not Found,ACM,"New York, NY, USA",1008033,Not Found,http://doi.acm.org/10.1145/1007996.1008033,Not Found,Not Found,10.1145/1007996.1008033,Not Found,Not Found,2004,137--141,automated assessment||computer science educati...,
390,Use of Large Databases for Group Projects at t...,0097-8418,Not Found,Not Found,ACM,"New York, NY, USA",1008039,SIGCSE Bull.,http://doi.acm.org/10.1145/1026487.1008039,36,Sep-04,10.1145/1026487.1008039,3,June,2004,161--165,capstone course||computer science education||d...,
403,SQLator: An Online SQL Learning Workbench,Not Found,"Leeds, United Kingdom",Not Found,ACM,"New York, NY, USA",1008055,Not Found,http://doi.acm.org/10.1145/1007996.1008055,Not Found,Not Found,10.1145/1007996.1008055,Not Found,Not Found,2004,223--227,collaborative and social computing systems and...,
406,Introducing Assistive Technology in an HCI Course,0097-8418,Not Found,Not Found,ACM,"New York, NY, USA",1008061,SIGCSE Bull.,http://doi.acm.org/10.1145/1026487.1008061,36,Sep-04,10.1145/1026487.1008061,3,June,2004,232--232,assistive technology||computer science educati...,


In [31]:
# Print the tag string of the first ten HCC papers, one ruled-off entry each.
separator = "--------------------------------------------------------------------------------------------------------------"
for _, paper in hcc_papers_df.head(10).iterrows():
    print("Paper #", paper['acmid'], "has tags: ", paper['tags'], '\n')
    print(separator)

Paper # 1007439 has tags:  algorithms||clustering||collaborative filtering||design and analysis of algorithms||information retrieval||latent class models||linear programming||mixture models||singular value decomposition||text classification||theory 

--------------------------------------------------------------------------------------------------------------
Paper # 1007535 has tags:  alloy||case study||commutativity||concurrency||critical systems||design||formal language definitions||formal languages and automata theory||formal methods||formal software verification||formal specification||human computer interaction (hci)||human factors||lightweight formal methods||model checking||model checking||ocl||proton therapy||radiation therapy||reliability||testing||verification||verification by model checking 

--------------------------------------------------------------------------------------------------------------
Paper # 1007552 has tags:  error-pattern analysis||korean grammar checker|

In [None]:
# Build one vector per HCC paper by summing the embeddings of the words in
# its tags.
# NOTE(review): nothing in the lines visible here stores paper_vector into
# hcc_papers_vectors -- the cell may continue beyond this excerpt; confirm.
hcc_papers_vectors = {}
for index, row in hcc_papers_df.iterrows():
    # Running sum for this paper: 300 zeros to start with, presumably matching
    # the dimensionality of the loaded embeddings (the model file is "300d").
    paper_vector = numpy.zeros(300,)
    current_tags = row['tags'].split('||')
    for tag in current_tags:
        # Tokenise the tag into runs of word characters and apostrophes.
        tag_words = re.findall(r"[\w']+", tag)
        for word in tag_words:
            # Skip single-character tokens and a hand-maintained list of words.
            # NOTE(review): the listed words are presumably missing from the
            # embedding vocabulary (get_vector would fail on them) -- confirm;
            # a `word in model` check would avoid maintaining this list.
            if len(word) > 1 and word != 'geft' and word != 'unistrokes' and word != 'fitts\'' and word != "catadioptrical" and word != "web2gether" and word != "hyperfilm" and word != "hypergram" and word != "stretchtext" and word != "applitude" and word != "multitrees" and word != "polyarchies":
                paper_vector += model.get_vector(word)