## Search authors from DB

In [7]:
def searchAuthors(name=None, institution=None, topic=None, database="database/database.sqlite"):
    """Return the ids of the authors matching the given filters.

    Every filter is optional; a falsy value (``None`` or ``""``) disables it.

    Args:
        name: Substring to look for in ``authors.name`` (matched with SQL ``LIKE``).
        institution: Substring to look for in ``institutions.name`` (matched
            with SQL ``LIKE``). Authors without an institution are compared
            as if their institution name were the empty string.
        topic: Exact topic name (``topics.name``) of at least one paper of
            the author.
        database: Path of the SQLite file to query. Defaults to the path the
            notebook has always used.

    Returns:
        list: Author ids ordered by ``author_ranking.pagerank_citations``,
        highest first. Because of the inner joins, only authors that have at
        least one paper with a topic and an ``author_ranking`` row are returned.
    """
    import sqlite3
    from contextlib import closing

    # A missing filter is passed to SQL as NULL; IFNULL() in the WHERE clause
    # then substitutes a pattern/value that matches every row.
    likeName = '%' + name.upper() + '%' if name else None
    likeInstitution = '%' + institution.upper() + '%' if institution else None
    topic = topic if topic else None

    sql = (
        "SELECT a.id "
        "FROM authors a JOIN paper_authors pa ON a.id=pa.author_id JOIN paper_topic pt ON pt.paper_id=pa.paper_id "
        "JOIN topics t ON t.id=pt.topic_id LEFT JOIN author_institution ai ON ai.author_id=a.id LEFT JOIN institutions i ON i.id=ai.institution_id "
        "JOIN author_ranking ar on ar.author_id=a.id "
        "WHERE a.name LIKE IFNULL(?,'%') AND IFNULL(i.name,'') LIKE IFNULL(?,'%') AND t.name = IFNULL(?,t.name) "
        "GROUP BY a.id, ar.pagerank_citations ORDER BY ar.pagerank_citations DESC"
    )

    # `with conn:` only commits/rolls back the transaction and leaves the
    # connection open; closing() guarantees the handle is released.
    with closing(sqlite3.connect(database)) as conn:
        rows = conn.execute(sql, (likeName, likeInstitution, topic)).fetchall()

    # Each row holds exactly one column (a.id).
    return [row[0] for row in rows]

### Example

In [8]:
authors = searchAuthors(name="jor", institution="", topic="")
print(authors)
print()

# get details for the obtained authors
import sqlite3
from contextlib import closing

database = "database/database.sqlite"

# closing() releases the connection when the block ends; `with conn:` alone
# would only end the transaction and leave the handle open.
with closing(sqlite3.connect(database)) as conn:
    cur = conn.cursor()

    # `author_id` rather than `id`, so the builtin id() is not shadowed.
    for author_id in authors:
        # Bound parameter instead of string concatenation.
        cur.execute("SELECT id, name FROM authors WHERE id = ?", (author_id,))

        row = cur.fetchone()
        if row is None:
            # No matching row in `authors`; nothing to print.
            continue
        print("{} - {}".format(row[0], row[1]))

[330, 3032, 3005, 7392]

330 - Michael I. Jordan
3032 - Joris M. Mooij
3005 - Jorge D?ez
7392 - Jorge Batista


## Search papers from index

In [1]:
def searchPapers(year=None, author=None, topic=None, userQuery=None):
    """Search the Whoosh index in the ``index`` directory and return paper ids.

    Every argument is optional; a falsy value (``None``, ``""``, ``0``)
    disables the corresponding criterion.

    Args:
        year: Value the ``year`` field must match exactly.
        author: Whitespace-separated name fragments; each fragment must occur
            (case-insensitively, as a ``*fragment*`` wildcard) in the
            ``authors`` field.
        topic: Query string parsed against the ``topic`` field.
        userQuery: Free-text query searched in ``title`` and ``paper_text``.
            Each word is lemmatized as a noun and then as a verb first. When
            empty, the query ``*`` on the ``id`` field is used instead, so
            only the filters restrict the result.

    Returns:
        list[int]: Ids of the hits, in the order returned by the searcher.
        The searcher is called without an explicit ``limit``, so Whoosh's
        default result limit applies.
    """
    import whoosh.index as index
    from nltk.stem.wordnet import WordNetLemmatizer
    from whoosh import query
    from whoosh import qparser
    from whoosh.qparser import QueryParser
    from whoosh.qparser import MultifieldParser

    # Open the existing index
    index_dir = "index"
    ix = index.open_dir(index_dir)

    # Lemmatization of user query. The default is None, which has no
    # .split(); normalise it to "" so the "no text query" branch is reached.
    userQuery = userQuery or ""
    lemma = WordNetLemmatizer()
    userQuery = " ".join(lemma.lemmatize(word, 'n') for word in userQuery.split())
    userQuery = " ".join(lemma.lemmatize(word, 'v') for word in userQuery.split())

    with ix.searcher() as s:
        if not userQuery:
            # No text query: fall back to the wildcard query on the id field.
            qp = QueryParser("id", schema=ix.schema)
            user_q = qp.parse("*")
        else:
            # 0 = importance to documents with one of the terms
            # 1 = importance to documents with all of the terms
            og = qparser.OrGroup.factory(0.8)

            # search both in title and text
            mparser = MultifieldParser(["title", "paper_text"], schema=ix.schema, group=og)
            user_q = mparser.parse(userQuery)

        # Collect the field filters; all of them must hold (combined with &).
        filters = []

        if year:
            filters.append(query.Term("year", year))

        if author:
            for fa in author.lower().split():
                filters.append(query.Wildcard("authors", "*" + fa + "*"))

        if topic:
            topicParser = qparser.QueryParser("topic", schema=ix.schema)
            filters.append(topicParser.parse(topic))

        if filters:
            allow_q = query.NullQuery
            for clause in filters:
                allow_q = allow_q & clause
            results = s.search(user_q, filter=allow_q)
        else:
            # Nothing to filter on (this also covers a whitespace-only
            # `author`): search without a filter instead of passing an
            # empty one.
            results = s.search(user_q)

        # Read the hits while the searcher is still open.
        return [int(result['id']) for result in results]

### Example

In [5]:
papers = searchPapers(year=1994, author="", topic="Probabilistic Models", userQuery="tries views densities")
print(papers)
print()

# get details for the obtained papers
import sqlite3
from contextlib import closing

database = "database/database.sqlite"

# closing() releases the connection when the block ends; `with conn:` alone
# would only end the transaction and leave the handle open.
with closing(sqlite3.connect(database)) as conn:
    cur = conn.cursor()

    # `paper_id` rather than `id`, so the builtin id() is not shadowed.
    for paper_id in papers:
        # Bound parameter instead of string concatenation.
        cur.execute("SELECT id, year, title FROM papers WHERE id = ?", (paper_id,))

        row = cur.fetchone()
        if row is None:
            # The index returned an id with no row in `papers`; nothing to print.
            continue
        print("{} - {} ({})".format(row[0], row[2], row[1]))

try view density
[929, 962, 907, 999, 909, 1017, 1019, 885, 978]

929 - Hierarchical Mixtures of Experts Methodology Applied to Continuous Speech Recognition (1994)
962 - Recognizing Handwritten Digits Using Mixtures of Linear Models (1994)
907 - Classifying with Gaussian Mixtures and Clusters (1994)
999 - Correlation and Interpolation Networks for Real-time Expression Analysis/Synthesis (1994)
909 - Forward dynamic models in human motor control: Psychophysical evidence (1994)
1017 - A Critical Comparison of Models for Orientation and Ocular Dominance Columns in the Striate Cortex (1994)
1019 - A Mixture Model System for Medical and Machine Diagnosis (1994)
885 - A Comparison of Discrete-Time Operator Models for Nonlinear System Identification (1994)
978 - Pattern Playback in the 90s (1994)
