# Using the LIWC dictionary

GitHub repo of the `liwc-python` library used below: https://github.com/chbrown/liwc-python

In [5]:
import liwc
# Load the dictionary file: `parse` is called on a single token to get its
# categories, and `category_names` holds the category names read from the file.
LIWC_DICTIONARY_PATH = 'LIWC2015Dictionary.dic'
parse, category_names = liwc.load_token_parser(LIWC_DICTIONARY_PATH)

In [6]:
# Display the category names that were returned alongside the parser.
category_names

['function',
 'pronoun',
 'ppron',
 'i',
 'we',
 'you',
 'shehe',
 'they',
 'ipron',
 'article',
 'prep',
 'auxverb',
 'adverb',
 'conj',
 'negate',
 'verb',
 'adj',
 'compare',
 'interrog',
 'number',
 'quant',
 'affect',
 'posemo',
 'negemo',
 'anx',
 'anger',
 'sad',
 'social',
 'family',
 'friend',
 'female',
 'male',
 'cogproc',
 'insight',
 'cause',
 'discrep',
 'tentat',
 'certain',
 'differ',
 'percept',
 'see',
 'hear',
 'feel',
 'bio',
 'body',
 'health',
 'sexual',
 'ingest',
 'drives',
 'affiliation',
 'achiev',
 'power',
 'reward',
 'risk',
 'focuspast',
 'focuspresent',
 'focusfuture',
 'relativ',
 'motion',
 'space',
 'time',
 'work',
 'leisure',
 'home',
 'money',
 'relig',
 'death',
 'informal',
 'swear',
 'netspeak',
 'assent',
 'nonflu',
 'filler']

In [56]:
import re

def tokenize(text):
    '''Lazily yield every maximal run of word characters found in ``text``.

    Punctuation and whitespace act as separators and are dropped, so a
    contraction such as "don't" comes out as two tokens. This is deliberately
    simple; swap in a smarter tokenizer if that matters for your text.
    '''
    word_matches = re.finditer(r'\w+', text, re.UNICODE)
    yield from (word.group(0) for word in word_matches)

# Sample text: the opening of the Gettysburg Address, lower-cased before
# it is tokenized.
gettysburg = '''Four score and seven years ago our fathers brought forth on
  this continent a new nation, conceived in liberty, and dedicated to the
  proposition that all men are created equal. Now we are engaged in a great
  civil war, testing whether that nation, or any nation so conceived and so
  dedicated, can long endure. We are met on a great battlefield of that war.
  We have come to dedicate a portion of that field, as a final resting place
  for those who here gave their lives that that nation might live. It is
  altogether fitting and proper that we should do this.'''.lower()
# Materialize the tokens as a list. `tokenize` is a generator function, and a
# generator can be consumed only once: any later cell that iterates the tokens
# would see nothing on a second run unless this cell were re-run first.
gettysburg_tokens = list(tokenize(gettysburg))

In [57]:
from collections import Counter
# Tally, per category, how many tokens were assigned to it. A single token
# may belong to several categories, so it can contribute to several tallies.
gettysburg_counts = Counter()
for token in gettysburg_tokens:
    gettysburg_counts.update(parse(token))
print(gettysburg_counts)

Counter({'function': 53, 'pronoun': 18, 'relativ': 17, 'verb': 17, 'drives': 13, 'social': 12, 'ipron': 12, 'focuspresent': 11, 'prep': 10, 'space': 10, 'conj': 9, 'auxverb': 9, 'adj': 8, 'affect': 8, 'cogproc': 8, 'ppron': 6, 'affiliation': 6, 'article': 6, 'time': 5, 'focuspast': 5, 'we': 5, 'posemo': 5, 'adverb': 4, 'reward': 3, 'quant': 3, 'negemo': 3, 'anger': 3, 'power': 3, 'tentat': 3, 'number': 2, 'male': 2, 'motion': 2, 'certain': 2, 'compare': 2, 'death': 2, 'interrog': 2, 'differ': 2, 'bio': 2, 'health': 2, 'family': 1, 'cause': 1, 'achiev': 1, 'work': 1, 'leisure': 1, 'they': 1, 'focusfuture': 1, 'discrep': 1})


In [58]:
# Print one "category: count" line per category, in the counter's own
# iteration order.
for category, count in gettysburg_counts.items():
    print(f'{category}: {count}')

number: 2
drives: 13
reward: 3
function: 53
conj: 9
relativ: 17
time: 5
focuspast: 5
pronoun: 18
ppron: 6
we: 5
social: 12
affiliation: 6
family: 1
male: 2
verb: 17
motion: 2
prep: 10
space: 10
ipron: 12
article: 6
adj: 8
affect: 8
posemo: 5
quant: 3
cogproc: 8
certain: 2
auxverb: 9
focuspresent: 11
cause: 1
achiev: 1
compare: 2
adverb: 4
negemo: 3
anger: 3
power: 3
death: 2
work: 1
interrog: 2
differ: 2
tentat: 3
leisure: 1
they: 1
bio: 2
health: 2
focusfuture: 1
discrep: 1


# Using the LIWC dictionary with Senpy

In [59]:
# Base URL of the Senpy API that the queries below are sent to; this value
# points at a server on the local machine, port 5000.
endpoint = 'http://localhost:5000/api'
import requests
from IPython.display import Code

def query(endpoint, **kwargs):
    '''Query a given Senpy endpoint with specific parameters, and prettify the output.

    Args:
        endpoint: URL the GET request is sent to.
        **kwargs: sent as the query-string parameters of the request. The
            ``outformat`` entry, if present, is also used as the syntax
            highlighting language of the result (``json-ld`` otherwise).

    Returns:
        An ``IPython.display.Code`` object wrapping the response body.

    Raises:
        requests.HTTPError: if the response status code is anything other
            than 200. The response is attached as the ``response`` attribute.
    '''
    res = requests.get(endpoint,
                       params=kwargs)
    if res.status_code != 200:
        # Put the status, URL and body in the message: the bare response
        # object only renders as "<Response [NNN]>", which hides whatever
        # explanation the server sent back. HTTPError is an Exception
        # subclass, so callers catching Exception are unaffected.
        raise requests.HTTPError(
            f'{res.status_code} response from {res.url}: {res.text}',
            response=res)
    return Code(res.text, language=kwargs.get('outformat', 'json-ld'))

In [60]:
# Send a sample sentence to the `/liwc` path under the endpoint and display
# the formatted response.
query(f'{endpoint}/liwc', input="Senpy is awesome")