# Data API
### Emoji Sentiment ( MySQL )

In [1]:
import os
import re
import pandas as pd
import MySQLdb as sql
import urllib.request

In [1]:
# Scrape the Unicode "full emoji list" chart and collect, for every emoji row,
# its code point sequence, its rendered characters and its short name into
# three parallel lists (`code`, `char`, `desc`).

pattern1 = r'<td class=\'code\'><a href=\'\#([A-Fa-f0-9_]+)\' name=\'([A-Fa-f0-9_]+)\'>.+</a></td>'
pattern2 = r'<td class=\'chars\'>(.+)</td>'
pattern3 = r'<td class=\'name\'>(.+)</td>'

code = []
char = []
desc = []

# Dispatch table keyed by the four characters that follow "<td class='" in a
# table cell: each entry pairs the pattern to apply with the list to fill.
# 'char' is the prefix of the 'chars' class name.
cell_handlers = {
    'code': (pattern1, code),
    'char': (pattern2, char),
    'name': (pattern3, desc),
}

# The `with` block closes the HTTP response once the page has been read.
with urllib.request.urlopen('https://unicode.org/emoji/charts/full-emoji-list.html') as source:
    for line in source:
        line = line.decode('utf-8').strip()
        if line[:10] != '<td class=':
            continue
        handler = cell_handlers.get(line[11:15])
        if handler is None:
            continue
        pattern, values = handler
        match = re.match(pattern, line)
        if match is not None:
            # Only the first capture group is kept, lower-cased.
            values.append(match.group(1).lower())

# The three counts should be equal; the following cells index the lists in parallel.
print(len(code), len(char), len(desc))

2623 2623 2623


In [2]:
# Convert every scraped code from an underscore-separated list of hex code
# points into a space-separated list of decimal code points, and measure the
# maxima that are later used to size the MySQL columns:
#   num_code - largest number of code points in a single emoji
#   len_code - longest decimal code string
#   len_char - longest rendered character string
#   len_desc - longest description
#
# NOTE: `code` is replaced by its decimal form, so re-running this cell without
# re-running the scraping cell first would parse the decimal values as hex.

# The lists are consumed in parallel here and when the SQL file is written.
assert len(code) == len(char) == len(desc), 'scraped lists are misaligned'

code_points = [entry.split('_') for entry in code]

# int(c, 16) already is the code point; no chr()/ord() round-trip is needed.
code = [' '.join(str(int(c, base=16)) for c in points) for points in code_points]

# default=0 keeps the results at 0 when nothing was scraped.
num_code = max((len(points) for points in code_points), default=0)
len_code = max(map(len, code), default=0)
len_char = max(map(len, char), default=0)
len_desc = max(map(len, desc), default=0)

print(num_code, len_char, len_code, len_desc)

8 8 48 53


Create database with sentiment scores initialized to 0:

In [3]:
# Generate emoji.sql: a script that (re)creates the `emoji` table and inserts
# one row per scraped emoji. Column widths come from the maxima measured on
# the scraped data.

insert = '''
        INSERT INTO emoji(code, chars, composite, description)
        VALUES('{}','{}',{},'{}');
        '''


def _sql_literal(value):
    """Escape `value` for embedding in a single-quoted MySQL string literal.

    Backslashes and single quotes are doubled so that a scraped value cannot
    terminate the literal early. Assumes MySQL's default sql_mode, in which
    backslash is an escape character - TODO confirm for the target server.
    """
    return value.replace('\\', '\\\\').replace("'", "''")


# The file contains emoji characters, so the encoding is fixed to UTF-8
# instead of depending on the platform's default locale encoding.
with open('emoji.sql', 'w', encoding='utf-8') as output:
    # `chars` is sized with len_char, the measured length of the values that
    # are inserted into that column.
    output.write('''
        DROP TABLE IF EXISTS emoji;
        CREATE TABLE emoji(
            code VARCHAR({}) NOT NULL DEFAULT '',
            chars VARCHAR({}) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL DEFAULT '',
            composite TINYINT(1) NOT NULL DEFAULT 0,
            description VARCHAR({}) NOT NULL DEFAULT '',
            PRIMARY KEY (code)
        ) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
        '''.format(len_code, len_char, len_desc))

    for emoji_code, emoji_chars, emoji_desc in zip(code, char, desc):
        # Keep only ASCII characters in the description.
        description = ''.join(c for c in emoji_desc if ord(c) < 128)
        # `composite` receives the number of code points in the emoji.
        output.write(insert.format(
            _sql_literal(emoji_code),
            _sql_literal(emoji_chars),
            len(emoji_code.split()),
            _sql_literal(description),
        ))

Import data to the table:
<pre>
mysql -u root -p$MYSQL_ROOT_PASS $DATABASE &lt; emoji.sql
</pre>
Attention: client-server **configuration** might need to be revised ( /etc/mysql/my.cnf )
<pre>
[client]
default-character-set = utf8mb4
[mysql]
default-character-set = utf8mb4
[mysqld]
character-set-client-handshake = FALSE
character-set-server = utf8mb4
collation-server = utf8mb4_unicode_ci
</pre>
Now let's run some simple positive and negative queries:

In [4]:
# Connect to the local MySQL server; database name and credentials are read
# from environment variables.
con = sql.connect(
    host='localhost',
    database=os.environ['DATABASE'],
    user=os.environ['DATAUSER'],
    password=os.environ['MYSQL_PASS'],
)

# Join the emoji descriptions with their sentiment counts, keeping only rows
# with a positive total.
data = pd.DataFrame(pd.read_sql("""
SELECT a.description, b.positive, b.negative, b.total, b.sentiment
FROM emoji a JOIN emo_sent b USING(code) WHERE total > 0
""", con))

# Top 10 rows with positive sentiment, ordered by total (descending) and then
# by sentiment (ascending).
is_positive = data['sentiment'] > 0
data.loc[is_positive].sort_values(by=['total', 'sentiment'], ascending=[False, True]).head(10)

Unnamed: 0,description,positive,negative,total,sentiment
453,face with tears of joy,2149,123,2272,0.292943
17,red heart,1479,11,1490,0.867308
533,loudly crying face,951,62,1013,0.232363
475,smiling face with heart-eyes,690,5,695,0.870496
262,clapping hands,644,11,655,0.719399
417,fire,413,5,418,0.792643
329,two hearts,324,5,329,0.743002
712,double exclamation mark,266,12,278,0.397597
416,fire,271,1,272,0.935049
142,party popper,261,4,265,0.744548


In [5]:
# Top 10 rows with negative sentiment, ordered by total (descending) and then
# by sentiment (ascending).
is_negative = data['sentiment'] < 0
data.loc[is_negative].sort_values(by=['total', 'sentiment'], ascending=[False, True]).head(10)

Unnamed: 0,description,positive,negative,total,sentiment
452,face with tears of joy,206,26,232,-0.068935
368,hundred points,149,19,168,-0.09843
532,loudly crying face,146,17,163,-0.02873
525,weary face,95,41,136,-0.609642
636,rolling on the floor laughing,96,12,108,-0.088551
745,female sign,61,34,95,-0.683816
485,unamused face,24,49,73,-0.902472
298,skull,60,12,72,-0.312925
666,person shrugging,53,15,68,-0.460056
246,backhand index pointing down,48,16,64,-0.522082


In [None]:
# test run: launch uWSGI with its HTTP listener on port 9000, loading app.py
# and serving the WSGI callable named `app` from it.
# NOTE(review): presumably this keeps running until interrupted - confirm.
!uwsgi --http :9000 --wsgi-file app.py --callable app

And here it is [online](/api/twitter/), or in the [notebook](Example.ipynb).