In [1]:
!which python

/usr/local/bin/python


In [2]:
from elasticsearch_dsl import connections
from elasticsearch import Elasticsearch


def create_es_connection() -> Elasticsearch:
    """ElasticSearch Connection을 만듭니다.

    Raises:
        e: [description]

    Returns:
        ElasticSearch: [ElasticSearch Connection]
    """
    try:
        conn = connections.create_connection(
            alias="connection", hosts=["localhost:9200"], timeout=60
        )
        return conn
    except Exception as e:
        raise e


In [3]:
conn = create_es_connection()

In [4]:
from datetime import datetime
from elasticsearch_dsl import Document, Text, Float, Date, Keyword

class Vocabulary(Document):
    name = Text()
    meaning = Text()
    tags = Keyword()
    score: Float()
    created_at: Date() = datetime.now()
    updated_at: Date() = datetime.now()

    class Index:
        name = "vocabularies"
    
    def save(self, **kwargs):
        # TODO: check vocab existence
        return super().save(**kwargs)



In [14]:
Vocabulary.init()

ConnectionError: ConnectionError(<urllib3.connection.HTTPConnection object at 0xffff7f5a9f70>: Failed to establish a new connection: [Errno 111] Connection refused) caused by: NewConnectionError(<urllib3.connection.HTTPConnection object at 0xffff7f5a9f70>: Failed to establish a new connection: [Errno 111] Connection refused)

In [13]:
from elasticsearch_dsl import connections
connections.create_connection(
            hosts=["localhost:9200"], timeout=60
        )

<Elasticsearch([{'host': 'localhost', 'port': 9200}])>

In [63]:
from collections import Counter
import string


IGNORE_TOKENS = ['the', 'of', 'to', 'and', 'a', 'in', 'is', 'that', 'as', 'are', 'was', 'on', 'at', 'not', 'for', 'be', 'it', 'we', 'or', 'his', 'an', 'you', 'this', 'by', 'can', 'our', 'i', 'have', 'their', 'from', 'one', 'they', 'there', 'with', 'but', 'people', 'human', '\\xe2\\x80\\x95', 'had', 'my', 'what', 'do', 'more', 'your', 'many', 'he', 'its', 'all', 'could', 'online', 'use', 'has', '(', ')', 'also', 'who', 'most', 'than', 'if', 'some', 'when', 'new', 'does', 'should', 'will', 'about', 'where', 'being', 'way']

table = str.maketrans('', '', string.punctuation) 

word_counter = Counter()
with open('../seed/raw.txt', 'r') as f:
    for block in f:
        words = str(block).lower().translate(table).split(' ')
        words = [w for w in words if w not in IGNORE_TOKENS and not w.isdigit()]
        
        for word in words:
            word_counter[word] += 1
            
print(word_counter.most_common()[-50:])
        

[('me”', 1), ('ran', 1), ('away', 1), ('mom”', 1), ('excited', 1), ('pleased', 1), ('rid', 1), ('lesson', 1), ('till', 1), ('came', 1), ('back', 1), ('interfered', 1), ('business', 1), ('playful', 1), ('curious', 1), ('interested', 1), ('committed', 1), ('himself', 1), ('secretly', 1), ('looked', 1), ('inside', 1), ('brother’s', 1), ('shouted', 1), ('working', 1), ('“mom', 1), ('he’s', 1), ('video”', 1), ('naughtily', 1), ('smiling', 1), ('mad', 1), ('bothering', 1), ('posted', 1), ('school', 1), ('angry', 1), ('naughty', 1), ('asked', 1), ('loudly', 1), ('felix”', 1), ('felix’s', 1), ('next', 1), ('hear', 1), ('“i’m', 1), ('lecture', 1), ('class”', 1), ('argued', 1), ('accusation', 1), ('mischievously', 1), ('stuck', 1), ('tongue', 1), ('brother\n', 1)]


In [77]:
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class VocabFeed:
    name: str
    meaning: Optional[str] = None
    frequency: Optional[int] = None

    @staticmethod
    def from_counter(c: Counter):
        return [VocabFeed(name=voca, frequency=freq) for (voca, freq) in c.items()]

In [78]:
VocabFeed.from_counter(word_counter)

1),
 VocabFeed(name='hazards', meaning=None, frequency=1),
 VocabFeed(name='scientist', meaning=None, frequency=1),
 VocabFeed(name='devised', meaning=None, frequency=1),
 VocabFeed(name='nonetheless', meaning=None, frequency=1),
 VocabFeed(name='oneself', meaning=None, frequency=1),
 VocabFeed(name='remains', meaning=None, frequency=1),
 VocabFeed(name='problematic', meaning=None, frequency=1),
 VocabFeed(name='obvious', meaning=None, frequency=3),
 VocabFeed(name='drawback', meaning=None, frequency=2),
 VocabFeed(name='danger', meaning=None, frequency=1),
 VocabFeed(name='exists', meaning=None, frequency=1),
 VocabFeed(name='nothing', meaning=None, frequency=1),
 VocabFeed(name='reduce', meaning=None, frequency=2),
 VocabFeed(name='range', meaning=None, frequency=1),
 VocabFeed(name='data', meaning=None, frequency=1),
 VocabFeed(name='generate', meaning=None, frequency=1),
 VocabFeed(name='anatomy', meaning=None, frequency=1),
 VocabFeed(name='physiology', meaning=None, frequency=1),

In [68]:
word_counter.items()

lion’s', 1), ('historians’', 1), ('put', 3), ('across', 1), ('point', 5), ('view', 2), ('environment', 5), ('make', 3), ('laws', 1), ('voice', 2), ('wild', 1), ('india', 1), ('present', 1), ('rate', 1), ('consumption', 2), ('completely', 1), ('unsustainable', 1), ('forest', 1), ('wetlands', 1), ('wastelands', 1), ('coastal', 1), ('zones', 2), ('ecosensitive', 1), ('seen', 2), ('disposable', 1), ('accelerating', 1), ('demands', 1), ('population', 2), ('ask', 3), ('any', 3), ('change', 2), ('behaviour', 1), ('―', 12), ('whether', 2), ('cut', 1), ('alter', 1), ('lifestyles', 1), ('decrease', 1), ('growth', 1), ('violation', 1), ('rights', 4), ('‘wrongs’', 1), ('changed', 3), ('thinking', 1), ('difference', 1), ('between', 4), ('humans', 3), ('rest', 3), ('environment\n', 1), ('difficulties', 1), ('arise', 1), ('think', 3), ('machines', 4), ('collaborative', 1), ('systems', 2), ('assign', 1), ('whatever', 2), ('tasks', 1), ('automated', 1), ('leave', 1), ('ends', 1), ('up', 6), ('requiring