Smiley Conversion

In [45]:
import emot
from deep_translator import GoogleTranslator
from googletrans import Translator
import re

#### Function to pick the English meaning to translate
from the 'mean' value in the dictionary returned by the <code>emoticons()</code> function

In [46]:
# The three patterns are tried in this order; they are compiled once here
# instead of on every call.
_WORD_BEFORE_COMMA = re.compile(r'^([^\s,]+),')
_WORD_AFTER_COMMA = re.compile(r'^[^,]+,\s*(\w+)')
_WORDS_BEFORE_OR = re.compile(r'^\s*([\w\s]+?)\s+or\b')


def replacer(meanings):
    r"""
    Reduce an emoticon meaning string to one short English term.

    The patterns below are tried in order; the first one that matches wins.

    1. '^([^\s,]+),': match the first word before the first comma
        - ^:          start of the string
        - ([^\s,]+):  any character that is not whitespace or a comma, one or more times, captured in a group
        - ,:          the comma itself (not part of the result)

    2. '^[^,]+,\s*(\w+)': match the first word after the first comma if there are multiple words before the comma
        - ^:        start of the string
        - [^,]+:    one or more characters that are not commas, followed by a comma
        - \s*:      zero or more whitespace characters (e.g., spaces or tabs)
        - (\w+):    one or more word characters (e.g., letters, digits, or underscores), captured in a group

    3. '^\s*([\w\s]+?)\s+or\b': match the first words before the word 'or'
        - ^:            start of the string
        - \s*:          zero or more whitespace characters (e.g., spaces or tabs)
        - ([\w\s]+?):   one or more word or whitespace characters, captured in a non-greedy group
        - \s+:          at least one whitespace character, so that only a standalone "or" counts
                        and a word that merely ends in "or" (e.g. "Horror") is not cut in half
        - or\b:         the word "or", followed by a word boundary (to avoid matching words like "order" or "orange")

    Parameters
    ----------
    meanings : str
        Meaning text, e.g. "Happy face or smiley".

    Returns
    -------
    str
        The extracted term, or ``meanings`` unchanged when no pattern matches.
    """
    # Case 1: "Skeptical, annoyed, ..." -> "Skeptical"
    match = _WORD_BEFORE_COMMA.match(meanings)
    if match:
        return match.group(1)

    # Case 2: "Tongue sticking out, cheeky, ..." -> "cheeky"
    match = _WORD_AFTER_COMMA.match(meanings)
    if match:
        return match.group(1)

    # Case 3: "Happy face or smiley" -> "Happy face"
    match = _WORDS_BEFORE_OR.match(meanings)
    if match:
        return match.group(1)

    # Nothing to shorten, e.g. "Kiss" or "Horror".
    return meanings

In [47]:
def translate_emoticons(text):
    """
    Replace every emoticon detected in ``text`` by a short German description.

    The emoticons are detected with ``emot``; each reported meaning is
    shortened with ``replacer`` and translated to German with
    ``GoogleTranslator`` (target language 'de').

    Parameters
    ----------
    text : str
        Input text that may contain emoticons such as ":)" or ":/".

    Returns
    -------
    str
        ``text`` with each detected emoticon replaced by its translated meaning.
    """
    # The result is read as three parallel lists: 'value', 'location', 'mean'.
    emotions = emot.core.emot().emoticons(text)
    translator = GoogleTranslator(source='auto', target='de')

    # meaning -> translated text, so a meaning that occurs several times in
    # the same input costs only one translation request.
    translations = {}

    # Running difference between positions in the original text and positions
    # in the partially rewritten text.
    # NOTE(review): this assumes the locations are reported in ascending,
    # non-overlapping order - confirm against the emot version in use.
    correction = 0
    for emoticon, location, meaning in zip(emotions['value'],
                                           emotions['location'],
                                           emotions['mean']):
        if meaning not in translations:
            translations[meaning] = translator.translate(replacer(meaning))
        replacer_text = translations[meaning]

        start = location[0] + correction
        end = location[1] + correction
        text = text[:start] + replacer_text + text[end:]
        # The replacement is usually longer than the emoticon, so later
        # locations shift by the length difference.
        correction += len(replacer_text) - len(emoticon)
    return text
# Manual check: a short Swiss German sentence followed by a series of
# emoticons; the value displayed by this cell is the sentence with every
# emoticon replaced by German text.
test = "Hoi Andre i bi nöd bös :/, :D, :P, :), :(, :)), :))), :-)"
translate_emoticons(test)

'Hoi Andre i bi nöd bös Skeptisch, Lachen, frech, Glückliches Gesicht, Stirnrunzeln, Sehr glückliches Gesicht, Sehr sehr glückliches Gesicht, Smiley mit fröhlichem Gesicht'

In [48]:
# Requires googletrans version 3.1.0a0 to be installed.
def translate_swiss_german(text):
    """
    Round-trip ``text`` through English and back to German.

    The text is first translated from 'de' to 'en' and the English result is
    then translated from 'en' to 'de'; the text of the second translation is
    returned.
    """
    client = Translator()
    english = client.translate(text, src='de', dest='en')
    german = client.translate(english.text, src='en', dest='de')
    return german.text

In [49]:
# Sample Swiss German inputs for translate_swiss_german: two short sentences
# and one long, comma-separated line with many dialect words.
texte = ['Das isch en super Sach!', 
         'I lieb di au!', 
         "Chrischtbaumschmuck, Brunsli, Nusshüüfeli, Haferflockeguetzli, Zimetstärn hani gärn, Mailänderli au, Aenisguetzli, Chrischtchindli, es lüütet es Glöögli, Schtilli Nacht, Schternschnuppe,s' Jesuschindli liit i de Chrippe, äs isch zu euch Mänsche uf d'Aerde abe cho"]

In [50]:
# Show each sample next to its round-trip translation, with a blank line
# after every pair.
for text in texte:
    print(text)
    print(translate_swiss_german(text), end="\n\n")

Das isch en super Sach!
Das ist eine tolle Sache!

I lieb di au!
Ich liebe dich jetzt!

Chrischtbaumschmuck, Brunsli, Nusshüüfeli, Haferflockeguetzli, Zimetstärn hani gärn, Mailänderli au, Aenisguetzli, Chrischtchindli, es lüütet es Glöögli, Schtilli Nacht, Schternschnuppe,s' Jesuschindli liit i de Chrippe, äs isch zu euch Mänsche uf d'Aerde abe cho
Christbaumschmuck, Brunsli, Nusshüüfeli, Haferflockenkekse, Zimtstangen Hani Gärn, Mailänderli au, Aenisguetzli, Chrischtchindli, es lüütet Glöögli, Schtilli Nacht, Schternschnuppe, s'Jesuschindli liit i de Chrippe, es ist euch Leuten auf d'Aerde aber cho

