# Slang Translator

In [1]:
import pandas as pd
import os

In [2]:
current_directory = os.path.realpath('data.py')
parent_directory = parent_directory = os.path.dirname(current_directory)


In [3]:
class ImportData:

    def __ini__(self):
        self.current_directory = current_directory
        self.parent_directory = parent_directory

    def abbreviations():

        data_abbreviations = pd.read_csv(os.path.join(parent_directory,'..','..' ,'live-sentiment-tracker', 'data_raw','Abbreviations_and_Slang.csv'))

        return data_abbreviations

    def slangs():

        data_slangs = pd.read_csv(os.path.join(parent_directory,'..','..', 'live-sentiment-tracker','data_raw','slang.csv'))[['acronym', 'expansion']]

        return data_slangs

In [4]:
data_abbreviations = ImportData.abbreviations()
data_slangs = ImportData.slangs()

remove_list = [13, 15, 16, 27, 41, 63, 66, 67, 112]
add_list = [159, 160, 185, 330, 2362]

In [6]:
# Concatenating slang and abbreviation datasets
data_slangs.rename(columns = {'acronym':'Abbreviations','expansion':'Text'}, inplace = True)
data_slangs = data_slangs[data_slangs.index.isin(add_list)]
data_abbreviations.drop(axis=0, labels=remove_list, inplace=True)
data_slang_all = pd.concat([data_abbreviations , data_slangs], axis=0)

# Drop duplicates and null values
data_slang_all.drop_duplicates(inplace=True)
data_slang_all.dropna(inplace=True)

# Checking
print(f'Data + Slang full dataset shape: {data_slang_all.shape}')
print(f'Data + Slang full dataset null: {data_slang_all.isnull().sum()[0]}')
print(f'Data + Slang full dataset duplicates: {data_slang_all.duplicated().sum()}')

Data + Slang full dataset shape: (108, 2)
Data + Slang full dataset null: 0
Data + Slang full dataset duplicates: 0


  print(f'Data + Slang full dataset null: {data_slang_all.isnull().sum()[0]}')


In [7]:
data_slang_all.to_csv('data_slang_all_preprocessed.csv')

In [8]:
# Transforming DF into dict for mapping
slang_dict = dict(zip(data_slang_all.Abbreviations, data_slang_all.Text))

In [9]:
import string

class SlangTranslation:

  def __init__(self, sentence):

    self.sentence = sentence

  def remove_punctuation(self, sentence):
    """Iterates through each word of the string and removes punctuation"""
    for punctuation in string.punctuation:
        sentence = sentence.replace(punctuation, '')

    return sentence

  def string_translator(self, sentence):
    """Iterates through each word of the string and translates them"""

    sentence = ' '.join([slang_dict.get(i, i) for i in sentence.split()])

    return sentence

  def apply_translator(self, sentence):
    """Takes the text column as input, outputs the same column translated."""

    sentence = self.remove_punctuation(sentence)

    sentence = self.string_translator(sentence)

    return sentence

# Model

## Vader (return overall sentiment)

In [10]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

def vader(sentence):
    """
    Function to print sentiment and return overall sentiment
    """
    # Instanciating SentimentIntensityAnalyzer object
    vader_model = SentimentIntensityAnalyzer()
    sentence = SlangTranslation(sentence).apply_translator(sentence)
    # Creating sentiment dict from vader model
    sentiment_dict = vader_model.polarity_scores(sentence)

    # Printing sentiment percentage
    print(f'Sentence: "{sentence}"')
    print(f"This sentence is: {sentiment_dict['pos']*100} % positive")
    print(f"{sentiment_dict['neu']*100} % neutral")
    print(f"{sentiment_dict['neg']*100} % negative")

    # Conditional to return overall sentiment of the sentence
    if sentiment_dict['compound'] >= 0.05:
        overall_sentiment = 'The overall sentiment is Positive 😊'
        return overall_sentiment

    if sentiment_dict['compound'] <= -0.05:
        overall_sentiment = 'The overall sentiment is Negative 😔'
        return overall_sentiment

    else:
        overall_sentiment = 'The overall sentiment is Neutral 😐'
        return overall_sentiment


## Vader (return vader scores)

In [11]:

def vader_scores(sentence):
    """
    Function to return the dict with the sentiments
    """
    vader_model = SentimentIntensityAnalyzer()
    sentence = SlangTranslation(sentence).apply_translator(sentence)
    sentiment_dict = vader_model.polarity_scores(sentence)
    print(f'Sentence: "{sentence}"')
    return sentiment_dict
