# Sentiment Analysis

## 1. Data loading

In [None]:
# Directory scanned for input CSV files. File names are appended to this
# string directly, so the trailing separator is required.
SOURCE_FILEPATH = "C:\\Users\\hello\\Desktop\\interview\\"
# Directory the scored CSV files are written to (same trailing-separator rule).
OUTPUT_PATH = "C:\\Users\\hello\\Desktop\\result\\"

In [None]:
import pandas as pd
import os
from afinn import Afinn
from konlpy.tag import Okt
from textblob import TextBlob
from googletrans import Translator
import re
import time

from selenium import webdriver 
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
# Module-level AFINN scorer used by get_eng_score_afinn; constructed once
# with emoticons=True.
afinn = Afinn(emoticons=True)

In [None]:
def file_list_in_directory(path, extension='.csv'):
    """Return the names of the entries in *path* that end with *extension*.

    Only bare names are returned (as given by ``os.listdir``), not full
    paths, and the order is whatever ``os.listdir`` yields.
    """
    return [entry for entry in os.listdir(path) if entry.endswith(extension)]

In [None]:
def save_file(filepath: str, df):
    """Write *df* to *filepath* as comma-separated CSV.

    The file is encoded as UTF-8 with a BOM ('utf-8-sig'), and the DataFrame
    index is written as the first column (``to_csv`` default).
    """
    df.to_csv(path_or_buf=filepath, encoding='utf-8-sig', sep=',')

In [None]:
def load_file(source_path):
    """Print *source_path*, then read it as CSV and return the DataFrame."""
    print(source_path)
    return pd.read_csv(source_path)

In [None]:
def get_eng_score_afinn(s):
    """Return the AFINN sentiment score of *s*, or 0 if scoring fails.

    *s* is converted with ``str()`` first, so non-string cell values
    (numbers, NaN, None) are scored as their textual representation.
    On failure the offending text is printed and 0 is returned so that a
    single bad row does not abort a whole file.
    """
    s = str(s)
    try:
        return afinn.score(s)
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit still stop a long-running batch.
    except Exception:
        print("Score Error ", s)
        return 0

In [None]:
# Matches every code point outside the Basic Multilingual Plane
# (U+10000..U+10FFFF), which is where most emoji live.
LETTERS = re.compile('[\U00010000-\U0010ffff]', flags=re.UNICODE)


def text_cleaning(s):
    """Return ``str(s)`` with all characters matched by LETTERS removed."""
    text = str(s)
    return re.sub(LETTERS, '', text)

In [None]:
# Names of the .csv files found in SOURCE_FILEPATH; each one is later passed
# to sentimental_using_afinn.
file_list = file_list_in_directory(SOURCE_FILEPATH)

## Google Translate API

In [None]:
def translator_api(text):
    """Translate *text* to English with googletrans and return the result.

    A new ``Translator`` is created on every call. Any exception raised while
    creating it or translating is swallowed and an empty string is returned.
    """
    try:
        translated = Translator().translate(text=text, dest='en')
        return translated.text
    except Exception:
        return ""

## Google Translate page using Selenium

In [None]:
def ready_to_translate_driver():
    """Start a Chrome session on the Google Translate page and return the driver.

    The chromedriver binary is obtained through webdriver_manager. After the
    page loads, the element with id ``sugg-item-ko`` is clicked (presumably
    selecting Korean as the source language - confirm against the live page),
    then two elements addressed by absolute XPath are clicked, each followed
    by an ESCAPE key press.

    NOTE(review): the absolute XPaths depend on the exact page layout at the
    time this was written and will break when the page changes.
    NOTE(review): ``find_element_by_xpath`` was removed in Selenium 4.3; this
    needs an older Selenium or a migration to ``find_element(By.XPATH, ...)``.
    """
    driver = webdriver.Chrome(ChromeDriverManager().install())
    # "tl=en" in the URL fragment requests English as the target language.
    driver.get("https://translate.google.com/#view=home&op=translate&tl=en")
    driver.find_element_by_xpath('//*[@id="sugg-item-ko"]').click()
    # NOTE(review): implicitly_wait sets the element-lookup timeout; it does
    # not pause execution - confirm that a timeout (not a delay) is intended.
    driver.implicitly_wait(3)
    
    # Purpose of the two click + ESCAPE pairs is not evident from the code;
    # presumably they open and dismiss UI widgets - TODO confirm.
    driver.find_element_by_xpath('/html/body/div[2]/div[2]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]').click()
    webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
    driver.find_element_by_xpath('/html/body/div[2]/div[2]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/div[4]/div[1]/div[2]/div[2]').click()
    webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
    driver.implicitly_wait(3)
    return driver

def translate(driver, source_text):
    """Type *source_text* into the translate page and return the translated text.

    Parameters:
        driver: a Selenium driver already positioned on the translate page
            (as returned by ``ready_to_translate_driver``).
        source_text: the text to translate.

    Returns:
        The text of the result element, or *source_text* unchanged if any
        step fails (the untranslated text is printed in that case).

    KeyboardInterrupt and SystemExit are deliberately not caught, so a long
    batch can still be stopped from the keyboard.
    """
    try:
        elem = driver.find_element_by_xpath("//*[@id=\"source\"]")
        elem.clear()
        elem.send_keys(source_text)
        # Give the page time to produce the translation before reading it.
        time.sleep(1)
        driver.implicitly_wait(3)
        result = driver.find_element_by_xpath('/html/body/div[2]/div[2]/div[1]/div[2]/div[1]/div[1]/div[2]/div[3]/div[1]/div[2]/div/span[1]')
        return result.text
    # Best-effort fallback: any ordinary failure keeps the original text.
    except Exception:
        print(source_text)
        return source_text

In [None]:
# Opens the browser session when this cell runs; sentimental_using_afinn
# reads this module-level driver.
driver = ready_to_translate_driver()

def sentimental_using_afinn(filename):
    """Score every comment of one input CSV with AFINN and save the result.

    Reads ``SOURCE_FILEPATH + filename``, cleans each row's 'contents' value,
    translates it through the module-level ``driver`` when the row's 'hangul'
    value equals True, and scores the resulting text. The texts and scores
    are inserted as columns 'translate' (position 4) and 'score(afinn)'
    (position 5), and the frame is written to ``OUTPUT_PATH + filename``.
    """
    print(filename)
    df = load_file(SOURCE_FILEPATH + filename)
    translations = []
    scores = []
    for _, row in df.iterrows():
        comment = text_cleaning(row['contents'])
        try:
            # Equality (not truthiness) on purpose: NaN or a non-empty string
            # in 'hangul' must not trigger a translation.
            needs_translation = row['hangul'] == True
            if needs_translation:
                # Pause before and after each request to pace the browser.
                time.sleep(1)
                comment = translate(driver, comment)
                time.sleep(1)
        finally:
            # Runs even if the lookup above raised, so the (possibly
            # untranslated) comment is still recorded before propagation.
            translations.append(comment)
            scores.append(get_eng_score_afinn(comment))

    df.insert(4, 'translate', translations)
    df.insert(5, 'score(afinn)', scores)
    save_file(OUTPUT_PATH + filename, df)

In [None]:
# Score every input file found in SOURCE_FILEPATH, one at a time.
for filename in file_list:
    sentimental_using_afinn(filename)

# Merge

In [None]:
import pandas as pd

In [None]:
# Base names of the per-target CSV groups; the merge loop reads
# "<name>-1.csv", "<name>-2.csv" and "<name>-3.csv" for each entry.
l = ['ariana_grande', 'bts', 'chungha', 'doja_cat', 'dualipa', 'fifth_harmony', 'g_idle', 'in_real_life', 'itzy', 'justin_bieber', 'kangdaniel', 'little_mix', 'mamamoo', 'nct', 'oh_my_girl', 'onedirection', 'prettymuch', 'why_dont_we', 'winner', 'zico' ]

In [None]:
# For every target name, stitch its three part files into a single CSV:
# drop the first column of the combined frame, prepend a 'target' column
# holding the name, and write "<name>.csv" into the result directory.
for name in l:
    path1, path2, path3 = (
        "C://Users/hello/Desktop/interview/" + name + suffix
        for suffix in ("-1.csv", "-2.csv", "-3.csv")
    )
    df1, df2, df3 = map(pd.read_csv, (path1, path2, path3))

    df = pd.concat([df1, df2, df3])
    df = df.drop(columns=df.columns[[0]])
    df.insert(0, 'target', name)

    output_path = "C://Users/hello/Desktop/result/"
    df.to_csv(
        output_path + name + ".csv",
        mode='w',
        encoding='utf-8-sig',
        index=False,
    )

In [None]:
# Seed the combined frame with the first target's file; `l` therefore lists
# every remaining target (all names except 'ariana_grande').
path = "C://Users/hello/Desktop/result/"
l = ['bts', 'chungha', 'doja_cat', 'dualipa', 'fifth_harmony', 'g_idle', 'in_real_life', 'itzy', 'justin_bieber', 'kangdaniel', 'little_mix', 'mamamoo', 'nct', 'oh_my_girl', 'onedirection', 'prettymuch', 'why_dont_we', 'winner', 'zico' ]
merge = pd.read_csv(path + 'ariana_grande' + '.csv')

In [None]:
# Append every remaining per-target file to `merge`.
# The frames are collected first and concatenated once: calling pd.concat
# inside the loop would re-copy the accumulated frame on every iteration.
frames = [merge]
for name in l:
    df = pd.read_csv(path + name + '.csv')
    frames.append(df)
merge = pd.concat(frames)

In [None]:
# Write the combined frame as "all.csv" in the result directory, without the
# index column and encoded as UTF-8 with a BOM.
merge.to_csv(path + "all.csv", mode='w', encoding='utf-8-sig',
             index=False)

# Aggregation

In [None]:
# Load a single target's result file for inspection. `output_path` is also
# reused when the aggregated scores are written back to "all.csv".
output_path = "C://Users/hello/Desktop/result/"
df = pd.read_csv(output_path + "zico.csv")

In [None]:
# Notebook display: show the column names of the loaded frame.
df.columns

In [None]:
# Load the combined file and add a 'score' column at position 8, initialised
# to 0; the aggregation cell fills it in.
path = "C://Users/hello/Desktop/result/"
df = pd.read_csv(path + 'all.csv')
df.insert(8, 'score', 0)

In [None]:
# Notebook display: show the frame.
df

In [None]:
# Weight each comment's AFINN score by its like count. Rows with a like
# count of 0 keep their raw AFINN score (treated as weight 1) instead of
# being zeroed out.
# Vectorised on purpose: avoids per-row chained indexing, avoids writing
# floats one at a time into the int-initialised 'score' column, and no
# longer rebinds `l` (the list of target names) to an integer.
like_weight = df['likeCount'].where(df['likeCount'] != 0, 1)
df['score'] = df['score(afinn)'] * like_weight

df.to_csv(output_path + "all.csv", mode='w', encoding='utf-8-sig', index=False)

# Data Visualization

In [6]:
%load_ext autoreload
%autoreload 2

In [7]:
import chart_studio.plotly as py
import plotly.figure_factory as ff
import pandas as pd

# Load one target's result file and build a Plotly figure-factory table from it.
df = pd.read_csv("C://Users/hello/Desktop/result/zico.csv")

table = ff.create_table(df)

In [9]:
# Render the table through chart_studio under the name 'jupyter-table1'.
# NOTE(review): presumably needs Chart Studio credentials and network access;
# the recorded run of this cell ended in KeyboardInterrupt - confirm.
py.iplot(table, filename='jupyter-table1')

KeyboardInterrupt: 