# Negative Comments Classification

## Importing Data

In [1]:
import pandas as pd
import numpy as np

import re
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/ec2-user/nltk_data...


In [2]:
#df = pd.read_csv("G:/Shared drives/Unidad Compartida Pachitos/Data Science Projects Pachitos/HackOff-CompanySentiments/tesla_sf.csv")

In [4]:
# Load the raw tweets CSV from S3 into the notebook-global DataFrame `df`.
from sagemaker import get_execution_role

# NOTE(review): `role` is assigned here but not referenced again in the visible
# cells; presumably kept for later SageMaker calls — confirm before removing.
role = get_execution_role()
# Full S3 URI of the input CSV.
data_location = 's3://tweets-hackoff1/tweets_tesla_sf.csv'
# NOTE(review): reading an s3:// URI with pandas presumably relies on an S3
# filesystem backend (e.g. s3fs) being installed in the kernel — confirm.
df = pd.read_csv(data_location)

In [5]:
class SentimentAnalysisTweets:
    """Clean tweet text and label each tweet using VADER polarity scores.

    The DataFrame handed to the constructor is stored as-is (not copied) and
    is modified in place: ``preprocess`` adds a ``tweets_clean`` column and
    ``create_label`` adds ``sentiment`` and ``flag`` columns.
    """

    # Leading retweet marker ("RT ..."); must be matched before lowercasing.
    _RETWEET_RE = re.compile(r'^RT\s+')
    # A hyperlink up to the next whitespace; must be matched before the
    # punctuation strip removes ':' and '/'.
    _URL_RE = re.compile(r'https?://\S+')
    # Anything that is not '$', a word character or whitespace
    # (this also removes '#' and '@').
    _NOISE_RE = re.compile(r'[^\$\w\s]')

    def __init__(self, df, column_name):
        """Store *df*, build the VADER analyzer and clean ``df[column_name]``.

        Args:
            df: DataFrame holding the tweets; it is mutated in place.
            column_name: Name of the column that contains the raw tweet text.
        """
        self.tweets_ = df
        self.sid_ = SentimentIntensityAnalyzer()

        self.preprocess(column_name)

    def preprocess(self, column_name):
        """Add a ``tweets_clean`` column derived from ``column_name``.

        The column name is remembered so that ``create_label`` scores the
        same column instead of a hard-coded one.

        Returns:
            self, to allow call chaining.
        """
        self.column_name_ = column_name
        # Column-wise apply: same result as a row-wise apply, without
        # building a Series object for every row.
        self.tweets_['tweets_clean'] = self.tweets_[column_name].apply(self.clean_text)

        return self

    def clean_text(self, observation):
        """Return a normalized version of one tweet.

        Steps, in the order required for each pattern to be able to match:
        drop a leading ``RT`` marker, drop hyperlinks, lowercase, then remove
        every character that is not ``$``, a word character or whitespace.
        Surrounding whitespace is trimmed.

        Args:
            observation: The raw tweet; any non-string value is passed
                through ``str()`` first.

        Returns:
            The cleaned, lowercased text.
        """
        text = str(observation).strip()
        text = self._RETWEET_RE.sub('', text)
        text = self._URL_RE.sub('', text)
        text = self._NOISE_RE.sub('', text.lower())

        return text.strip()

    def create_label(self):
        """Add ``sentiment`` (VADER score dict) and ``flag`` columns.

        Scoring is done on the raw text column rather than ``tweets_clean``;
        VADER is documented to use punctuation and capitalization as
        intensity cues, which the cleaning step removes.

        ``flag`` is ``'positive'`` when the compound score is >= 0 (so a
        neutral score of exactly 0 is labelled positive), else ``'negative'``.

        Returns:
            self, to allow call chaining.
        """
        # Fall back to the historical default if preprocess() was never run.
        column = getattr(self, 'column_name_', 'text')
        self.tweets_['sentiment'] = self.tweets_[column].apply(self.sid_.polarity_scores)
        self.tweets_['flag'] = self.tweets_['sentiment'].apply(
            lambda scores: 'positive' if scores.get('compound') >= 0 else 'negative'
        )

        return self

In [6]:
# Wrap the loaded tweets; the constructor also runs preprocess(), which adds a
# 'tweets_clean' column to `df` built from its 'text' column.
tweet_class = SentimentAnalysisTweets(df, column_name='text')

In [7]:
# Add the 'sentiment' and 'flag' columns to the wrapped DataFrame. The method
# returns the object itself, which the notebook echoes as the cell output.
tweet_class.create_label()

<__main__.SentimentAnalysisTweets at 0x7f89d4988860>

In [8]:
# Export the labelled tweets without the intermediate helper columns.
# drop() returns a new DataFrame, so tweet_class.tweets_ is left untouched.
df_label = tweet_class.tweets_.drop(columns=['tweets_clean', 'sentiment'])
df_label.to_csv('label_tweets.csv', index=False)

In [9]:
# Upload the labelled CSV written above to the project bucket, using the
# low-level client exposed by the boto3 S3 resource.
import boto3

s3 = boto3.resource('s3')
s3.meta.client.upload_file(
    Filename='label_tweets.csv',
    Bucket='tweets-hackoff1',
    Key='label_tweets.csv',
)