In [4]:
import tensorflow as tf
import pandas as pd
import sagemaker

In [5]:
# Change working driectory
# cd /home/ec2-user/SageMaker/TF_LSTM_Emotion

In [143]:
df = pd.read_csv('./data/ISEAR_clean.csv', sep=',')

In [144]:
df.head()

Unnamed: 0,emotion,text
0,joy,"During the period of falling in love, each tim..."
1,fear,When I was involved in a traffic accident.
2,anger,When I was driving home after several days of...
3,sadness,When I lost the person who meant the most to me.
4,disgust,The time I knocked a deer down - the sight of ...


In [145]:
df.shape

(7668, 2)

## Convert sentiment label to numeric category

In [146]:
labels = df.emotion.factorize() 

In [147]:
labels_index = labels[1]
df['emotion'] = labels[0]
df.head(10)

Unnamed: 0,emotion,text
0,0,"During the period of falling in love, each tim..."
1,1,When I was involved in a traffic accident.
2,2,When I was driving home after several days of...
3,3,When I lost the person who meant the most to me.
4,4,The time I knocked a deer down - the sight of ...
5,5,When I did not speak the truth.
6,6,When I caused problems for somebody because he...
7,0,When I got a letter offering me the Summer job...
8,1,When I was going home alone one night in Paris...
9,2,When I was talking to HIM at a party for the f...


In [148]:
labels_index

Index(['joy', 'fear', 'anger', 'sadness', 'disgust', 'shame', 'guilt'], dtype='object')

In [12]:
## Split training data

In [88]:
from sklearn.model_selection import train_test_split

training_data, testing_data, y_train, y_test = train_test_split(df.text, df.emotion, test_size=0.2, random_state=123, shuffle=True)

print("training size {}".format(len(training_data)))
print("testing size {}".format(len(testing_data)))

training size 6134
testing size 1534


In [89]:
traind_data = pd.DataFrame(data = {"text" : training_data, "emotion" : y_train})
test_data   = pd.DataFrame(data = {"text" : testing_data,  "emotion" : y_test})

training_data.fillna("", inplace = True)
test_data.fillna("", inplace = True)
 

In [90]:
traind_data.shape

(6134, 2)

In [91]:
test_data.shape

(1534, 2)

In [92]:
traind_data.to_csv('data/train.csv', index=False)
test_data.to_csv('data/test.csv', index=False)

In [93]:
sage_maker_session = sagemaker.Session()

prefix = 'tf-emotion-analysis'
training_input_path = sage_maker_session.upload_data('data/train.csv', key_prefix=prefix+'/training')

training_input_path

's3://sagemaker-us-east-1-105372338271/tf-emotion-analysis/training/train.csv'

In [94]:
training_data = pd.read_csv(training_input_path, sep=',')
training_data.head(5)

Unnamed: 0,text,emotion
0,I was angry about several drivers who showed a...,2
1,A woman picked her nose and spit right next to...,4
2,Bisshop Ter Schure intoduced himself on TV to ...,4
3,When an employer of my younger sister (17 yrs)...,4
4,During my first week in Graz I met a girl who ...,0


In [95]:
type(training_data.text.values[0])

str

In [115]:
%%writefile train.py

import argparse
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense
from tensorflow.keras.layers import Embedding, Dropout
import pandas as pd

if __name__ == '__main__':
    
    parser = argparse.ArgumentParser()

    # hyperparameters sent by the client are passed as command-line arguments to the script.
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--batch-size', type=int, default=100)
    parser.add_argument('--learning-rate', type=float, default=0.1)
    

    parser.add_argument('--gpu-count', type=int, default=os.environ['SM_NUM_GPUS'])

    # input data and model directories
    parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
    #parser.add_argument('--test', type=str, default=os.environ['SM_CHANNEL_TEST'])

    args, _ = parser.parse_known_args()
    
    epochs     = args.epochs
    lr         = args.learning_rate
    batch_size = args.batch_size
    gpu_count  = args.gpu_count
    model_dir  = args.model_dir
    training_dir   = args.train
    
    training_data = pd.read_csv(training_dir+'/train.csv', sep=',')
    training_data.text.fillna("", inplace=True)
    
    tweet = training_data.text.values
    labels = training_data.emotion.values
    
    num_of_words = 1000
    token = Tokenizer(num_words=num_of_words)
    token.fit_on_texts(tweet)
    
    vocab_size = len(token.word_index) + 1 # 1 is added due to 0 index
    
    tweet_sequence = token.texts_to_sequences(tweet)
    
    max_len = 200
    padded_tweet_sequence = pad_sequences(tweet_sequence, maxlen=max_len)
    
    # Build the model
    embedding_vector_length = 32
    model = Sequential() 
    model.add(Embedding(vocab_size, embedding_vector_length, input_length=max_len) )
    model.add(Dropout(0.2))
    model.add(LSTM(20)) 
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid')) 
    model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy']) 
    
    model.fit(padded_tweet_sequence,labels,validation_split=0.3, epochs=epochs, batch_size=batch_size, verbose=2)
    
    tf.saved_model.simple_save(
        tf.keras.backend.get_session(),
        os.path.join(model_dir, '1'),
        inputs={'inputs': model.input},
        outputs={t.name: t for t in model.outputs})

Overwriting train.py


In [116]:
tf_version = tf.__version__
tf_version

'2.0.3'

In [117]:
role = sagemaker.get_execution_role()

In [118]:
from sagemaker.tensorflow import TensorFlow

tf_estimator = TensorFlow(entry_point='train.py', 
                          role=role,
                          train_instance_count=2, 
                          train_instance_type='ml.c5.4xlarge',
                          script_mode=True,
                          hyperparameters={
                              'epochs': 10,
                              'batch-size': 64
                          }
                         )

No framework_version specified, defaulting to version 1.11. framework_version will be required in SageMaker Python SDK v2. This is not the latest supported version. If you would like to use version 2.2.0, please add framework_version=2.2.0 to your constructor.
2.1.0 is the latest version of tensorflow that supports Python 2. Newer versions of tensorflow will only be available for Python 3.Please set the argument "py_version='py3'" to use the Python 3 tensorflow image.


In [119]:
training_input_path

's3://sagemaker-us-east-1-105372338271/tf-emotion-analysis/training/train.csv'

In [120]:
tf_estimator.fit({'train': training_input_path})

'create_image_uri' will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.
's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.
'create_image_uri' will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.


2020-10-24 11:42:58 Starting - Starting the training job...
2020-10-24 11:43:00 Starting - Launching requested ML instances.........
2020-10-24 11:44:32 Starting - Preparing the instances for training...
2020-10-24 11:45:18 Downloading - Downloading input data......
2020-10-24 11:46:17 Training - Training image download completed. Training in progress.[34m2020-10-24 11:46:19,362 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2020-10-24 11:46:19,369 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[35m2020-10-24 11:46:20,004 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[35m2020-10-24 11:46:20,012 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[35m2020-10-24 11:46:20,485 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[35m2020-10-24 11:46:20,500 sagemaker-containers INFO     No GPUs detec

In [121]:
import time

endpoint_name = 'tf-emotion-analysis'+time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
end_point = tf_estimator.deploy(initial_instance_count=1, instance_type='ml.m5.4xlarge', endpoint_name=endpoint_name)

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.
'create_image_uri' will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.


-----------!

In [122]:
print(end_point.endpoint)

tf-emotion-analysis2020-10-24-11-52-15


In [131]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

def preprocess_texts(text):
    
    num_of_words = 100
    token = Tokenizer(num_words=num_of_words)
    
    training_data.fillna("", inplace=True)
    token.fit_on_texts(training_data.text.values)
    
    tweet_sequence = token.texts_to_sequences(text)
    
    max_len = 200
    padded_tweet_sequence = pad_sequences(tweet_sequence, maxlen=max_len)
    
    return padded_tweet_sequence

In [132]:
test_data.fillna("", inplace=True)
test_texts = preprocess_texts(test_data.text.values)

In [149]:
labels_index

Index(['joy', 'fear', 'anger', 'sadness', 'disgust', 'shame', 'guilt'], dtype='object')

In [150]:
import numpy as np

for i, tweet in enumerate(test_texts):
 
    print(test_data.text.values[i])
    prediction = end_point.predict(tweet)['predictions']
    prediction = np.array(prediction).round().item()
    print('Actual sentiment: {} ----- Predicted sentiment  {} \n'.format(test_data.emotion.values[i], labels_index[int(prediction)]))

I was walking home from a place on southside and I live on á Northside and I was followed. I headed my most direct route.
Actual sentiment: 1 ----- Predicted sentiment  fear 

Somebody who knows me very well discovered that I had told him a á lie.
Actual sentiment: 5 ----- Predicted sentiment  fear 

During the Chinese lessons a student often showed off. Moreover á he misbehaved and dressed very badly.  So disgusting !
Actual sentiment: 4 ----- Predicted sentiment  fear 

I am a kindergarten teacher and I am thoroughly weary of my job.  á After having taken the university entrance exam I suffered from á anxiety for weeks as I did not want to carry on with my work - studies á were the only alternative. 
Actual sentiment: 1 ----- Predicted sentiment  fear 

I went to the school library and asked for a book, but the á librarian told me that there wasn't such. However when an other á colleague asked for it she gave it to him.
Actual sentiment: 2 ----- Predicted sentiment  fear 

My first g

KeyboardInterrupt: 