In [2]:
import transformers
import torch
import math
import pandas as pd
import numpy as np
import emoji
import re
from transformers import (
    RobertaForSequenceClassification, RobertaTokenizer, BertForSequenceClassification, 
    BertTokenizer, AutoModelForSequenceClassification, AutoTokenizer, AdamW
)
import random
import time


# Seed every random number generator used by the notebook (Python's `random`,
# NumPy, PyTorch, and PyTorch's CUDA generators) with the same value.
seed_val = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(seed_val)
del _seed_fn  # keep the helper name out of the notebook namespace

In [14]:
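# NOTE: this is the only cell that creates the `tokenizer` and `model` globals that
# `Sentiment` relies on. It is currently disabled; uncomment it (keeping exactly one
# of the two `model = ...` sources) before running the cells below on a fresh kernel.
# tokenizer = RobertaTokenizer.from_pretrained('zhayunduo/roberta-base-stocktwits-finetuned')
# # model = RobertaForSequenceClassification.from_pretrained('zhayunduo/roberta-base-stocktwits-finetuned')
# model = RobertaForSequenceClassification.from_pretrained('./data/sentiment-prediction-model.h5')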

In [97]:
# Scratch cell: list the names exposed by `dash.html` (the saved output follows).
from dash import html
dir(html)

['A',
 'Abbr',
 'Acronym',
 'Address',
 'Area',
 'Article',
 'Aside',
 'Audio',
 'B',
 'Base',
 'Basefont',
 'Bdi',
 'Bdo',
 'Big',
 'Blink',
 'Blockquote',
 'Br',
 'Button',
 'Canvas',
 'Caption',
 'Center',
 'Cite',
 'Code',
 'Col',
 'Colgroup',
 'Content',
 'Data',
 'Datalist',
 'Dd',
 'Del',
 'Details',
 'Dfn',
 'Dialog',
 'Div',
 'Dl',
 'Dt',
 'Em',
 'Embed',
 'Fieldset',
 'Figcaption',
 'Figure',
 'Font',
 'Footer',
 'Form',
 'Frame',
 'Frameset',
 'H1',
 'H2',
 'H3',
 'H4',
 'H5',
 'H6',
 'Header',
 'Hgroup',
 'Hr',
 'I',
 'Iframe',
 'Img',
 'Ins',
 'Kbd',
 'Keygen',
 'Label',
 'Legend',
 'Li',
 'Link',
 'Main',
 'MapEl',
 'Mark',
 'Marquee',
 'Meta',
 'Meter',
 'Nav',
 'Nobr',
 'Noscript',
 'ObjectEl',
 'Ol',
 'Optgroup',
 'Option',
 'Output',
 'P',
 'Param',
 'Picture',
 'Plaintext',
 'Pre',
 'Progress',
 'Q',
 'Rb',
 'Rp',
 'Rt',
 'Rtc',
 'Ruby',
 'S',
 'Samp',
 'Script',
 'Section',
 'Select',
 'Shadow',
 'Slot',
 'Small',
 'Source',
 'Spacer',
 'Span',
 'Strike',
 'Strong',

In [87]:
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def Sentiment(sent, model=None, tokenizer=None, max_length=64):
    """Classify one piece of text and return the predicted index and raw logits.

    Parameters
    ----------
    sent : str
        Text to classify.
    model : optional
        Classifier invoked as
        ``model(input_ids, token_type_ids=None, attention_mask=...)``.
        When omitted, the notebook-level global ``model`` is looked up at
        call time.
    tokenizer : optional
        Object exposing ``encode_plus``. When omitted, the notebook-level
        global ``tokenizer`` is looked up at call time.
    max_length : int, optional
        Length the input is truncated/padded to (default 64).

    Returns
    -------
    tuple
        ``(index, logits)`` where ``logits`` is the first element of the
        model output and ``index`` is ``logits.argmax()``.

    Raises
    ------
    NameError
        If ``model`` or ``tokenizer`` is neither passed in nor defined as a
        global.
    """
    # The defaults were previously bound at definition time (`model=model`),
    # so merely defining this function failed on a fresh kernel and later
    # re-assignments of the globals were silently ignored. Resolve them lazily.
    if model is None:
        model = globals().get('model')
    if tokenizer is None:
        tokenizer = globals().get('tokenizer')
    if model is None or tokenizer is None:
        raise NameError(
            "Sentiment() needs a model and a tokenizer: pass them explicitly or "
            "define the globals `model` and `tokenizer` first"
        )

    encoded_dict = tokenizer.encode_plus(
        sent,
        add_special_tokens=True,
        truncation=True,
        max_length=max_length,
        padding='max_length',
        return_attention_mask=True,
        return_tensors='pt',
    )

    # `as_tensor` accepts both tensors and plain lists and avoids the legacy
    # `torch.LongTensor(...)` constructor.
    input_id = torch.as_tensor(encoded_dict['input_ids'], dtype=torch.long).to(device)
    attention_mask = torch.as_tensor(encoded_dict['attention_mask'], dtype=torch.long).to(device)
    model = model.to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(input_id, token_type_ids=None, attention_mask=attention_mask)

    logits = outputs[0]
    index = logits.argmax()
    return index, logits


def process_text(texts):
    """Normalise one raw message before it is tokenized.

    Steps, in order: drop URLs, decode the ``&#39;`` apostrophe entity,
    rewrite ``#tag`` / ``$TICKER`` / ``@user`` into ``hashtag_tag`` /
    ``cashtag_TICKER`` / ``mention_user``, spell emojis out as words, and
    strip surrounding whitespace. Letter case is left unchanged.

    Parameters
    ----------
    texts : str
        A single message (despite the plural name).

    Returns
    -------
    str
        The cleaned message.
    """
    # Drop URLs. The dot after "www" is escaped: the previous pattern
    # `www.\S+` let "." match any character (even a space), which mangled
    # ordinary words such as "awwww yeah".
    texts = re.sub(r'https?://\S+', "", texts)
    texts = re.sub(r'www\.\S+', "", texts)
    # Decode the HTML apostrophe entity. This must run before the hashtag
    # rule, otherwise "#39;" would be rewritten as a hashtag.
    texts = texts.replace('&#39;', "'")
    # Replace the '#' / '$' sigils with explicit word prefixes.
    texts = re.sub(r'(\#)(\S+)', r'hashtag_\2', texts)
    texts = re.sub(r'(\$)([A-Za-z]+)', r'cashtag_\2', texts)
    # Same for '@' user mentions.
    texts = re.sub(r'(\@)(\S+)', r'mention_\2', texts)
    # Turn emojis into their textual names, each followed by a space.
    texts = emoji.demojize(texts, delimiters=("", " "))

    return texts.strip()

def checkSenti(sent, return_logits=True):
    """Predict whether a message is 'Bearish' or 'Bullish'.

    The text is cleaned with ``process_text`` and classified with
    ``Sentiment``; class index 0 maps to 'Bearish' and 1 to 'Bullish'.

    Parameters
    ----------
    sent : str
        Raw message.
    return_logits : bool, optional
        When True (default) also return the probability of the predicted
        label.

    Returns
    -------
    list or str
        ``[label, probability]`` when ``return_logits`` is True, otherwise
        just ``label``.
    """
    labels = ['Bearish', 'Bullish']
    sent_processed = process_text(sent)
    index, logits = Sentiment(sent_processed)
    label = labels[int(index)]
    if not return_logits:
        return label

    # Softmax in float64. torch's implementation subtracts the maximum first,
    # so it cannot overflow the way the previous hand-rolled
    # exp(a) / (exp(a) + exp(b)) did for large logits.
    probs = torch.softmax(logits[0].double(), dim=-1)
    return [label, probs.max().item()]


In [96]:
# Scratch check: format the probability of a [label, probability] pair as a
# percentage rounded to two decimals.
prediction = ['Bearish', 0.959383]

round(prediction[1]*100, 2)

95.94

In [91]:
# Scratch check of the label -> color-name mapping: 'Bullish' gives 'success',
# anything else gives 'danger'.
prediction = ['Bearish']
if prediction[0] == 'Bullish':
    color = 'success'
else:
    color = 'danger'
print(color)

danger


In [84]:
# Pick the color name from a live prediction: 'success' when the result
# contains 'Bullish', 'danger' otherwise.
color = 'success' if 'Bullish' in checkSenti('im feelin good todays') else 'danger'

In [86]:
# Build one small button per entry of `theme_colors`.
# NOTE(review): `dbc` and `theme_colors` are not defined in any visible cell —
# presumably dash_bootstrap_components and a list of color names; confirm.
[dbc.Button(f"{color}", color=f"{color}", size="sm") for color in theme_colors]

'success'

In [None]:
# Never-executed duplicate of the button-list cell above it.
# NOTE(review): `dbc` and `theme_colors` are not defined in any visible cell —
# confirm where they come from, or remove this cell.
[dbc.Button(f"{color}", color=f"{color}", size="sm") for color in theme_colors]

In [60]:
# Spot check. NOTE(review): the saved output is a formatted string, while the
# current checkSenti returns a [label, probability] list by default —
# presumably the output comes from an earlier version; re-run to refresh.
checkSenti('im feeling bearish')

'Sentiment is predicted to be Bearish, 0.9762407648711596'

In [32]:
# Text with no URL, '#', '$', '@' or emoji is expected to come back unchanged.
process_text('craaaaaaap im feeling bullish')

'craaaaaaap im feeling bullish'

In [38]:
# Negation spot check. NOTE(review): the tensor in the saved output presumably
# comes from the `print(logits)` that is now commented out in checkSenti.
checkSenti('im not bullish')

tensor([[ 1.9979, -1.7745]])


'Bearish'

In [37]:
# Two predictions in one cell; only the value of the last expression is shown
# as the cell result.
checkSenti('im not bullish')
checkSenti('im feeling bullish')

tensor([[ 1.9979, -1.7745]])
tensor([[-1.9449,  1.8161]])


'Bullish'

In [81]:
# Run the classifier over a few hand-written messages and print each result.
# NOTE(review): the saved output shows formatted strings, while the current
# checkSenti returns a [label, probability] list by default — presumably
# produced by an earlier version; re-run to refresh.
samples = ['im feeling bullish about this stock',
           'im feeling bearish about this stock',
           'welp diamond hands it is',
           'shoot to the moon',
           'short this stock']
for sample in samples:
    print(checkSenti(sample))

Sentiment is predicted to be 99.79% Bullish
Sentiment is predicted to be 98.72% Bearish
Sentiment is predicted to be 75.84% Bullish
Sentiment is predicted to be 95.46% Bullish
Sentiment is predicted to be 99.43% Bearish
