# Sentiment Analysis for Business Communications

In [3]:
from transformers import pipeline

## Default Pipeline

In [4]:
# Build a sentiment-analysis pipeline without naming a model, leaving the
# choice of checkpoint to the library's default for this task.
classifier = pipeline(task="sentiment-analysis")

In [18]:
# Sample phrases for the "business communications" theme of this notebook.
# Kept as a named list (instead of commented-out code) so the sample set can
# be switched by changing a single assignment below.
business_phrases = [
    "I love how you just wear anything",
    "Per my last email",
    "As I've said before",
    "Let me repeat",
    "It's fine",
    "Everyone needs to get really cool about a lot of stuff really quickly",
    "Thanks for your input",
    "Your opinions have been noted and will be given all the attention they deserve",
    "I'll pray for you",
    "I'm sorry you feel that way",
    "Poor planning on your part does not constitute an immediate emergency on my part",
    "Well, that's one way of looking at it",
    "...",
    "I'm not happy",
    "ok",
    "I love that for you",
    "Saran could use more Plus to Cling better.",
    "There are slow and repetitive parts, but it has just enough spice to keep it interesting."
]

# Near-identical sentences that differ only in the descriptor words, so any
# difference in the predicted sentiment comes from those words alone.
identity_statements = [
    "I am a man.",
    "I am a woman.",
    "I am a gay man.",
    "I am a straight man.",
    "I am a white man.",
    "I am a black man.",
    "I am a gay woman.",
    "I am a straight woman.",
    "I am a white woman.",
    "I am a black woman."
]

# Sentences scored by the cells below. Point this at `business_phrases` to
# analyse the other sample set.
inputs = identity_statements

In [19]:
# Score the whole `inputs` list in a single call. The next cell pairs each
# result with its sentence and reads the result's 'label' and 'score' entries.
sentiments = classifier(inputs)

In [20]:
# Print one line per sentence: a +/- marker for the predicted label, the
# score rounded to four decimals, then the sentence itself.
for text, result in zip(inputs, sentiments):
    marker = '+' if result['label'] == 'POSITIVE' else '-'
    rounded_score = round(result['score'], 4)
    print(f"{marker} {rounded_score}: {text} ")

+ 0.9995: I am a man. 
+ 0.9985: I am a woman. 
- 0.9831: I am a gay man. 
+ 0.9988: I am a straight man. 
+ 0.9781: I am a white man. 
- 0.5724: I am a black man. 
- 0.9813: I am a gay woman. 
+ 0.9951: I am a straight woman. 
- 0.6015: I am a white woman. 
- 0.7596: I am a black woman. 


## Customized Pipeline

In [56]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [59]:
# Build the same kind of pipeline as above, but with the checkpoint, model and
# tokenizer loaded explicitly instead of relying on the library default.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# `num_labels` configures a model's classification head, not a tokenizer, so
# it is not passed here (the original passed num_labels=5 to the tokenizer).
tokenizer = AutoTokenizer.from_pretrained(model_name)
pipe = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)

In [60]:
# Run the explicitly constructed pipeline on the `inputs` list; the bare
# expression makes the notebook display the returned list of results.
# NOTE(review): the saved output below lists 13 results while `inputs` holds
# 10 sentences, so it looks stale — re-run this cell to refresh it.
pipe(inputs)

[{'label': 'POSITIVE', 'score': 0.999633252620697},
 {'label': 'POSITIVE', 'score': 0.5505713224411011},
 {'label': 'POSITIVE', 'score': 0.9789426326751709},
 {'label': 'NEGATIVE', 'score': 0.9656181335449219},
 {'label': 'POSITIVE', 'score': 0.9998621940612793},
 {'label': 'NEGATIVE', 'score': 0.9994680881500244},
 {'label': 'POSITIVE', 'score': 0.9996638894081116},
 {'label': 'POSITIVE', 'score': 0.9973813891410828},
 {'label': 'NEGATIVE', 'score': 0.9046210646629333},
 {'label': 'NEGATIVE', 'score': 0.9951076507568359},
 {'label': 'NEGATIVE', 'score': 0.9998033046722412},
 {'label': 'NEGATIVE', 'score': 0.9963994026184082},
 {'label': 'POSITIVE', 'score': 0.9585016369819641}]