In [18]:
import pandas as pd 
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from tqdm.auto import tqdm

In [12]:
items = pd.read_csv('items_cleaned.csv')
items = items.head(20)
items.head()

Unnamed: 0.1,Unnamed: 0,type,id,graph,label,source_id,source_label,source_url,source_date,source_language,relevantExcerpt,excerpt_id,excerpt_value,words,year
0,0,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,& # 34 ; Owd Siah waved his as he termed a thi...,"['sniffing', 'the party', 'betimes']",1880
1,1,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,179 With her arms linked in those of Lucy and ...,"['Bartlett', 'perfume', 'scent', 'very odorous...",1880
2,2,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,104 pictures : Eccho forth your grones through...,"['stench', 'noysome', 'his', 'crawling']",1880
3,3,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,"Nothing , perhaps , sends up the quicksilver o...",['fragrant'],1880
4,4,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,open space I sent forth a shout of joy that ec...,"['scent', 'thy', 'so much sharper']",1880


In [15]:
# Method 1: VADER Sentiment Analysis
def vader_sentiment(text):
    sid = SentimentIntensityAnalyzer()
    sentiment_scores = sid.polarity_scores(text)
    return sentiment_scores['compound']

items['SentAnalysis_VADER'] = items['excerpt_value'].apply(vader_sentiment)

# Method 2: TextBlob Sentiment Analysis
def textblob_sentiment(text):
    blob = TextBlob(text)
    return blob.sentiment.polarity

items['SentAnalysis_TextBlob'] = items['excerpt_value'].apply(textblob_sentiment)

# Method 3: BERT Sentiment Analysis
sentiment_pipeline = pipeline("sentiment-analysis")

def bert_sentiment(text):
    result = sentiment_pipeline(text)
    return result[0]['score'] if result[0]['label'] == 'POSITIVE' else -result[0]['score']

items['SentAnalysis_BERT'] = items['excerpt_value'].apply(bert_sentiment)

items.head()

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Unnamed: 0.1,Unnamed: 0,type,id,graph,label,source_id,source_label,source_url,source_date,source_language,relevantExcerpt,excerpt_id,excerpt_value,words,year,SentAnalysis_VADER,SentAnalysis_TextBlob,SentAnalysis_BERT
0,0,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,& # 34 ; Owd Siah waved his as he termed a thi...,"['sniffing', 'the party', 'betimes']",1880,0.6414,0.088636,-0.988164
1,1,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,179 With her arms linked in those of Lucy and ...,"['Bartlett', 'perfume', 'scent', 'very odorous...",1880,0.7338,-0.071652,-0.999057
2,2,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,104 pictures : Eccho forth your grones through...,"['stench', 'noysome', 'his', 'crawling']",1880,-0.9842,-0.0625,-0.984664
3,3,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,"Nothing , perhaps , sends up the quicksilver o...",['fragrant'],1880,0.0516,0.083333,-0.937853
4,4,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,open space I sent forth a shout of joy that ec...,"['scent', 'thy', 'so much sharper']",1880,0.9839,0.202273,0.999463


In [16]:
items.head(20)

Unnamed: 0.1,Unnamed: 0,type,id,graph,label,source_id,source_label,source_url,source_date,source_language,relevantExcerpt,excerpt_id,excerpt_value,words,year,SentAnalysis_VADER,SentAnalysis_TextBlob,SentAnalysis_BERT
0,0,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,& # 34 ; Owd Siah waved his as he termed a thi...,"['sniffing', 'the party', 'betimes']",1880,0.6414,0.088636,-0.988164
1,1,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,179 With her arms linked in those of Lucy and ...,"['Bartlett', 'perfume', 'scent', 'very odorous...",1880,0.7338,-0.071652,-0.999057
2,2,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,104 pictures : Eccho forth your grones through...,"['stench', 'noysome', 'his', 'crawling']",1880,-0.9842,-0.0625,-0.984664
3,3,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,"Nothing , perhaps , sends up the quicksilver o...",['fragrant'],1880,0.0516,0.083333,-0.937853
4,4,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,open space I sent forth a shout of joy that ec...,"['scent', 'thy', 'so much sharper']",1880,0.9839,0.202273,0.999463
5,5,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,"Even when he was quite an old , old man , it w...","['fragrant', 'lanes', 'hedgerows']",1880,-0.5203,-0.012135,-0.822548
6,6,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,235 with quite a collection of whips of all so...,"['odour', 'aroma', 'of tobacco', 'subtle', 'of...",1880,0.0772,0.091667,0.870115
7,7,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Gabriel Harvey ' s ' praise ' could have been ...,"['stinks', 'literary ghoul', 'in the nostrils ...",1880,0.7717,0.077778,-0.947101
8,8,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Not much that I remember ; only that lie hoped...,"['scented', 'limes']",1880,0.911,0.2,0.998094
9,9,http://data.odeuropa.eu/ontology/L11_Smell,http://data.odeuropa.eu/smell/a09e1c3c-f023-5e...,http://data.odeuropa.eu/british-library,smell,http://data.odeuropa.eu/source/56c5cda7-750c-5...,Geschichte der Stadt und Bäder zu Baden [in C...,,1880,en,http://data.odeuropa.eu/source/56c5cda7-750c-5...,http://data.odeuropa.eu/source/56c5cda7-750c-5...,THE BARONETS WELCOME HOME .51 upon generations...,"['the', 'scented', 'limes']",1880,0.9348,0.316667,0.995166


In [19]:
df = pd.read_csv('cleaned_data.csv')

df = df[df['relevantExcerpt'].notna()]

df = df.head(20)

tokenizer = AutoTokenizer.from_pretrained('j-hartmann/emotion-english-distilroberta-base')
model = AutoModelForSequenceClassification.from_pretrained('j-hartmann/emotion-english-distilroberta-base')

emotion_classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)

def classify_emotions(text):
    return emotion_classifier(text)[0]['label']

df['gen_emotion'] = df['excerpt_value_cleaned'].apply(classify_emotions)

df.head(20)

'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: eb0f8e6e-54a8-4b98-82f6-efcc985125a4)')' thrown while requesting HEAD https://huggingface.co/j-hartmann/emotion-english-distilroberta-base/resolve/main/tokenizer_config.json
'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: b553b5ed-afb6-4b4f-8d34-b4fbc9c68bab)')' thrown while requesting HEAD https://huggingface.co/j-hartmann/emotion-english-distilroberta-base/resolve/main/config.json


OSError: We couldn't connect to 'https://huggingface.co' to load this file, couldn't find it in the cached files and it looks like j-hartmann/emotion-english-distilroberta-base is not the path to a directory containing a file named config.json.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.