## Análise de Sentimentos

In [None]:
import pandas as pd

In [None]:
# Source CSV of Netflix-related news articles; the first CSV column is used as
# the DataFrame index.
NOTICIAS_CSV_URL = "https://raw.githubusercontent.com/adriel1ft/stock-prediction-cnn/main/articles_netflix_2015-2022.csv"

noticias = pd.read_csv(NOTICIAS_CSV_URL, index_col=0)

In [None]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Load the FinBERT-tone checkpoint as a 3-class sequence classifier together
# with its matching tokenizer.
finbert = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone", num_labels=3)
tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")

# Run on the first GPU when one is available, otherwise on CPU (-1). Without an
# explicit `device` the pipeline stays on CPU even when a GPU is present.
pipeline_device = 0 if torch.cuda.is_available() else -1

sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=finbert,
    tokenizer=tokenizer,
    device=pipeline_device,
)

# Classify every article title.
data = noticias['title'].tolist()

# truncation=True clips any input that exceeds the model's maximum sequence
# length instead of letting a single over-long title abort the whole run.
resultados = sentiment_pipeline(data, truncation=True)

# Each result is a dict with the predicted 'label' and its 'score'; results are
# returned in the same order as `data`, so they align with the rows of `noticias`.
noticias['sentimento'] = [resultado['label'] for resultado in resultados]
noticias['score'] = [resultado['score'] for resultado in resultados]

print(noticias)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


                                                        title sentimento  \
year_month                                                                 
201501      Sundance 2015: Duplass Brothers, Netflix Make ...    Neutral   
201501      Netflix Accelerates Ambitious Global Expansion...   Positive   
201501       Movie Studio by Amazon for Screens Big and Small    Neutral   
201501      As Content Pours From Everywhere, New Network ...    Neutral   
201501                    Cleaning Up the Mac’s Printers List    Neutral   
...                                                       ...        ...   
202112      The Best Movies and TV Shows New to Netflix, A...   Positive   
202112                            5 Things to Do This Weekend    Neutral   
202112       ‘Emily in Paris’ and the City I Thought Was Mine    Neutral   
202112      ‘Cowboy Bebop’ Beats and the Universe of ‘Dune...    Neutral   
202112          In 2021, Black Boyhood Got More Complex on TV    Neutral   

           

In [None]:
# Preview the first five rows of the articles table.
noticias.head(n=5)

Unnamed: 0_level_0,title,sentimento,score
year_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
201501,"Sundance 2015: Duplass Brothers, Netflix Make ...",Neutral,0.99986
201501,Netflix Accelerates Ambitious Global Expansion...,Positive,0.995587
201501,Movie Studio by Amazon for Screens Big and Small,Neutral,0.999978
201501,"As Content Pours From Everywhere, New Network ...",Neutral,0.894186
201501,Cleaning Up the Mac’s Printers List,Neutral,0.999932


In [None]:
# `year_month` is loaded as the DataFrame index (read_csv(..., index_col=0)),
# so indexing it as a column raises KeyError on a fresh kernel. Promote it to a
# regular column first; the guard keeps this cell safe to re-run.
if 'year_month' not in noticias.columns:
    noticias = noticias.rename_axis('year_month').reset_index()

# The values are YYYYMM codes such as 201501. Parse them with an explicit
# format: without one, integers are interpreted as nanoseconds since the Unix
# epoch and every row would collapse into 1970-01.
if not pd.api.types.is_datetime64_any_dtype(noticias['year_month']):
    noticias['year_month'] = pd.to_datetime(
        noticias['year_month'].astype(str), format='%Y%m'
    )

# One row per calendar month: mean score and most frequent sentiment label.
# NOTE(review): `score` appears to be the confidence of whichever label was
# predicted, so this mean mixes confidences of different classes and does not
# express polarity. Confirm that this is what the consumer of the CSV expects.
noticias_monthly_avg = (
    noticias
    .groupby(noticias['year_month'].dt.to_period('M'))
    .agg({
        'score': 'mean',
        'sentimento': lambda x: x.mode()[0],  # most frequent label in the month
    })
    .reset_index()
    .rename(columns={'year_month': 'month'})
)

print(noticias_monthly_avg)

      month     score sentimento
0   2015-01  0.978255    Neutral
1   2015-02  0.993725    Neutral
2   2015-03  0.947350    Neutral
3   2015-04  0.999888    Neutral
4   2015-05  0.999075    Neutral
..      ...       ...        ...
79  2021-08  0.967460    Neutral
80  2021-09  0.999810    Neutral
81  2021-10  0.977608    Neutral
82  2021-11  0.993130    Neutral
83  2021-12  0.999786    Neutral

[84 rows x 3 columns]


In [None]:
# Sanity check: list the distinct monthly labels to see whether the
# most-frequent-label aggregation kept any variation between months.
pd.unique(noticias_monthly_avg['sentimento'])

array(['Neutral'], dtype=object)

In [None]:
# Persist the monthly aggregate to a CSV file in the working directory,
# leaving out the positional index.
csv_file = 'avg_monthly_sentiment_scores_netflix.csv'
noticias_monthly_avg.to_csv(path_or_buf=csv_file, index=False)

print(f"Dados armazenados em {csv_file} com sucesso.")


Dados armazenados em avg_monthly_sentiment_scores_netflix.csv com sucesso.
