In [4]:
import json
import pathlib
import typing as tp
from natasha import (
    Segmenter,
    NewsNERTagger,
    NewsEmbedding,
    Doc
)
from pymorphy2 import MorphAnalyzer
import pickle
import math
from collections import Counter

In [1]:
# Load the previously saved `sentiment_texts` object from a pickle file in the
# current working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling — only
# open files that come from a trusted source.
with open('sentiment_texts.pickle', 'rb') as f1:
    sentiment_texts = pickle.load(f1)

In [None]:
_MORPH_ANALYZER = None  # shared analyzer, created lazily on first use


def _get_morph_analyzer():
    """Return the shared MorphAnalyzer, constructing it only once."""
    global _MORPH_ANALYZER
    if _MORPH_ANALYZER is None:
        _MORPH_ANALYZER = MorphAnalyzer()
    return _MORPH_ANALYZER


def normalize_company_name(name, morph=None):
    """Put every word of a company name into nominative singular form.

    The name is split on whitespace. For each word the first parse returned by
    the analyzer is inflected with the grammemes ``{'nomn', 'sing'}``; when the
    inflection is not available the word is kept unchanged.

    Args:
        name: Company name as a whitespace-separated string.
        morph: Optional analyzer object exposing ``parse(word)``. When omitted,
            a single shared ``MorphAnalyzer`` is used instead of constructing
            a new one on every call.

    Returns:
        The processed words joined with single spaces. An empty or
        whitespace-only ``name`` yields an empty string.
    """
    if morph is None:
        morph = _get_morph_analyzer()

    normalized_words = []
    for word in name.split():
        # Only the first (top-ranked by the analyzer's ordering) parse is used.
        inflected = morph.parse(word)[0].inflect({'nomn', 'sing'})
        # NOTE(review): the inflected form comes from the analyzer and may
        # differ in letter case from the input word — confirm if case matters.
        normalized_words.append(inflected.word if inflected else word)
    return ' '.join(normalized_words)

In [None]:
_NER_PIPELINE = None  # (Segmenter, NewsNERTagger) pair, created lazily on first use


def _get_ner_pipeline():
    """Return the shared (segmenter, NER tagger) pair, constructing it only once."""
    global _NER_PIPELINE
    if _NER_PIPELINE is None:
        # Building the embedding and tagger is the costly part, so it is done
        # a single time instead of once per processed text.
        _NER_PIPELINE = (Segmenter(), NewsNERTagger(NewsEmbedding()))
    return _NER_PIPELINE


def extract_company_names(text):
    """Return the normalized names of all organisations tagged in ``text``.

    The text is wrapped in a ``Doc``, segmented and NER-tagged; every span
    whose type is ``'ORG'`` is passed through ``normalize_company_name``.

    Args:
        text: The text to analyse.

    Returns:
        A list of normalized organisation names in the order of ``doc.spans``
        (possibly empty, duplicates are kept).
    """
    segmenter, ner_tagger = _get_ner_pipeline()

    doc = Doc(text)
    doc.segment(segmenter)
    doc.tag_ner(ner_tagger)

    return [
        normalize_company_name(span.text)
        for span in doc.spans
        if span.type == 'ORG'
    ]


In [None]:
def score_texts(texts):
    """Prefix each text with the bracketed list of organisations found in it.

    The zero-based position of each text is printed before it is processed.

    Args:
        texts: Iterable of texts.

    Returns:
        A list of strings of the form ``"[name1, name2] <text>"``; the brackets
        are empty when no organisation was found.
    """
    annotated = []
    for index, text in enumerate(texts):
        print(index)
        found = extract_company_names(text)
        if found:
            label = ', '.join(found)
        else:
            label = ""
        annotated.append(f"[{label}] {text}")
    return annotated

In [None]:
# Run score_texts over sentiment_texts['MessageText'] and print the complete
# returned list.
# NOTE(review): printing the whole list can produce very large cell output —
# consider displaying only a slice.
scores = score_texts(sentiment_texts['MessageText'])
print(scores)

In [None]:
from deeppavlov import build_model, configs

def sentiment_model():
    """Build and return the DeepPavlov ``rusentiment_convers_bert`` classifier.

    ``build_model`` is called with ``download=True``.
    """
    classifier_config = configs.classifiers.rusentiment_convers_bert
    return build_model(classifier_config, download=True)

# Build the classifier once when this cell runs and keep it in the global `model`.
# NOTE(review): a later cell rebinds `model` to a dostoevsky
# FastTextSocialNetworkModel; analyze_sentiment() reads this global at call
# time, so the result depends on cell execution order.
model = sentiment_model()

In [None]:
def analyze_sentiment(text, classifier=None):
    """Classify a single text and return the classifier's result for it.

    Args:
        text: The text to classify.
        classifier: Optional callable that takes a list of texts and returns
            an indexable batch of results. When omitted, the notebook-level
            ``model`` is used, looked up at call time.

    Returns:
        The first (and only) element of the classifier's batch output.

    Raises:
        TypeError: If the classifier in use is not callable — for example when
            the global ``model`` has been rebound to an object that is used
            through ``.predict`` rather than being called directly.
    """
    active = model if classifier is None else classifier
    if not callable(active):
        # Fail with an actionable message instead of an opaque
        # "object is not callable" raised from the call expression below.
        raise TypeError(
            "analyze_sentiment needs a callable classifier, got "
            f"{type(active).__name__}; the global `model` may have been "
            "rebound by a later cell"
        )
    # The classifier works on batches, so wrap the text and unwrap the result.
    return active([text])[0]


In [None]:
def score_texts(texts):
    """Describe each text with its sentiment and the organisations it mentions.

    This definition replaces any earlier ``score_texts`` in the notebook.

    Args:
        texts: Iterable of texts.

    Returns:
        A list with one formatted string per text:
        ``"Sentiment: ..., Companies: ..., Text: ..."``. The companies part is
        ``"No company names"`` when none were found.
    """
    def describe(text):
        # Company extraction runs before sentiment analysis for each text.
        found = extract_company_names(text)
        sentiment = analyze_sentiment(text)
        if found:
            company_names_str = ', '.join(found)
        else:
            company_names_str = "No company names"
        return f"Sentiment: {sentiment}, Companies: {company_names_str}, Text: {text}"

    return [describe(text) for text in texts]

In [None]:
from dostoevsky.tokenization import RegexTokenizer
from dostoevsky.models import FastTextSocialNetworkModel

# Score every message with dostoevsky's FastText social-network model.
tokenizer = RegexTokenizer()
# Point the model class at a model binary given by a path relative to the
# current working directory.
FastTextSocialNetworkModel.MODEL_PATH = 'nltk_data/fasttext-social-network-model.bin'
# NOTE(review): this rebinds the global `model` that an earlier cell assigned
# from sentiment_model(); analyze_sentiment() calls `model(...)` and will pick
# up this object instead if it is invoked after this cell. Consider a distinct name.
model = FastTextSocialNetworkModel(tokenizer=tokenizer)
sentiment_list = []
# k=2 is forwarded to predict — presumably the number of top labels returned
# per text; confirm against the dostoevsky documentation.
results = model.predict(sentiment_texts['MessageText'], k=2)
# Copy the predictions into sentiment_list one by one, preserving order.
for sentiment in results:
    sentiment_list.append(sentiment)

In [None]:
# Spot-check: print the prediction stored at index 3.
print(sentiment_list[3])

In [None]:
# Collect the distinct keys that occur across all prediction mappings in
# sentiment_list, then print the resulting set.
keys = set()
for i in sentiment_list:
  # Only the key is used; the associated value is ignored here.
  for key, value in i.items():
    keys.add(key)
print(keys)

In [None]:
def sentiment_score(sentiment_data, weights=None, max_score=5):
    """Turn per-text label values into integer scores.

    For every mapping in ``sentiment_data`` the values of the known labels are
    multiplied by their weights and summed; the sum is rounded up and capped
    at ``max_score``. Labels that are missing from a mapping count as 0, and
    labels without a weight are ignored.

    Args:
        sentiment_data: Iterable of mappings from label to a numeric value.
        weights: Optional mapping from label to weight. Defaults to
            ``{'negative': 1, 'neutral': 3.5, 'positive': 5, 'skip': 0,
            'speech': 0}``.
        max_score: Upper bound applied to each score (default 5).

    Returns:
        A list with one integer score per input mapping, in input order.
    """
    if weights is None:
        weights = {'negative': 1, 'neutral': 3.5, 'positive': 5, 'skip': 0, 'speech': 0}

    scores = []
    for data in sentiment_data:
        weighted_sum = sum(data.get(label, 0) * weight for label, weight in weights.items())
        # Rounding up can overshoot the scale, so clamp to max_score.
        scores.append(min(max_score, math.ceil(weighted_sum)))
    return scores

In [None]:
# Spot-check: print the computed score for the entry at index 3.
print(sentiment_score(sentiment_list)[3])

In [None]:
# Print how many entries received each score value.
print(Counter(sentiment_score(sentiment_list)))