In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd

In [None]:
# Read the labelled posts, keep the listed columns in this order, and expose
# the post body under the shorter column name `text`.
df = (
    pd.read_csv('correct_for_learning.csv')
    .loc[:, [
        'post_text', 'post_date', 'user_id',
        'emo', 'soc', 'psych', 'health',
        'emo_class', 'soc_class', 'psych_class', 'health_class',
    ]]
    .rename(columns={'post_text': 'text'})
)

In [None]:
from transformers import pipeline


In [None]:
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

# Build a Hugging Face "sentiment-analysis" pipeline around a RuBERT checkpoint.
# NOTE(review): "DeepPavlov/rubert-base-cased" looks like a base encoder rather
# than a checkpoint fine-tuned for sentiment; if so, the classification head is
# freshly initialised here and the pipeline's labels are not meaningful — confirm.
model_name = "DeepPavlov/rubert-base-cased"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
sentiment_analysis = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    # Use the first CUDA device when torch can see one; -1 selects the CPU so
    # the cell also runs on CPU-only runtimes.
    device=0 if torch.cuda.is_available() else -1,
)

'from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer\n\nmodel_name = "DeepPavlov/rubert-base-cased"\nmodel = AutoModelForSequenceClassification.from_pretrained(model_name)\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nsentiment_analysis = pipeline(\n    "sentiment-analysis",\n    model=model,\n    tokenizer=tokenizer,\n    device=\'cuda:0\'  \n)'

In [None]:
import torch
from transformers import AutoModelForSequenceClassification
from transformers import BertTokenizerFast

# Single source of truth for the checkpoint so tokenizer and model always match.
_SENTIMENT_CHECKPOINT = 'blanchefort/rubert-base-cased-sentiment-rusentiment'

# Run on the GPU when torch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

tokenizer = BertTokenizerFast.from_pretrained(_SENTIMENT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(
    _SENTIMENT_CHECKPOINT,
    return_dict=True,
)
# Module.to() moves the parameters in place and returns the same module.
model = model.to(device)

@torch.no_grad()
def predict(text):
    """Return the predicted class index for each input text.

    `text` is passed unchanged to the module-level `tokenizer` (this notebook
    calls it with both a single string and a list of strings). Inputs are
    truncated to at most 512 tokens and padded to a common length, moved to
    `device`, and scored by the module-level `model` with gradient tracking
    disabled.

    Returns:
        A NumPy array with one argmax class index per encoded input row.
    """
    encoded = tokenizer(
        text,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors='pt',
    ).to(device)
    logits = model(**encoded).logits
    # Softmax is monotonic, so the argmax over probabilities selects the same
    # class as the argmax over the raw logits would.
    class_probs = logits.softmax(dim=1)
    return class_probs.argmax(dim=1).cpu().numpy()


In [None]:
# Smoke test: one upbeat sentence, one hostile sentence and one plain statement.
texts = [
    'какой чудесный день! я счастлив',
    'я себя ненавижу это место отвратительно и меня бесит',
    'в окне пролетела птица',
]

predict(texts)

array([1, 2, 0])

In [None]:
# Score every post in mini-batches. predict() accepts a list of strings and
# returns one class index per row, so batching avoids a separate forward pass
# (and host/device round trip) for each of the rows in `df`.
# NOTE(review): padded positions are expected to be masked out by the
# tokenizer's attention mask, so labels should match per-row scoring up to
# floating-point noise — spot-check if exact reproducibility matters.
BATCH_SIZE = 32  # lower this if the device runs out of memory

all_texts = df.text.tolist()
results = []
for start in range(0, len(all_texts), BATCH_SIZE):
    batch = all_texts[start:start + BATCH_SIZE]
    # extend() keeps `results` a flat list with one entry per row of `df`.
    results.extend(predict(batch))

In [None]:
# Store the per-row class indices collected in `results` as a new column.
df['sentiment'] = results

In [None]:
# Display the frame, now including the `sentiment` column.
df

Unnamed: 0,text,post_date,user_id,emo,soc,psych,health,emo_class,soc_class,psych_class,health_class,sentiment
0,Я смеялся до слёз. Пародия про каникулы презид...,2020,856,3.666667,0.8,2.833333,2.285714,1,0,1,0,1
1,"Торт ""Наполеон"" на сковороде Готовится, быстро...",2020,856,3.666667,0.8,2.833333,2.285714,1,0,1,0,1
2,"Сольный концерт Софии Ротару в Кремле ( т/к ""И...",2013,856,3.666667,0.8,2.833333,2.285714,1,0,1,0,0
3,TINA TURNER & STING - ON SILENT WINGS (2),2013,856,3.666667,0.8,2.833333,2.285714,1,0,1,0,0
4,Tina Turner - Something Beautiful Remains (Pro...,2013,856,3.666667,0.8,2.833333,2.285714,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
152666,Все,2010,7329,1.000000,2.8,2.166667,2.142857,0,1,0,0,0
152667,нет пока ещё:),2008,7329,1.000000,2.8,2.166667,2.142857,0,1,0,0,0
152668,Да у меня баланс умер:(А пополнить пока некому:),2008,7329,1.000000,2.8,2.166667,2.142857,0,1,0,0,2
152669,Спаси Господи.В инет очень давно уже не выходи...,2008,7329,1.000000,2.8,2.166667,2.142857,0,1,0,0,0


In [None]:
def name_match(x):
    """Translate a numeric sentiment code into its label.

    0 -> 'neutral', 1 -> 'positive', 2 -> 'negative'. Any other value yields
    None. Codes are compared with `==` in that order.
    """
    code_labels = ((0, 'neutral'), (1, 'positive'), (2, 'negative'))
    # First matching code wins; fall through to None when nothing matches.
    return next((label for code, label in code_labels if x == code), None)

In [None]:
# Add a readable label next to each numeric sentiment code, then show the frame.
df['sent_name'] = df['sentiment'].map(name_match)
df

Unnamed: 0,text,post_date,user_id,emo,soc,psych,health,emo_class,soc_class,psych_class,health_class,sentiment,sent_name
0,Я смеялся до слёз. Пародия про каникулы презид...,2020,856,3.666667,0.8,2.833333,2.285714,1,0,1,0,1,positive
1,"Торт ""Наполеон"" на сковороде Готовится, быстро...",2020,856,3.666667,0.8,2.833333,2.285714,1,0,1,0,1,positive
2,"Сольный концерт Софии Ротару в Кремле ( т/к ""И...",2013,856,3.666667,0.8,2.833333,2.285714,1,0,1,0,0,neutral
3,TINA TURNER & STING - ON SILENT WINGS (2),2013,856,3.666667,0.8,2.833333,2.285714,1,0,1,0,0,neutral
4,Tina Turner - Something Beautiful Remains (Pro...,2013,856,3.666667,0.8,2.833333,2.285714,1,0,1,0,0,neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...
152666,Все,2010,7329,1.000000,2.8,2.166667,2.142857,0,1,0,0,0,neutral
152667,нет пока ещё:),2008,7329,1.000000,2.8,2.166667,2.142857,0,1,0,0,0,neutral
152668,Да у меня баланс умер:(А пополнить пока некому:),2008,7329,1.000000,2.8,2.166667,2.142857,0,1,0,0,2,negative
152669,Спаси Господи.В инет очень давно уже не выходи...,2008,7329,1.000000,2.8,2.166667,2.142857,0,1,0,0,0,neutral


In [None]:
# Write the frame (including the sentiment columns) to CSV without the index column.
df.to_csv('learning_features.csv', index = False)

In [None]:
!pip install --upgrade spacy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting spacy
  Downloading spacy-3.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.6/6.6 MB[0m [31m67.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: spacy
  Attempting uninstall: spacy
    Found existing installation: spacy 3.5.2
    Uninstalling spacy-3.5.2:
      Successfully uninstalled spacy-3.5.2
Successfully installed spacy-3.5.3


In [None]:
!pip install razdel

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting razdel
  Downloading razdel-0.5.0-py3-none-any.whl (21 kB)
Installing collected packages: razdel
Successfully installed razdel-0.5.0


In [None]:
!pip install pymorphy2

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pymorphy2
  Downloading pymorphy2-0.9.1-py3-none-any.whl (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.5/55.5 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dawg-python>=0.7.1 (from pymorphy2)
  Downloading DAWG_Python-0.7.2-py2.py3-none-any.whl (11 kB)
Collecting pymorphy2-dicts-ru<3.0,>=2.4 (from pymorphy2)
  Downloading pymorphy2_dicts_ru-2.4.417127.4579844-py2.py3-none-any.whl (8.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m63.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docopt>=0.6 (from pymorphy2)
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: docopt
  Building wheel for docopt (setup.py) ... [?25l[?25hdone
  Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13707 sha

In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install nltk

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import nltk
# Fetch the NLTK 'punkt' package into the local nltk_data directory.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# pip install -U spacy
import spacy
from spacy import displacy
from IPython.core.display import display, HTML
# !python -m spacy download ru_core_news_lg

In [None]:
from razdel import sentenize