In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import DataLoader, SequentialSampler
from torch.nn.functional import softmax
import torch
import pandas as pd

# Load the tokenizer and the sequence-classification model for the checkpoint
# named in `model_name`.
# NOTE(review): the previous comment called this "KR-FinBERT", but the
# checkpoint actually loaded is "kb-albert-char-base-v2".
# NOTE(review): the load-time warning captured in the cell output reports
# weights of AlbertForSequenceClassification that were not initialized from the
# checkpoint; presumably the classification head is untrained, which would make
# the resulting scores uninformative until the model is fine-tuned — confirm.
model_name = "kb-albert-char-base-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

def sentiment_analysis(text, model, tokenizer):
    """Return the softmax probability of class index 1 for ``text``.

    Args:
        text: Input handed to ``tokenizer``; it is truncated to at most 512
            tokens.
        model: Classifier invoked as ``model(**inputs)``; its result must
            expose ``.logits``. If the model has a ``device`` attribute, the
            tokenized tensors are moved there before the forward pass.
        tokenizer: Callable that returns a mapping of input name to tensor
            when called with ``return_tensors="pt"``.

    Returns:
        The probability assigned to class index 1 of the first (only)
        sequence, as a Python float.
        NOTE(review): index 1 is treated as the "positive" label — confirm
        against the checkpoint's label mapping.
    """
    encoded = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")

    # Freshly tokenized tensors are not guaranteed to live on the same device
    # as the model; a mismatch makes the forward pass fail, so align them
    # whenever the model tells us where it lives.
    device = getattr(model, "device", None)
    if device is not None:
        encoded = {
            name: value.to(device) if torch.is_tensor(value) else value
            for name, value in encoded.items()
        }

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(**encoded).logits
        probabilities = softmax(logits, dim=1)

    return probabilities[0][1].item()

# Read the preprocessed company data from prepro_company_data.csv.
df = pd.read_csv("./prepro_company_data.csv")

if "Title" in df.columns:
    # Score every title in one pass. Rows with a missing title get NaN rather
    # than being sent to the tokenizer, which only accepts text.
    scores = [
        float("nan") if pd.isna(title) else sentiment_analysis(title, model, tokenizer)
        for title in df["Title"]
    ]
    # Build the column as float64 up front (instead of seeding it with the
    # integer 0 and overwriting cell by cell) so no implicit dtype upcast
    # happens, and align on df.index so the result does not depend on the
    # frame having a default 0..n-1 index.
    df["Positive_Score"] = pd.Series(scores, index=df.index, dtype="float64")

    # Save the result under a new file name.
    new_file_path = "./KB-ALbert.csv"
    df.to_csv(new_file_path, index=False, encoding='utf-8-sig')
    print(f"Saved DataFrame with Positive_Score to {new_file_path}.")
    print(df.head(3))
else:
    print("prepro_company_data.csv does not contain 'Title' column.")

Some weights of the model checkpoint at kb-albert-char-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.LayerNorm.bias', 'predictions.decoder.bias', 'predictions.bias', 'predictions.dense.weight', 'sop_classifier.classifier.bias', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'predictions.decoder.weight', 'sop_classifier.classifier.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at kb-albert-ch

Saved DataFrame with Positive_Score to ./KB-ALbert.csv.
  company     code        Date                               Title  \
0    삼성전자  '005930  2021-03-12  진짜 5 G 28 서비스 활성화 첫 발 과기부 TF 발족 회의   
1    삼성전자  '005930  2021-03-05                아버님 댁 에어 프라이어 놓다 드리다   
2    삼성전자  '005930  2021-05-14           ESG 경영 첫 단추 늘다 여성 관리자 키우다   

   Positive_Score  
0        0.551764  
1        0.539966  
2        0.541864  
