In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
from tqdm import tqdm
import pandas as pd

model_name = "searle-j/kote_for_easygoing_people"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

pipe = TextClassificationPipeline(
        model=model,
        tokenizer=tokenizer,
        device=0, # gpu number, -1 if cpu used
        return_all_scores=True,
        function_to_apply='sigmoid'
    )


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
original_df = pd.read_csv('../final_total_data.csv', encoding="utf-8-sig")

In [3]:
customer_review_list = original_df['고객리뷰'].tolist()
manager_review_list = original_df['사장답글'].tolist()

In [5]:
customer_emotion_list = []

for outputs in tqdm(pipe(customer_review_list[:10000])):
    temp = set()

    for output in outputs:
        if output["score"]>0.4:
            temp.add(output["label"])

    customer_emotion_list.append(temp)

customer_emotion_list

100%|██████████| 10000/10000 [00:00<00:00, 126789.30it/s]


[{'감동/감탄', '고마움', '기쁨', '뿌듯함', '안심/신뢰', '즐거움/신남', '편안/쾌적', '행복', '환영/호의'},
 {'깨달음', '없음'},
 {'감동/감탄', '고마움', '기대감', '기쁨', '뿌듯함', '안심/신뢰', '즐거움/신남', '행복', '환영/호의'},
 {'감동/감탄', '고마움', '기쁨', '뿌듯함', '안심/신뢰', '즐거움/신남', '편안/쾌적', '행복', '환영/호의'},
 {'감동/감탄',
  '고마움',
  '기대감',
  '기쁨',
  '뿌듯함',
  '안심/신뢰',
  '즐거움/신남',
  '편안/쾌적',
  '행복',
  '환영/호의'},
 {'감동/감탄', '고마움', '기쁨', '아껴주는', '안심/신뢰', '즐거움/신남', '행복', '환영/호의'},
 {'감동/감탄', '고마움', '기쁨', '뿌듯함', '안심/신뢰', '즐거움/신남', '편안/쾌적', '행복', '환영/호의'},
 {'감동/감탄', '고마움', '기쁨', '뿌듯함', '안심/신뢰', '즐거움/신남', '편안/쾌적', '행복', '환영/호의'},
 {'당황/난처', '불평/불만', '안타까움/실망', '어이없음', '짜증'},
 {'감동/감탄',
  '고마움',
  '기대감',
  '기쁨',
  '뿌듯함',
  '안심/신뢰',
  '즐거움/신남',
  '편안/쾌적',
  '행복',
  '환영/호의'},
 {'감동/감탄', '고마움', '기쁨', '뿌듯함', '안심/신뢰', '즐거움/신남', '편안/쾌적', '행복', '환영/호의'},
 {'당황/난처', '불평/불만', '슬픔', '안타까움/실망', '짜증'},
 {'감동/감탄', '고마움', '기쁨', '뿌듯함', '안심/신뢰', '즐거움/신남', '편안/쾌적', '행복', '환영/호의'},
 {'감동/감탄', '기쁨', '당황/난처', '불평/불만', '슬픔', '안타까움/실망'},
 {'감동/감탄',
  '고마움',
  '기대감',
  '기쁨',
  '뿌듯함',
  '아껴

In [6]:
len(customer_emotion_list)

10000

In [7]:
manager_emotion_list = []

for outputs in tqdm(pipe(manager_review_list[:10000])):
    temp = set()

    for output in outputs:
        if output["score"]>0.4:
            temp.add(output["label"])

    manager_emotion_list.append(temp)

manager_emotion_list

100%|██████████| 10000/10000 [00:00<00:00, 128245.00it/s]


[{'감동/감탄',
  '고마움',
  '기대감',
  '기쁨',
  '뿌듯함',
  '아껴주는',
  '안심/신뢰',
  '즐거움/신남',
  '편안/쾌적',
  '행복',
  '환영/호의'},
 {'감동/감탄', '고마움', '기대감', '기쁨', '안심/신뢰', '즐거움/신남', '행복', '환영/호의'},
 {'감동/감탄', '고마움', '기대감', '기쁨', '뿌듯함', '안심/신뢰', '즐거움/신남', '행복', '환영/호의'},
 {'감동/감탄', '고마움', '기대감', '기쁨', '뿌듯함', '안심/신뢰', '즐거움/신남', '행복', '환영/호의'},
 {'감동/감탄', '고마움', '기대감', '기쁨', '안심/신뢰', '즐거움/신남', '편안/쾌적', '행복', '환영/호의'},
 {'고마움', '기대감', '기쁨', '안심/신뢰', '환영/호의'},
 {'감동/감탄', '고마움', '기대감', '기쁨', '안심/신뢰', '즐거움/신남', '행복', '환영/호의'},
 {'감동/감탄', '고마움', '기대감', '기쁨', '뿌듯함', '안심/신뢰', '즐거움/신남', '행복', '환영/호의'},
 {'고마움', '기대감', '기쁨', '당황/난처', '불안/걱정', '슬픔', '안심/신뢰', '안타까움/실망'},
 {'감동/감탄',
  '고마움',
  '기대감',
  '기쁨',
  '뿌듯함',
  '안심/신뢰',
  '즐거움/신남',
  '편안/쾌적',
  '행복',
  '환영/호의'},
 {'감동/감탄', '고마움', '기대감', '기쁨', '뿌듯함', '안심/신뢰', '즐거움/신남', '행복', '환영/호의'},
 {'고마움', '기대감', '당황/난처', '불안/걱정', '불평/불만', '슬픔', '안심/신뢰', '안타까움/실망'},
 {'감동/감탄', '고마움', '기대감', '기쁨', '뿌듯함', '안심/신뢰', '즐거움/신남', '행복', '환영/호의'},
 {'감동/감탄', '고마움', '기대감', '기쁨', '안심/신뢰', 

In [8]:
len(manager_emotion_list)

10000

In [9]:
emotion_intersection_list = []

for i, j in tqdm(zip(customer_emotion_list, manager_emotion_list)):
    emotion_intersection_list.append(set(i & j))


10000it [00:00, 494657.99it/s]


In [21]:
counter = 0

for i in range(10000):
    if len(emotion_intersection_list[i]) < 6:
        counter += 1

In [22]:
counter

2512