In [67]:
import pandas as pd
import numpy as np
import networkx as nx
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import nltk
import re
import matplotlib.pyplot as plt
import dgl
import itertools
import pickle

from allennlp.modules.elmo import Elmo, batch_to_ids
from eunjeon import Mecab
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

from dgl.dataloading import GraphDataLoader
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.nn.utils.rnn import pad_sequence
from sklearn.metrics import accuracy_score, recall_score, f1_score

# Use the 'Malgun Gothic' font family for all matplotlib text.
# NOTE(review): presumably chosen so Korean column names render in figures;
# the font must be installed on the machine running the notebook — confirm.
plt.rc('font', family='Malgun Gothic')

In [68]:
# Fetch the NLTK resources used by this notebook: the 'punkt' tokenizer data,
# the averaged-perceptron POS tagger and WordNet.
# The value of the last call is what the cell displays.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [69]:
# Root folder holding the IEMOCAP inputs and the files this notebook writes.
data_dir = '../analysis_files/files/'

# Run torch on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [70]:
# Session folders found under <data_dir>/IEMOCAP.
# os.listdir returns entries in arbitrary order, and that order decides the
# row order of every frame built below, so sort it to keep runs reproducible.
# The path is joined component-wise so it no longer depends on data_dir
# ending with a separator.
file_list = sorted(os.listdir(os.path.join(data_dir, "IEMOCAP")))
file_list

['Session1', 'Session2', 'Session3', 'Session4', 'Session5']

In [71]:
# Collect the transcript file names of every session, session by session.
file_name = []
for name in file_list:
    transcript_dir = os.path.join(data_dir, "IEMOCAP", name, "transcriptions")
    # Sort for a deterministic order (os.listdir order is arbitrary) and keep
    # only '.txt' entries: each name is later used, minus the extension, as a
    # lookup key, so a stray non-transcript file would raise a KeyError there.
    file_name.extend(
        sorted(entry for entry in os.listdir(transcript_dir) if entry.endswith('.txt'))
    )

In [72]:
# Transcript names with every '.txt' occurrence removed; these are the keys
# used to index the loaded feature bundle.
file_name2 = [transcript.replace('.txt', '') for transcript in file_name]


In [73]:
# Load the pre-extracted IEMOCAP feature bundle from data_dir.
# SECURITY: pickle.load can execute arbitrary code while deserializing, so only
# open this file when it comes from a trusted source.
# The loaded object is consumed positionally (element 0, 1, 2, ...) by the
# extraction cell further down.
with open(os.path.join(data_dir, 'IEMOCAP_features.pkl'), 'rb') as f:
    data = pickle.load(f)

In [74]:
# One accumulator per leading element of the loaded bundle, in positional
# order: element 0 feeds videoIDs, element 1 feeds videoSpeakers, and so on.
videoIDs, videoSpeakers, videoLabels = [], [], []
videoText, videoAudio, videoVisual = [], [], []
videoSentence = []

field_accumulators = (
    videoIDs,
    videoSpeakers,
    videoLabels,
    videoText,
    videoAudio,
    videoVisual,
    videoSentence,
)

# Only the first seven elements of `data` are read; zip stops once the
# accumulators run out. Each element is indexed by transcript name (file name
# without '.txt'), visited in file_name2 order, and its values are appended to
# the matching flat list.
for accumulator, per_transcript in zip(field_accumulators, data):
    for transcript_key in file_name2:
        accumulator.extend(per_transcript[transcript_key])

# Build the utterance-level DataFrame. The text/audio/visual feature lists are
# collected above but deliberately left out of the frame.
df = pd.DataFrame({
    'VideoID': videoIDs,
    'Speakers': videoSpeakers,
    'Labels': videoLabels,
    'Sentence': videoSentence,
})


In [75]:
# Label codes: {'hap': 0, 'sad': 1, 'neu': 2, 'ang': 3, 'exc': 4, 'fru': 5}
# Keep only the columns the rest of the pipeline reads.
df = df.loc[:, ['Speakers', 'Labels', 'Sentence']]

In [76]:
# Display the utterance-level frame (Speakers, Labels, Sentence).
df

Unnamed: 0,Speakers,Labels,Sentence
0,F,2,Excuse me.
1,M,5,Do you have your forms?
2,F,2,Yeah.
3,M,5,Let me see them.
4,F,2,Is there a problem?
...,...,...,...
7428,M,3,oh! Marry you again? I wouldn't marry you agai...
7429,F,3,Beast
7430,M,3,You're a wicked little vampire. And I pray to...
7431,F,3,Brute


In [77]:
# Collapse every run of consecutive utterances by the same speaker into one
# turn. A turn keeps the speaker, the label of the run's FIRST utterance, and
# the run's sentences joined with single spaces.
#
# NOTE(review): runs are detected over the whole frame with no dialogue
# boundary check, so a run can span the end of one dialogue and the start of
# the next — confirm that this is intended.
#
# The result is built from plain records instead of a dict named `data`, so
# the loaded feature bundle stays bound to `data` and the extraction cell can
# still be re-run. Iterating the columns directly also removes the dependence
# on a 0..n-1 index and makes an empty frame yield an empty result instead of
# raising.
utterances = zip(df['Speakers'], df['Labels'], df['Sentence'])

merged_turns = []
for speaker, run in itertools.groupby(utterances, key=lambda row: row[0]):
    run = list(run)
    merged_turns.append({
        'Speakers': speaker,
        'Labels': run[0][1],
        'Sentence': ' '.join(row[2] for row in run),
    })

new_df = pd.DataFrame(merged_turns, columns=['Speakers', 'Labels', 'Sentence'])

In [78]:
# Spread each turn's text into a per-speaker column: F_Sentence holds the text
# on rows spoken by 'F' (NaN elsewhere), M_Sentence likewise for 'M'.
# The combined Sentence column and the Speakers column are then dropped.
spoken_by_f = new_df['Speakers'] == 'F'
spoken_by_m = new_df['Speakers'] == 'M'

new_df = new_df.assign(
    F_Sentence=new_df['Sentence'].where(spoken_by_f),
    M_Sentence=new_df['Sentence'].where(spoken_by_m),
).drop(columns=['Sentence', 'Speakers'])

In [79]:
# Filler row: a synthetic 'M' reply (label 3) used to complete the final
# exchange, because the pairing cell below consumes rows two at a time.
# NOTE(review): this row is fabricated data that ends up in the dataset —
# confirm it is acceptable (dropping the unpaired turn is the alternative).
new_row = {
    'Labels': 3,
    'F_Sentence': np.nan,
    'M_Sentence': 'Yoo too'
}

# Pad only when the row count is odd. Appending unconditionally would turn an
# even-length frame into an odd one, and re-running this cell would keep
# adding filler rows.
if len(new_df) % 2 == 1:
    new_df = pd.concat([new_df, pd.DataFrame([new_row])], ignore_index=True)


In [80]:
# Display the per-turn frame (Labels, F_Sentence, M_Sentence).
new_df

Unnamed: 0,Labels,F_Sentence,M_Sentence
0,2,Excuse me.,
1,5,,Do you have your forms?
2,2,Yeah.,
3,5,,Let me see them.
4,2,Is there a problem?,
...,...,...,...
5083,3,,oh! Marry you again? I wouldn't marry you agai...
5084,3,Beast,
5085,3,,You're a wicked little vampire. And I pray to...
5086,3,Brute pig,


In [81]:
# Pair rows two at a time into one exchange per output row: the even row
# supplies F_Sentence, the following odd row supplies M_Sentence and the label.
# NOTE(review): this assumes turns strictly alternate starting with an 'F'
# row — confirm against the merged frame.
f_sentences = new_df['F_Sentence'].tolist()
m_sentences = new_df['M_Sentence'].tolist()
turn_labels = new_df['Labels'].tolist()
turn_count = len(new_df)

result_data = []
for i in range(0, turn_count, 2):
    # An odd row count leaves the last turn without a reply; keep it with a
    # missing M_Sentence and its own label instead of raising on row i + 1.
    has_reply = i + 1 < turn_count
    result_data.append({
        'F_Sentence': f_sentences[i],
        'M_Sentence': m_sentences[i + 1] if has_reply else np.nan,
        'Labels': turn_labels[i + 1] if has_reply else turn_labels[i],
    })

# Build the exchange-level DataFrame in one go.
result_df = pd.DataFrame(result_data, columns=['F_Sentence', 'M_Sentence', 'Labels'])


In [82]:
# Display the exchange-level frame (F_Sentence, M_Sentence, Labels).
result_df

Unnamed: 0,F_Sentence,M_Sentence,Labels
0,Excuse me.,Do you have your forms?,5
1,Yeah.,Let me see them.,5
2,Is there a problem?,Who told you to get in this line? Okay. But I ...,5
3,Well what's the problem? Let me change it.,This form is a Z.X.four. You can't-- This is ...,5
4,What? I'm getting an ID. This is why I'm her...,No. I need another set of ID to prove this is ...,5
...,...,...,...
2539,This is the end! Do you understand me? This is...,"Oh, you're not going like this.",3
2540,"Yes, I am. Let go with me","[GARBAGE] No, you're not.",3
2541,You are a cruel fiend and I-I loath you. I th...,oh! Marry you again? I wouldn't marry you agai...,3
2542,Beast,You're a wicked little vampire. And I pray to...,3


In [83]:
# Group exchanges three at a time into one training row: three
# (F sentence, M sentence) pairs plus the label of the window's last exchange.
processed_columns = ['사람문장1', '시스템문장1', '사람문장2', '시스템문장2', '사람문장3', '시스템문장3', '감정_대분류']

f_sentences = result_df['F_Sentence'].tolist()
m_sentences = result_df['M_Sentence'].tolist()
exchange_labels = result_df['Labels'].tolist()
exchange_count = len(result_df)

# Rows are collected as plain records and turned into a DataFrame once;
# concatenating inside the loop re-copies the whole frame on every iteration.
processed_records = []
for i in range(0, exchange_count, 3):
    record = {}
    for offset in range(3):
        row = i + offset
        in_range = row < exchange_count
        # Slots past the end of result_df are filled with NaN.
        record[f'사람문장{offset + 1}'] = f_sentences[row] if in_range else np.nan
        record[f'시스템문장{offset + 1}'] = m_sentences[row] if in_range else np.nan

    # Label of the last exchange actually present in this window. The
    # unguarded i + 2 lookup raised a KeyError whenever the number of
    # exchanges was not a multiple of three.
    record['감정_대분류'] = exchange_labels[min(i + 2, exchange_count - 1)]
    processed_records.append(record)

processed_df = pd.DataFrame(processed_records, columns=processed_columns)


In [84]:
# Display the three-exchange windows with their numeric label.
processed_df

Unnamed: 0,사람문장1,시스템문장1,사람문장2,시스템문장2,사람문장3,시스템문장3,감정_대분류
0,Excuse me.,Do you have your forms?,Yeah.,Let me see them.,Is there a problem?,Who told you to get in this line? Okay. But I ...,5
1,Well what's the problem? Let me change it.,This form is a Z.X.four. You can't-- This is ...,What? I'm getting an ID. This is why I'm her...,No. I need another set of ID to prove this is ...,How am I supposed to get an ID without an ID? ...,I don't know. But I need an ID to pass this f...,5
2,I'm here to get an ID.,"No. I need another ID, a separate one.",Like what? Like a birth certificate?,"A birth certificate, a passport...a student ID...","Yes but my wallet was stolen, I don't have any...","Yeah. We keep it on file, but we need an ID t...",5
3,That's out of control.,I don't understand why this is so complicated ...,How long have you been working here? Clearly. ...,Yeah. Do you want to see my supervisor? Huh?...,That would - I would appreciate that. Yeah. D...,I don't understand. You've already done so mu...,1
4,I don't know. I put in that. request too. The...,I just don't see how they can make you leave w...,"I know. There's babies over there, though, tha...",I don't know how you can be okay with this? I...,"Just, you know, kicking myself. things just ar...",I know. I just don't see why it has to be you.,5
...,...,...,...,...,...,...,...
843,"Yes, I did quite a lot.",You must have let him kiss you quite a good de...,You're quite insufferable. I do imagine it's ...,I am not the slightest bit drunk.,You always had a weak head.,I do believe that I have said already that I h...,3
844,Not- Would you shut up.,"You know, we could get a really good debate go...","You're not very funny, dear. You better have ...",You know what? That's a very good idea. I th...,Very well. If you have to be boorish and idio...,Turn it off. It's driving me mad.,3
845,You are far too temperamental. Try to control...,Turn it off!,Go away! Go away! I- I hate you.,Very amusing indeed.,"You know what, I'm sick and tire of listening ...",Ja! Very funny.,3
846,"Stop. Would you just go away? I mean, you're...","You are a vile tempered, wicked living, evil l...",This is the end! Do you understand me? This is...,"Oh, you're not going like this.","Yes, I am. Let go with me","[GARBAGE] No, you're not.",3


In [85]:
# Numeric label code -> emotion abbreviation.
emotion_mapping = {
    0: 'hap',
    1: 'sad',
    2: 'neu',
    3: 'ang',
    4: 'exc',
    5: 'fru'
}

# Translate the codes in '감정_대분류' to their abbreviations. Values that are
# not keys of the mapping are passed through unchanged, so re-running this cell
# on already-translated labels leaves them intact instead of turning every
# label into NaN.
processed_df['감정_대분류'] = processed_df['감정_대분류'].map(
    lambda code: emotion_mapping.get(code, code)
)


In [86]:
# Display the final frame with emotion abbreviations in '감정_대분류'.
processed_df

Unnamed: 0,사람문장1,시스템문장1,사람문장2,시스템문장2,사람문장3,시스템문장3,감정_대분류
0,Excuse me.,Do you have your forms?,Yeah.,Let me see them.,Is there a problem?,Who told you to get in this line? Okay. But I ...,fru
1,Well what's the problem? Let me change it.,This form is a Z.X.four. You can't-- This is ...,What? I'm getting an ID. This is why I'm her...,No. I need another set of ID to prove this is ...,How am I supposed to get an ID without an ID? ...,I don't know. But I need an ID to pass this f...,fru
2,I'm here to get an ID.,"No. I need another ID, a separate one.",Like what? Like a birth certificate?,"A birth certificate, a passport...a student ID...","Yes but my wallet was stolen, I don't have any...","Yeah. We keep it on file, but we need an ID t...",fru
3,That's out of control.,I don't understand why this is so complicated ...,How long have you been working here? Clearly. ...,Yeah. Do you want to see my supervisor? Huh?...,That would - I would appreciate that. Yeah. D...,I don't understand. You've already done so mu...,sad
4,I don't know. I put in that. request too. The...,I just don't see how they can make you leave w...,"I know. There's babies over there, though, tha...",I don't know how you can be okay with this? I...,"Just, you know, kicking myself. things just ar...",I know. I just don't see why it has to be you.,fru
...,...,...,...,...,...,...,...
843,"Yes, I did quite a lot.",You must have let him kiss you quite a good de...,You're quite insufferable. I do imagine it's ...,I am not the slightest bit drunk.,You always had a weak head.,I do believe that I have said already that I h...,ang
844,Not- Would you shut up.,"You know, we could get a really good debate go...","You're not very funny, dear. You better have ...",You know what? That's a very good idea. I th...,Very well. If you have to be boorish and idio...,Turn it off. It's driving me mad.,ang
845,You are far too temperamental. Try to control...,Turn it off!,Go away! Go away! I- I hate you.,Very amusing indeed.,"You know what, I'm sick and tire of listening ...",Ja! Very funny.,ang
846,"Stop. Would you just go away? I mean, you're...","You are a vile tempered, wicked living, evil l...",This is the end! Do you understand me? This is...,"Oh, you're not going like this.","Yes, I am. Let go with me","[GARBAGE] No, you're not.",ang


In [87]:
# Write the final frame to <data_dir>/IEMOCAP_final_dataframe.csv, encoded as
# cp949 and without the index column.
final_csv_path = os.path.join(data_dir, 'IEMOCAP_final_dataframe.csv')
processed_df.to_csv(final_csv_path, index=False, encoding='cp949')

In [None]:
# Read back the IEMOCAP_final_dataframe.csv file written above (cp949-encoded).
df = pd.read_csv(os.path.join(data_dir, 'IEMOCAP_final_dataframe.csv'), encoding='cp949')

# Columns expected downstream. '상황키워드' is not among the columns this
# notebook writes to the CSV, so selecting it unconditionally raised a
# KeyError; keep whichever of the expected columns the file actually has.
# NOTE(review): if later cells need '상황키워드', it has to be produced upstream.
expected_columns = ['사람문장1', '시스템문장1', '사람문장2', '시스템문장2', '사람문장3', '시스템문장3', '감정_대분류', '상황키워드']
df = df[[column for column in expected_columns if column in df.columns]]