In [2]:
import pandas as pd
import os
import glob
from konlpy.tag import Okt, Kkma, Komoran
from collections import Counter

In [3]:
def createDirectory(directory):
    """Create ``directory`` (including missing parents) unless it already exists.

    Failures are reported on stdout instead of being raised.

    Parameters
    ----------
    directory : str
        Path of the folder to create.

    Returns
    -------
    None
    """
    try:
        # exist_ok=True makes the call idempotent in a single step. A separate
        # os.path.exists() check followed by makedirs() can race with another
        # process creating the folder in between, and it also stays silent
        # when the path exists but is a regular file rather than a directory.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print("Error: Failed to create the directory.")

In [4]:
def analysis_noun(df, stopwords):
    """Count how often each noun occurs in the '발언내용' column of ``df``.

    Every string cell of the column is passed to ``Okt().nouns``; nouns
    shorter than two characters and nouns contained in ``stopwords`` are
    discarded before counting.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains a '발언내용' column.
    stopwords : container of str
        Nouns to exclude; only membership tests (``in``) are performed on it.

    Returns
    -------
    collections.Counter
        Noun -> number of occurrences, in order of first appearance.

    Raises
    ------
    KeyError
        If ``df`` has no '발언내용' column.
    """
    okt = Okt()
    count_nouns = Counter()

    # Read the column by name rather than by a fixed position, so the function
    # does not depend on the column order of the input file.
    for line in df['발언내용']:
        # Empty CSV cells come back from pandas as float NaN; only string
        # cells are handed to the tagger.
        if not isinstance(line, str):
            continue
        count_nouns.update(
            noun for noun in okt.nouns(line)
            if len(noun) >= 2 and noun not in stopwords
        )

    return count_nouns

In [5]:
def make_df(c, source_df=None):
    """Build the output table ['회수', '단어', '빈도수'] from a word counter.

    Parameters
    ----------
    c : mapping of str to int
        Word -> frequency, e.g. a ``collections.Counter``.
    source_df : pandas.DataFrame, optional
        Frame providing the '회수' column. When omitted, the module-level
        variable ``df`` is used, so existing ``make_df(c)`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per word with the columns '회수', '단어', '빈도수' (in that
        order). An empty ``c`` yields an empty frame with those columns.

    Raises
    ------
    KeyError
        If the source frame has no '회수' column.
    """
    if source_df is None:
        # Backward-compatible fallback to the notebook's global frame.
        source_df = df

    # Building the two columns explicitly also works for an empty mapping,
    # where from_dict(...).reset_index() yields a single column and renaming
    # it to two names fails.
    words = pd.DataFrame({'단어': list(c.keys()), '빈도수': list(c.values())})

    sessions = source_df['회수']
    distinct = sessions.dropna().unique()
    if len(distinct) == 1:
        # One value for the whole frame: label every word with it. A
        # column-wise concat followed by dropna() would instead cut the word
        # list down to the number of rows in the source frame and turn the
        # integer columns into floats.
        words.insert(0, '회수', distinct[0])
        return words

    # NOTE(review): with zero or several distinct '회수' values the words are
    # paired with the column row-by-row and unmatched rows are dropped, as
    # before. Confirm whether that pairing is meaningful for such inputs.
    paired = pd.concat([words, sessions], axis=1).dropna()
    return paired[['회수', '단어', '빈도수']]

In [6]:
location = os.getcwd()

# File names are derived from the globbed paths instead of a separate
# os.listdir() call: glob skips dot-files while listdir does not, so two
# independent listings can fall out of step when paired by position.
# Sorting makes the processing order deterministic; sub-folders are skipped.
file_list = sorted(
    path for path in glob.glob(os.path.join(location, "회의록_모음/*"))
    if os.path.isfile(path)
)
fname_list = [os.path.basename(path) for path in file_list]

# Name of the output folder.
dir_name = "단어 빈도 분석"
createDirectory(os.path.join(location, dir_name))

# Stop-word dictionary: every value of the '불용어' column, as a set for fast lookup.
s = pd.read_csv("불용어사전_통합본.csv", encoding = 'euc-kr')
stopwords_1_2 = set(s['불용어'])

for path, fname in zip(file_list, fname_list):
    # make_df(result) takes the '회수' column from the module-level `df`,
    # so the frame has to be bound to exactly this name.
    df = pd.read_csv(path, encoding = 'euc-kr')
    result = analysis_noun(df, stopwords_1_2)
    result_df = make_df(result)

    # splitext removes the extension whatever its length is.
    name = os.path.splitext(fname)[0]
    out_path = os.path.join(location, dir_name, name + "_" + dir_name + ".csv")
    result_df.to_csv(out_path, index = False, encoding = 'euc-kr')
    print("%s 단어분석 완료" % name)

서울특별시의회 회의록 제277회[정례회] (2017.11.01 ~ 2017.12.20) 단어분석 완료
서울특별시의회 회의록 제284회[정례회] (2018.11.01 ~ 2018.12.20) 단어분석 완료
서울특별시의회 회의록 제290회[정례회] (2019.11.01 ~ 2019.12.20) 단어분석 완료
서울특별시의회 회의록 제298회[정례회](2020.11.02 ~ 2020.12.22) 단어분석 완료
서울특별시의회 회의록 제303회[정례회](2021.11.01 ~ 2021.12.27) 단어분석 완료
