In [1]:
from konlpy.tag import Mecab

from gensim.models import fasttext
from gensim.test.utils import datapath
from gensim.utils import tokenize
from gensim import utils
import numpy as np
import pandas as pd
import tempfile
import os
import re
import sys
import time
from PyQt5.QtWidgets import *
from PyQt5.Qt import Qt
from lxml import etree

# Load the pre-trained fastText model (Facebook binary format).
model = fasttext.load_facebook_model("C:/Users/pranst/superbigmodel.bin", encoding="utf-8")

# Mecab morphological analyzer, used to tokenize survey text for training.
mecab = Mecab("C:/mecab/mecab-ko-dic")

# Survey file name (without the ".txt" extension) -> keywords used to score panel members.
# Keywords are compared verbatim against the model, so they must not carry
# stray whitespace (the last entry previously read ' 교육').
keywords = {'2016년 대학교 금연사업 운영평가':['담배', '금연', '흡연'],
           '광주지역 노동환경 실태조사 설문지_근로자':['노동', '근무', '임금', '직장'],
           '설문지_전남대학교 대학원생 인권실태조사':['인권', '대학원'],
           '설문지_2018문화예술지원사업 만족도 조사':['공연', '전시', '문화', '예술'],
           '장애인문화예술실태조사_통합':['장애', '장애인', '예술'],
           '설문지_광주광역시 주요정책에 대한 시민 인식도 조사':['정책'],
           '설문지_2020년 광주광역시 자원봉사 실태조사':['자원봉사', '봉사'],
           '설문지_2020 광주 평생교육 만족도 조사_일반':['평생교육', '교육']
           }

In [2]:
class MyIter:
    """Iterable corpus that yields one Mecab-tokenized sentence per line of a survey text file.

    A fresh file handle is opened on every ``__iter__`` call, so the same
    instance can be iterated more than once.

    Args:
        surveyname: path of the UTF-8 text file to read. It is passed through
            gensim's ``datapath``; an absolute path is presumably returned
            unchanged -- verify against the installed gensim version.
    """

    # Everything that is not a Latin letter, a digit, a Hangul syllable or
    # whitespace counts as a special character. Hangul and whitespace must be
    # kept, otherwise the Korean text itself would be stripped before tokenizing.
    _SPECIAL_CHARS = re.compile(r'[^A-Za-z0-9가-힣\s]')

    def __init__(self, surveyname):
        self.surveyname = surveyname

    def __iter__(self):
        path = datapath(self.surveyname)
        print(path)
        with utils.open(path, 'r', encoding='utf-8') as fin:
            for line in fin:
                # re.sub returns a new string; the cleaned text has to be
                # captured and tokenized, not the raw line.
                cleaned = self._SPECIAL_CHARS.sub('', line)
                yield list(mecab.morphs(cleaned))

def modelTraining(surveyName):
    """Continue training the global fastText ``model`` on one survey file and save it.

    Args:
        surveyName: path of the survey text file, forwarded to ``MyIter``.

    Side effects:
        Updates the vocabulary and weights of the module-level ``model`` and
        writes the result to ``superbigmodel1.bin`` in the working directory.
    """
    # Extend the existing vocabulary with the words of the new corpus.
    model.build_vocab(MyIter(surveyName), update=True)
    # total_examples must be the number of sentences in the corpus being
    # trained on (recorded by build_vocab as corpus_count), not the size of
    # the vocabulary.
    model.train(MyIter(surveyName), total_examples=model.corpus_count, epochs=1)
    fasttext.save_facebook_model(model, "superbigmodel1.bin")

In [6]:
class processing:
    """Scores panel members against survey keywords and returns the best matches."""

    # Member workbook read on every call to extract().
    MEMBER_FILE = "C:/Users/pranst/20대이상회원정보.xlsx"
    MEMBER_SHEET = '시트0'
    # NOTE(review): columns F:L are presumably the five profile fields below
    # followed by '전공라벨링' and '직업라벨링' -- confirm against the workbook.
    MEMBER_COLUMNS = 'F:L'

    # Profile fields that are scored, in the order they appear in the sheet.
    FIELD_NAMES = ['전공', '세부전공', '직업', '세부직업', '관심사']

    @staticmethod
    def _strip_department_suffix(name):
        """Cut ``name`` at the last occurrence of a department suffix ('공학과', '학과', '과')."""
        for suffix in ('공학과', '학과', '과'):
            pos = name.rfind(suffix)
            if pos != -1:
                return name[:pos]
        return name

    @staticmethod
    def extract(keywordlist, weight, interest=False, n=10, sample=5):
        """Return the ``sample`` members whose profile is most related to the keywords.

        For every member and every profile field, the fastText similarity
        ``sim`` to each keyword is damped to ``sim / (1 + n * |sim|)`` and the
        damped values are summed per field. The final score is the weighted
        sum of the per-field totals.

        Args:
            keywordlist: keywords to compare each profile field against.
            weight: the four weights for 전공, 세부전공, 직업 and 세부직업.
                The list is not modified.
            interest: when True the 관심사 field is also scored, with weight
                ``(4 - sum(weight)) / 4``; otherwise its weight is 0.
            n: damping factor applied to each similarity.
            sample: maximum number of members to return.

        Returns:
            A list with one entry per selected member, best score first. Each
            entry is the member's row from the sheet as a list, without
            '직업라벨링' when the member is labelled '전공' and without
            '전공라벨링' when labelled '직업'.
        """
        df = pd.read_excel(processing.MEMBER_FILE,
                           sheet_name=processing.MEMBER_SHEET,
                           usecols=processing.MEMBER_COLUMNS)
        fields = processing.FIELD_NAMES

        # Work on a copy so the caller's list does not grow on every call.
        weights = list(weight)[:4]
        weights.append((4 - sum(weights)) / 4 if interest else 0)

        # Relevance of every profile field of every member.
        rows = []
        for i in range(len(df)):
            field_scores = []
            for j in range(len(fields)):
                info = df.iloc[i, j]
                if not isinstance(info, str):
                    # Empty cell (NaN) or non-text value: nothing to compare.
                    field_scores.append(0.0)
                    continue
                if j == 1:
                    # 세부전공: compare the subject, not the department suffix.
                    info = processing._strip_department_suffix(info)
                total = 0.0
                for keyword in keywordlist:
                    sim = model.wv.similarity(info, keyword)
                    total += sim / (1 + n * abs(sim))
                field_scores.append(total)
            rows.append(field_scores)

        data = pd.DataFrame(rows, columns=fields, index=df.index)

        # Label each member by whichever side (major vs. job) scores higher.
        major_score = data['전공'] + data['세부전공']
        job_score = data['직업'] + data['세부직업']
        data['라벨'] = ['전공' if major > job else '직업'
                        for major, job in zip(major_score, job_score)]

        # Weighted total over all five fields; the 관심사 weight is 0 unless
        # `interest` is set, so it only contributes when requested.
        data['sum'] = sum(w * data[name] for w, name in zip(weights, fields))

        sorting_data = data.sort_values(by=['sum'], axis=0, ascending=False).head(sample)

        view = []
        for idx, label in zip(sorting_data.index, sorting_data['라벨']):
            # Keep only the labeling column that matches the member's label.
            unused = '직업라벨링' if label == '전공' else '전공라벨링'
            view.append(df.loc[idx].drop(unused).tolist())

        return view


In [7]:
class InputKeywords(QMainWindow):
    """Main window of the expert-panel recommendation tool.

    The window lets the user pick a survey text file, adjust the scoring
    options, request a recommendation (``processing.extract``) and continue
    training the model on the selected file (``modelTraining``).
    """

    # Number of rows the result table starts with and is reset to.
    DEFAULT_ROWS = 20

    def __init__(self, parent=None):
        super().__init__(parent)
        self.state = False      # True while the "관심사" checkbox is ticked
        self.n = 0
        self.sample = 0
        self.fname = []         # QFileDialog result; empty until a file is chosen
        self.filename = ""      # base name of the chosen file
        self.setupUi()

    def _addOption(self, box, caption, default):
        """Add a caption label and a pre-filled line edit to ``box``; return both widgets."""
        label = QLabel(caption, self)
        box.addWidget(label)
        edit = QLineEdit(default)
        box.addWidget(edit)
        return label, edit

    def setupUi(self):
        """Build the widgets: file picker, keyword box, options, buttons and result table."""
        # Window
        self.setWindowTitle("전문가 패널 추천 시스템")
        self.centralWidget = QWidget()
        self.setCentralWidget(self.centralWidget)
        layout = QGridLayout()
        self.centralWidget.setLayout(layout)

        # Group 1: survey file
        groupBox1 = QGroupBox("파일")
        layout.addWidget(groupBox1)

        Box1 = QHBoxLayout()
        groupBox1.setLayout(Box1)

        self.filenameText = QLineEdit()
        Box1.addWidget(self.filenameText)
        self.getFileBtn = QPushButton("찾아보기", self)
        self.getFileBtn.clicked.connect(self.getFileClicks)
        Box1.addWidget(self.getFileBtn)

        # Group 2: keywords
        groupBox2 = QGroupBox("키워드")
        layout.addWidget(groupBox2)

        Box2 = QHBoxLayout()
        groupBox2.setLayout(Box2)

        self.keywordLabel, self.keywordText = self._addOption(Box2, "키워드", '')

        # Group 3: scoring options
        groupBox3 = QGroupBox("옵션")
        layout.addWidget(groupBox3)

        Box3 = QHBoxLayout()
        groupBox3.setLayout(Box3)

        self.majorLabel, self.majorText = self._addOption(Box3, "전공", '0.6')
        self.dmajorLabel, self.dmajorText = self._addOption(Box3, "세부전공", '0.7')
        self.jobLabel, self.jobText = self._addOption(Box3, "직업", '0.8')
        self.djobLabel, self.djobText = self._addOption(Box3, "세부직업", '0.9')

        self.interestCk = QCheckBox("관심사", self)
        self.interestCk.stateChanged.connect(self.setState)
        Box3.addWidget(self.interestCk)

        self.nLabel, self.nText = self._addOption(Box3, "n", '10')
        self.sampleLabel, self.sampleText = self._addOption(Box3, "sample", '5')

        # Group 4: action buttons
        groupBox4 = QGroupBox("버튼")
        layout.addWidget(groupBox4)

        Box4 = QHBoxLayout()
        groupBox4.setLayout(Box4)

        self.resetBtn = QPushButton("리셋", self)
        self.resetBtn.clicked.connect(self.resetClicks)
        Box4.addWidget(self.resetBtn)

        self.resBtn = QPushButton("추천", self)
        self.resBtn.clicked.connect(self.resClicks)
        Box4.addWidget(self.resBtn)

        self.trainBtn = QPushButton("학습", self)
        self.trainBtn.clicked.connect(self.trainClicks)
        Box4.addWidget(self.trainBtn)

        # Group 5: recommendation result
        groupBox5 = QGroupBox("전문가 패널 추천 결과")
        layout.addWidget(groupBox5)

        Box5 = QHBoxLayout()
        groupBox5.setLayout(Box5)

        self.resTable = QTableWidget()
        self.resTable.setColumnCount(6)
        self.resTable.setRowCount(self.DEFAULT_ROWS)
        self.resTable.setHorizontalHeaderLabels(['전공', '세부전공', '직업', '세부직업', '관심사', '라벨'])

        Box5.addWidget(self.resTable)

    def setState(self, state):
        """Slot for the "관심사" checkbox: remember whether it is checked."""
        # The enum member is spelled Qt.Checked (capital C).
        self.state = (state == Qt.Checked)

    def getFileClicks(self):
        """Open a file dialog and remember the chosen survey file."""
        self.fname = QFileDialog.getOpenFileName(self, 'Open file', 'C:/Users/pranst/Desktop/캡스톤_설문/', 'File(*.txt)')
        if self.fname[0]:
            fname_list = self.fname[0].split('/')
            self.filename = fname_list[-1]
            self.filenameText.setText(self.filename)

    def resetClicks(self):
        """Forget the selected file and restore every input to its default."""
        # Selected file
        self.fname = []
        self.filename = ""

        # Text boxes
        self.filenameText.clear()
        self.keywordText.clear()

        # Result table
        self.resTable.clearContents()
        self.resTable.setRowCount(self.DEFAULT_ROWS)

        self.n = 0
        self.sample = 0

        self.majorText.setText('0.6')
        self.dmajorText.setText('0.7')
        self.jobText.setText('0.8')
        self.djobText.setText('0.9')

        self.nText.setText('10')
        self.sampleText.setText('5')

    def resClicks(self):
        """Run the recommendation with the current options and show it in the table."""
        # Validate the numeric inputs instead of letting float()/int() raise
        # inside the slot.
        try:
            self.wlist = [float(edit.text()) for edit in
                          (self.majorText, self.dmajorText, self.jobText, self.djobText)]
            self.n = int(self.nText.text())
            self.sample = int(self.sampleText.text())
            if self.sample < 1:
                raise ValueError("sample must be positive")
        except ValueError:
            QMessageBox.warning(self, '알림', '가중치, n, sample 값은 숫자로 입력해 주세요.')
            return

        # Known survey files come with predefined keywords; for any other file
        # fall back to whatever the user typed into the keyword box.
        self.keywordlist = keywords.get(self.filename.replace('.txt', ''))
        if self.keywordlist is None:
            self.keywordlist = self.keywordText.text().replace('#', ' ').split()
        if not self.keywordlist:
            QMessageBox.warning(self, '알림', '키워드를 찾을 수 없습니다. 파일을 선택하거나 키워드를 입력해 주세요.')
            return

        self.keywordText.setText('#'+' #'.join(self.keywordlist))

        self.list = processing.extract(self.keywordlist, self.wlist, self.state, self.n, self.sample)

        # Drop rows left over from a previous, longer result and make sure
        # every returned row fits into the table.
        self.resTable.clearContents()
        self.resTable.setRowCount(max(self.DEFAULT_ROWS, len(self.list)))
        for i, row in enumerate(self.list):
            for j, cell in enumerate(row):
                # QTableWidgetItem only accepts text; sheet cells may be numbers.
                self.resTable.setItem(i, j, QTableWidgetItem(str(cell)))

    def trainClicks(self):
        """Continue training the model on the selected survey file."""
        if not self.fname or not self.fname[0]:
            QMessageBox.warning(self, '알림', '학습할 파일을 먼저 선택해 주세요.')
            return

        # QMessageBox.warning is modal and returns the button that was pressed.
        reply = QMessageBox.warning(self, '알림', '학습이 시작되었습니다.')
        if reply != QMessageBox.Ok:
            return

        modelTraining(self.fname[0])
        QMessageBox.warning(self, '알림', '학습이 완료되었습니다.')

In [8]:
def main():
    """Show the main window and run the Qt event loop until the window is closed.

    Exits the interpreter with the event loop's return code; inside a notebook
    this surfaces as a ``SystemExit`` notice after the window is closed.
    """
    # Re-running this cell in the same kernel must not construct a second
    # QApplication, so reuse the existing instance when there is one.
    app = QApplication.instance() or QApplication(sys.argv)
    win = InputKeywords()
    win.show()
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()

           전공      세부전공        직업      세부직업       관심사  라벨       sum
709  0.306600  0.300281  0.323441  0.341133  0.287751  직업  0.959929
433  0.270378  0.320320  0.326599  0.341133  0.323227  직업  0.954750
807  0.306600  0.304261  0.326599  0.326964  0.307249  직업  0.952490
28   0.287330  0.304222  0.323441  0.341133  0.265682  직업  0.951125
221  0.287330  0.345408  0.320320  0.305573  0.320632  전공  0.945455


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
