<div style="text-align: right;">
    <h1> Exploratory Data Analysis </h1>
</div>

In [1]:
import pandas as pd
import numpy as np

In [None]:
# --- Excel sources -----------------------------------------------------------
core_df = pd.read_excel("data/코어프로그램.xlsx")
project_df = pd.read_excel("data/프로젝트 정보.xlsx")

# The program change history is delivered as two workbooks; stack them into a
# single frame (the original row indexes of both parts are kept as-is).
program_df1 = pd.read_excel("data/프로그램_변경이력_1.xlsx")
program_df2 = pd.read_excel("data/프로그램_변경이력_2.xlsx")
program_df = pd.concat([program_df1, program_df2])

# --- CSV source --------------------------------------------------------------
employee_df = pd.read_csv("data/직원.csv")

# --- Pickled frame -----------------------------------------------------------
# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it comes from a trusted source.
merged_df = pd.read_pickle("data/merged_data.pickle")

In [None]:
# Department code (the values compared against "JEOM_NO") -> short department
# label. NOTE(review): the labels look like abbreviations of the full
# department names used as keys in Graph.groupToColor -- confirm.
dict_fromNum_toString = {
    61: "운영부",
    62: "정개부",
    63: "금개부",
    64: "글개부",
    66: "기개부",
    69: "디개부",
    507: "기획부",
}

## CONCAT

직원 테이블 (ICT 직원만)

In [None]:
# Slice the employee table once per ICT department code. The generator is fully
# consumed by the tuple unpacking, so every slice is taken from the unfiltered
# employee_df before the name is rebound below.
data_61, data_62, data_63, data_64, data_66, data_69, data_507 = (
    employee_df[employee_df["JEOM_NO"] == jeomNo]
    for jeomNo in (61, 62, 63, 64, 66, 69, 507)
)

# Re-assemble the per-department slices; rows end up grouped by department code
# in the order listed above.
employee_df = pd.concat(
    [data_61, data_62, data_63, data_64, data_66, data_69, data_507]
)

## JOIN

In [None]:
# Derive a join key from COL07: values of the form "BK<digits>" become the
# integer <digits>; every other value is passed through unchanged.
# The isinstance guard keeps non-string cells (e.g. NaN from empty spreadsheet
# cells) from raising TypeError on slicing; they are passed through as-is.
without_prefix = [
    int(name[2:]) if isinstance(name, str) and name.startswith("BK") else name
    for name in program_df["COL07"]
]
program_df["without_prefix"] = without_prefix

In [None]:
# Left-join chain: employees -> program change history (employee number vs. the
# prefix-stripped COL07) -> project info (COL08 vs. project number). Left joins
# keep every employee row even when no program/project matches.
dataFrameTable = (
    employee_df
    .merge(program_df, how="left", left_on="JIKWON_NO", right_on="without_prefix")
    .merge(project_df, how="left", left_on="COL08", right_on="프로젝트번호")
)

In [None]:
# Show only the employee / project identifier columns of the joined table.
previewColumns = ["JIKWON_NO", "NAME", "프로젝트번호", "프로젝트명"]
dataFrameTable[previewColumns]

In [None]:
# List every column available in the joined table.
dataFrameTable.columns

## NetworkX(Undirected/Directed Graph) Class

In [None]:
import networkx
import matplotlib
import matplotlib.pyplot as plt
from itertools import combinations
import os 
import random

class Graph:
    """Weighted, undirected collaboration graph built one project at a time.

    Each call to ``addSubGraph`` selects the rows of ``dataFrame`` for one
    project and adds two kinds of weighted edges to ``self.graph``:

    * person -- project, weight 1 (the person appears in a row of the project)
    * person -- person, weight 2 (both are in the same department within
      that project)

    The frame must provide the columns ``"NAME"``, ``"프로젝트명"`` and
    ``"JEOM_NAME"``; their values are expected to be strings because they are
    stripped before use.
    """

    def __init__(self, dataFrame):
        """Store the source table and create an empty graph.

        Args:
            dataFrame: the externally prepared table the graph is built from.
        """
        # Table prepared outside of this class.
        self.dataFrame = dataFrame

        # The graph itself.
        self.graph = networkx.Graph()
        # Rows of dataFrame belonging to the project added most recently.
        self.subgraph = pd.DataFrame()

        # Name of the project added most recently.
        self.projectName = ""

        # Every (node, node, weight) relation added so far, across all projects.
        self.relations = []
        # department name -> member names, for the most recent project only.
        self.workGroups = {}
        # person name -> node colour.
        self.colorGroups = {}

        # department name -> colour used for its members.
        self.groupToColor = {
            "ICT운영부": "red",
            "ICT기획부": "blue",
            "정보개발부": "yellow",
            "금융개발부": "purple",
            "글로벌개발부": "green",
            "기관개발부": "orange",
            "디지털개발부": "pink",
        }

    def addSubGraph(self, projectName):
        """Add the people and relations of one project to the graph.

        Args:
            projectName: value of the ``"프로젝트명"`` column to select.

        Raises:
            KeyError: if a department of the project is missing from
                ``groupToColor``.
        """
        self.projectName = projectName
        self.subgraph = self.dataFrame[self.dataFrame["프로젝트명"] == self.projectName]

        # Remember where this project's relations start so that only they are
        # pushed into the graph, instead of re-adding the whole history.
        firstNew = len(self.relations)
        self.addPeopleToProject()
        self.addPeopleToPeople()
        self.buildConnection(firstNew)

    def addPeopleToProject(self):
        """Record person--project relations (weight 1) for the current subgraph.

        Also rebuilds ``workGroups`` (department -> member names) for the
        current project.
        """
        self.workGroups = {}
        columns = self.subgraph[["NAME", "프로젝트명", "JEOM_NAME"]]

        # Strip first, then de-duplicate: values that differ only in
        # surrounding whitespace collapse into one entry. dict.fromkeys keeps
        # first-seen order, so the result is deterministic across runs.
        stripped = (
            (person.strip(), project.strip(), department.strip())
            for person, project, department in columns.itertuples(index=False, name=None)
        )
        for person, project, department in dict.fromkeys(stripped):
            self.relations.append((person, project, 1))
            self.workGroups.setdefault(department, []).append(person)

    def addPeopleToPeople(self):
        """Record same-department relations (weight 2) and assign node colours.

        Raises:
            KeyError: if a department name is missing from ``groupToColor``.
        """
        for department, members in self.workGroups.items():
            # Members of one department share one colour.
            color = self.groupToColor[department]
            for member in members:
                self.colorGroups[member] = color

            # Every unordered pair of members; yields nothing for a department
            # with a single member, so no explicit emptiness check is needed.
            for first, second in combinations(members, 2):
                self.relations.append((first, second, 2))

    def buildConnection(self, start=0):
        """Add recorded relations to the graph as weighted edges.

        Args:
            start: index into ``relations`` of the first relation to add.
                The default of 0 adds every recorded relation.
        """
        self.graph.add_weighted_edges_from(self.relations[start:])

    def printGraph(self):
        """Draw the whole graph and save it as a PNG under ``imgs/``.

        The file is named after the most recently added project.
        """
        # Nodes whose label is longer than 3 characters are drawn larger.
        # NOTE(review): presumably this separates project nodes from the
        # (typically short) person names -- confirm.
        node_sizes = [200 if len(node) > 3 else 100 for node in self.graph.nodes()]

        # People get their department colour; every other node is black.
        node_colors = [self.colorGroups.get(node, "black") for node in self.graph.nodes()]

        pos = networkx.kamada_kawai_layout(self.graph)

        # plt.figure(1, ...) hands back figure 1 if it is still open, so clear
        # it to avoid drawing on top of an earlier graph.
        figure = plt.figure(1, figsize=(20, 20))
        figure.clf()

        networkx.draw(
            self.graph,
            pos,
            alpha=0.8,
            node_size=node_sizes,
            node_color=node_colors,
            cmap=plt.get_cmap('viridis'),
            with_labels=False,
            font_family='NanumGothic',
        )

        # Save into the imgs directory, creating it on first use.
        os.makedirs("imgs", exist_ok=True)
        # Project names are free text; replace characters that are path
        # separators or invalid in file names so savefig gets a usable path.
        safeName = "".join(
            "_" if ch in '\\/:*?"<>|' else ch for ch in self.projectName
        )
        plt.savefig("imgs/" + safeName + "_graph.png")

    def printProjectName(self):
        """Print the name of the most recently added project."""
        print("프로젝트  :\n", self.projectName)
        print()

    def printWorkGroups(self):
        """Print the distinct (stripped) department names of the current project."""
        print("참여 부서 :\n" , self.subgraph[self.subgraph["프로젝트명"] == self.projectName]["JEOM_NAME"].str.strip().unique())
        print()

    def printEdges(self):
        """Print every edge of the graph together with its attributes."""
        print("edge 별 weight :\n", self.graph.edges(data=True))
        print()

    def printNodeColors(self):
        """Print the person -> colour mapping."""
        print("node 별 색깔:\n", self.colorGroups)
        print()

## 서브 그래프 2개

In [None]:
# Start a fresh graph from the joined table, add a single project to it, then
# draw the graph and save the image under imgs/.
graph2 = Graph(dataFrameTable)
graph2.addSubGraph("TransferWise 해외이체서비스 개발의뢰")
graph2.printGraph()

In [None]:
# Add a second project to the same Graph instance; the edges of the first
# project are still in graph2.graph, so this drawing contains both projects.
graph2.addSubGraph("외국환거래약정서 비대면약정 프로세스 개발")
graph2.printGraph()

## 서브 그래프 100개

In [None]:
graph100 = Graph(dataFrameTable)

# Add the first 100 distinct project names, in order of first appearance.
# Slicing the unique values replaces the manual counter/break; if there are
# fewer than 100 projects, all of them are added.
projectNames = dataFrameTable["프로젝트명"].dropna().unique()
for projectName in projectNames[:100]:
    graph100.addSubGraph(projectName)

# The saved image is named after the last project added.
graph100.printGraph()

## 서브 그래프 500개

In [None]:
graph500 = Graph(dataFrameTable)

# Add the first 500 distinct project names, in order of first appearance.
# Slicing the unique values replaces the manual counter/break; if there are
# fewer than 500 projects, all of them are added.
projectNames = dataFrameTable["프로젝트명"].dropna().unique()
for projectName in projectNames[:500]:
    graph500.addSubGraph(projectName)

# The saved image is named after the last project added.
graph500.printGraph()

## 서브 그래프 1000개

In [None]:
graph1000 = Graph(dataFrameTable)

# Add the first 1000 distinct project names, in order of first appearance.
# Slicing the unique values replaces the manual counter/break; if there are
# fewer than 1000 projects, all of them are added.
projectNames = dataFrameTable["프로젝트명"].dropna().unique()
for projectName in projectNames[:1000]:
    graph1000.addSubGraph(projectName)

# The saved image is named after the last project added.
graph1000.printGraph()

## 서브 그래프 2000개

In [None]:
graph2000 = Graph(dataFrameTable)

# Add the first 2000 distinct project names, in order of first appearance.
# Slicing the unique values replaces the manual counter/break; if there are
# fewer than 2000 projects, all of them are added.
projectNames = dataFrameTable["프로젝트명"].dropna().unique()
for projectName in projectNames[:2000]:
    graph2000.addSubGraph(projectName)

# The saved image is named after the last project added.
graph2000.printGraph()