# 정부안 분석

In [3]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns
import re
from tqdm import tqdm
from gensim.models import Word2Vec
from sklearn.cluster import KMeans
import os
from gensim import models
import fasttext.util
from sklearn.cluster import AgglomerativeClustering
from konlpy.tag import Okt
import logging
import hanja
from krwordrank.hangle import normalize
import datetime
%matplotlib inline

In [4]:
# Load the bill dataset. The first CSV column becomes the index and the two
# date columns are parsed into datetime64 so the `.dt` accessor works later.
gender = pd.read_csv(
    'whole-gender-bill-1320.csv',
    encoding='utf-8-sig',
    index_col=0,
    parse_dates=['제안일자', '의결일자'],
)
print(gender.shape)
# Preview the first row to check the column layout.
gender.head(1)

(4521, 16)


Unnamed: 0,의안번호,의안명,제안일자,제안자구분,의결일자,의결결과,제안회기,제안이유,소관위원회,제안자,발의자,제안정당,성별,여성의원 수,정당성향,국회회기
0,131131,소득세법중개정법률안,1990-12-17,위원장,1990-12-18,원안가결,제13대 (1988~1992) 제151회,"정부제출,柳인학·강金식·금봉욱·林춘원·李경재·허만기·홍영기의원외64인 발의,1...",재무위원회,[],위원장,위원장,,0,,13


In [5]:
# Bill numbers (의안번호) to exclude from the analysis.
drop_list = [177919, 177064, 176854, 176797, 176796, 176795, 176794]

# Keep only the rows whose bill number is not in the exclusion list.
gender = gender[~gender['의안번호'].isin(drop_list)]
# Renumber the rows 0..n-1 and discard the old index in one step.
# The original `reset_index()` followed by `drop('index', 1)` passed `axis`
# positionally, which pandas 2.0+ rejects with a TypeError.
gender = gender.reset_index(drop=True)
gender.shape

(4514, 16)

In [6]:
# Convert both timestamp columns to plain `datetime.date` objects so they can
# be compared against `datetime.date(...)` literals in the cells below.
for date_col in ('제안일자', '의결일자'):
    gender[date_col] = gender[date_col].dt.date

In [7]:
# List the distinct proposal dates to sanity-check the conversion above.
pd.unique(gender['제안일자'])

array([datetime.date(1990, 12, 17), datetime.date(1990, 11, 20),
       datetime.date(1990, 10, 8), ..., datetime.date(2016, 6, 1),
       datetime.date(2016, 5, 31), datetime.date(2016, 5, 30)],
      dtype=object)

## 정권 탐색 

### 노태우 정권 
- 1988.02.25 ~ 1993.02.24

In [5]:
# Label bills proposed during the Roh Tae-woo administration.
# Both bounds are inclusive: the original strict `<` left bills proposed on
# the final day of the term (1993-02-24) unlabeled. The lower bound now uses
# the start of the term (1988-02-25) rather than 1988-05-03.
roh_tw_start = datetime.date(1988, 2, 25)
roh_tw_end = datetime.date(1993, 2, 24)
in_roh_tw_term = (gender['제안일자'] >= roh_tw_start) & (gender['제안일자'] <= roh_tw_end)
gender.loc[in_roh_tw_term, '정권'] = '노태우'

## 김영삼 정권 
- 1993.02.25 ~ 1998.02.24

In [6]:
# Label bills proposed during the Kim Young-sam administration
# (1993-02-25 through 1998-02-24, both days inclusive).
# The original strict `>` / `<` comparisons skipped the inauguration day and
# the final day of the term, leaving those bills without a label.
kim_ys_start = datetime.date(1993, 2, 25)
kim_ys_end = datetime.date(1998, 2, 24)
in_kim_ys_term = (gender['제안일자'] >= kim_ys_start) & (gender['제안일자'] <= kim_ys_end)
gender.loc[in_kim_ys_term, '정권'] = '김영삼'

## 김대중 정권 
- 1998.02.25 ~ 2003.02.24

In [7]:
# Label bills proposed during the Kim Dae-jung administration
# (1998-02-25 through 2003-02-24, both days inclusive).
# The original strict `>` / `<` comparisons skipped the inauguration day and
# the final day of the term, leaving those bills without a label.
kim_dj_start = datetime.date(1998, 2, 25)
kim_dj_end = datetime.date(2003, 2, 24)
in_kim_dj_term = (gender['제안일자'] >= kim_dj_start) & (gender['제안일자'] <= kim_dj_end)
gender.loc[in_kim_dj_term, '정권'] = '김대중'

## 노무현 정권  
- 2003.02.25 ~ 2008.02.24

In [8]:
# Label bills proposed during the Roh Moo-hyun administration
# (2003-02-25 through 2008-02-24, both days inclusive).
# The original strict `>` / `<` comparisons skipped the inauguration day and
# the final day of the term, leaving those bills without a label.
roh_mh_start = datetime.date(2003, 2, 25)
roh_mh_end = datetime.date(2008, 2, 24)
in_roh_mh_term = (gender['제안일자'] >= roh_mh_start) & (gender['제안일자'] <= roh_mh_end)
gender.loc[in_roh_mh_term, '정권'] = '노무현'

## 이명박 정권
- 2008.02.25 ~ 2013.02.24

In [9]:
# Label bills proposed during the Lee Myung-bak administration
# (2008-02-25 through 2013-02-24, both days inclusive).
# The original strict `>` / `<` comparisons skipped the inauguration day and
# the final day of the term, leaving those bills without a label.
lee_mb_start = datetime.date(2008, 2, 25)
lee_mb_end = datetime.date(2013, 2, 24)
in_lee_mb_term = (gender['제안일자'] >= lee_mb_start) & (gender['제안일자'] <= lee_mb_end)
gender.loc[in_lee_mb_term, '정권'] = '이명박'

## 박근혜 정권
- 2013.02.25 ~ 2017.03.10 (아래 코드에서는 2017.05.09까지 제안된 의안도 박근혜 정권으로 분류)

In [10]:
# Label bills proposed from 2013-02-25 up to (but not including) 2017-05-10
# as the Park Geun-hye administration. The upper bound is unchanged from the
# original, so bills proposed through 2017-05-09 still fall under this label.
# The lower bound is now inclusive: the original strict `>` left bills
# proposed on the inauguration day (2013-02-25) unlabeled.
park_gh_start = datetime.date(2013, 2, 25)
next_term_start = datetime.date(2017, 5, 10)
in_park_gh_term = (gender['제안일자'] >= park_gh_start) & (gender['제안일자'] < next_term_start)
gender.loc[in_park_gh_term, '정권'] = '박근혜'

## 문재인 정권 
- 2017.05.10 ~

In [11]:
# Label bills proposed on or after 2017-05-10 as the Moon Jae-in
# administration. The original strict `>` left bills proposed on the
# inauguration day itself unlabeled.
moon_ji_start = datetime.date(2017, 5, 10)
gender.loc[gender['제안일자'] >= moon_ji_start, '정권'] = '문재인'

In [12]:
# Show the first row to confirm the new '정권' column was added.
gender.iloc[:1]

Unnamed: 0,의안번호,의안명,제안일자,제안자구분,의결일자,의결결과,제안회기,제안이유,소관위원회,제안자,발의자,제안정당,성별,여성의원 수,정당성향,국회회기,정권
0,131131,소득세법중개정법률안,1990-12-17,위원장,1990-12-18,원안가결,제13대 (1988~1992) 제151회,"정부제출,柳인학·강金식·금봉욱·林춘원·李경재·허만기·홍영기의원외64인 발의,1...",재무위원회,[],위원장,위원장,,0,,13,노태우


In [17]:
# Count non-null '발의자' entries for each (정권, 정당성향, 성별) combination
# and show the result as a one-column DataFrame.
regime_leaning_gender = gender.groupby(['정권', '정당성향', '성별'])
regime_leaning_gender['발의자'].count().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,발의자
정권,정당성향,성별,Unnamed: 3_level_1
김대중,보수,남성,22
김대중,보수,여성,16
김대중,진보,남성,7
김대중,진보,여성,7
김영삼,보수,남성,9
김영삼,보수,여성,9
김영삼,진보,남성,16
김영삼,진보,여성,6
노무현,보수,남성,86
노무현,보수,여성,97


In [15]:
# Count non-null '발의자' entries for each (정권, 제안자구분) combination
# and show the result as a one-column DataFrame.
regime_proposer_type = gender.groupby(['정권', '제안자구분'])
regime_proposer_type['발의자'].count().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,발의자
정권,제안자구분,Unnamed: 2_level_1
김대중,위원장,18
김대중,의원,54
김대중,정부,20
김영삼,위원장,13
김영삼,의원,40
김영삼,정부,9
노무현,위원장,55
노무현,의원,347
노무현,정부,52
노태우,위원장,6
