In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re

from collections import Counter
from tqdm.notebook import tqdm

%matplotlib inline

In [2]:
bill_df = pd.read_csv('20대 21대 의안정보 총합.csv')
print(bill_df.shape)
bill_df.head()

FileNotFoundError: [Errno 2] File 20대 21대 의안정보 총합.csv does not exist: '20대 21대 의안정보 총합.csv'

### 1. Data Preprocessing

#### 1.1 20, 21대 국회 데이터 나누기

In [None]:
bill_20th = bill_df.iloc[:25222]
bill_21th = bill_df.iloc[25222:].reset_index(drop=True)

In [None]:
bill_21th[:10]

#### 1.2 제안이유 및 의안명

In [None]:
bill_20th['의안명'] = bill_20th['의안명'].str.strip()
bill_21th['의안명'] = bill_21th['의안명'].str.strip()

In [None]:
bill_20th['제안이유'] = bill_20th['제안이유'].fillna('')
bill_21th['제안이유'] = bill_21th['제안이유'].fillna('')

In [None]:
def clean_reason(df_col): 
    text = df_col[2:-2].replace('제안이유 및 주요내용','').replace('제안이유','')
    return re.sub('[-=.#/:$}·,■?]', ' ', text)

In [None]:
%time bill_20th['제안이유'] = bill_20th['제안이유'].apply(lambda x: clean_reason(x))

In [None]:
%time bill_21th['제안이유'] = bill_21th['제안이유'].apply(lambda x: clean_reason(x))

In [None]:
bill_20th.head()

#### 1.3 제안일자, 의결일자

In [None]:
bill_21th['제안일자'] = pd.to_datetime(bill_21th['제안일자'])
bill_21th['의결일자'] = pd.to_datetime(bill_21th['의결일자'])

bill_20th['제안일자'] = pd.to_datetime(bill_20th['제안일자'])
bill_20th['의결일자'] = pd.to_datetime(bill_20th['의결일자'])

#### 1.4 의결결과

In [None]:
accept_list = ['대안반영폐기', '원안가결', '수정가결', '수정안반영폐기', '가결']
reject_list = ['임기만료폐기', '부결', '철회', '반려', '심사대상제외', '폐기']

In [None]:
## 의결결과가 '대안반영폐기', '원안가결', '수정가결', '수정안반영폐기', '가결' 이면 법률반영
##            '임기만료폐기', '부결', '철회', '반려', '심사대상제외', '폐기' 이면 법률미반영
bill_20th.loc[bill_20th['의결결과'].isin(accept_list), '법률반영여부'] = '반영'
bill_20th.loc[bill_20th['의결결과'].isin(reject_list), '법률반영여부'] = '미반영'

#### 1.5 제안자구분

In [14]:
proposer_type_list = ['의원', '위원장', '의장']

In [15]:
bill_20th_new = bill_20th[bill_20th['제안자구분'].isin(proposer_type_list)]
bill_20th_new.reset_index(drop = True, inplace = True)
bill_21th_new = bill_21th[bill_21th['제안자구분'].isin(proposer_type_list)]
bill_21th_new.reset_index(drop = True, inplace = True)

In [16]:
bill_20th_new.loc[bill_20th_new['소관위원회'].isnull(),'소관위원회'] = ''
bill_20th_new.loc[bill_20th_new['소관위원회'] == '[]', '소관위원회'] = ''
bill_20th_new.loc[bill_20th_new['소관위원회'].str.startswith('2'), '소관위원회'] = ''
bill_20th_new['소관위원회'].unique()

array(['법제사법위원회', '행정안전위원회', '정보위원회', '여성가족위원회', '문화체육관광위원회', '국회운영위원회',
       '과학기술정보방송통신위원회', '정무위원회', '기획재정위원회', '국토교통위원회', '환경노동위원회', '',
       '국방위원회', '외교통일위원회', '농림축산식품해양수산위원회', '산업통상자원중소벤처기업위원회', '교육위원회',
       '보건복지위원회', '국회 코로나19 대책 특별위원회', '헌법개정 및 정치개혁 특별위원회',
       '중앙선거관리위원회 위원 선출에 관한 인사청문특별위원회', '예산결산특별위원회', '정치개혁 특별위원회',
       '사법개혁 특별위원회', '윤리특별위원회', '공공부문 채용비리 의혹과 관련된 국정조사특별위원회',
       '헌법재판소 재판관 선출에 관한 인사청문특별위원회', '미세먼지 대책 특별위원회', '교육문화체육관광위원회',
       '안전행정위원회', '산업통상자원위원회', '미래창조과학방송통신위원회',
       '평창동계올림픽 및 국제경기대회지원 특별위원회',
       '박근혜정부의최순실등민간인에의한국정농단의혹사건진상규명을위한국정조사특별위원회', '저출산·고령화대책 특별위원회',
       '가습기살균제 사고 진상규명과 피해구제 및 재발방지 대책마련을 위한 국정조사특별위원회',
       '국회상임위원회 위원정수에 관한 규칙 개정 특별위원회'], dtype=object)

1.5.1 __국회운영위원회__

In [17]:
## 2016년 국회운영위원회 위원장 : 정진석(새누리당/鄭鎭碩)
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '국회운영위원회'
cond3 = bill_20th_new['제안일자'] <= '2016-12-29'
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['정진석(새누리당/鄭鎭碩)']"

## 2017년 국회운영위원회 위원장 : 정우택(새누리당/鄭宇澤)
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '국회운영위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-12-30') & (bill_20th_new['제안일자'] <= '2017-12-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['정우택(새누리당/鄭宇澤)']"

## 2018년 전반기 국회운영위원회 위원장 : 김성태(자유한국당/金聖泰)
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '국회운영위원회'
cond3 = (bill_20th_new['제안일자'] >= '2017-12-30') & (bill_20th_new['제안일자'] < '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['정우택(자유한국당/鄭宇澤)']"

## 2018년 후반기 국회운영위원회 위원장 : 홍영표(더불어민주당/洪永杓)
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '국회운영위원회'
cond3 = (bill_20th_new['제안일자'] >= '2018-06-01') & (bill_20th_new['제안일자'] < '2019-06-28')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['홍영표(더불어민주당/洪永杓)']"

## 2018년 후반기 국회운영위원회 위원장 : 이인영(더불어민주당/李仁榮)
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '국회운영위원회'
cond3 = (bill_20th_new['제안일자'] >= '2019-06-28') & (bill_20th_new['제안일자'] < '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['이인영(더불어민주당/李仁榮)']"

1.5.2 __법제사법위원회__

In [18]:
## 20대 전반기 법제사법위원회 위원장 : 권성동(자유한국당/權性東)
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '법제사법위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] < '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['권성동(자유한국당/權性東)']"

## 20대 후반기 법제사법위원회 위원장 : 여상규(미래통합당/余尙奎)
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '법제사법위원회'
cond3 = (bill_20th_new['제안일자'] >= '2018-06-01') & (bill_20th_new['제안일자'] < '2020-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['여상규(미래통합당/余尙奎)']"

1.5.3 __정무위원회__

In [19]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '정무위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2017-12-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['이진복(새누리당/李珍福)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '정무위원회'
cond3 = (bill_20th_new['제안일자'] >= '2017-12-30') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['김용태(자유한국당/金容兌)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '정무위원회'
cond3 = (bill_20th_new['제안일자'] >= '2018-06-01') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['민병두(더불어민주당/閔丙梪)']"

1.5.4 __기획재정위원회__

In [20]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '기획재정위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['조경태(새누리당/趙慶泰)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '기획재정위원회'
cond3 = (bill_20th_new['제안일자'] >= '2018-06-01') & (bill_20th_new['제안일자'] <= '2019-06-28')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['정성호(더불어민주당/鄭成湖)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '기획재정위원회'
cond3 = (bill_20th_new['제안일자'] >= '2019-06-29') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['이춘석(더불어민주당/李春錫)']"

1.5.5 __과학기술정보방송통신위원회__

In [21]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '과학기술정보방송통신위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['신상진(새누리당/申相珍)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '과학기술정보방송통신위원회'
cond3 = (bill_20th_new['제안일자'] >= '2018-06-01') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['노웅래(더불어민주당/盧雄來)']"

1.5.6 __교육문화체육관광위원회__

In [22]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '교육문화체육관광위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['유성엽(국민의당/柳成葉)']"

## 교육위원회
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '교육위원회'
cond3 = (bill_20th_new['제안일자'] >= '2018-06-01') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['이찬열(자유한국당/李燦烈)']"

## 문화체육관광위원회
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '문화체육관광위원회'
cond3 = (bill_20th_new['제안일자'] >= '2018-07-16') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['안민석(더불어민주당/安敏錫)']"

1.5.7 __외교통일위원회__

In [23]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '외교통일위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['심재권(더불어민주당/沈載權)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '외교통일위원회'
cond3 = (bill_20th_new['제안일자'] >= '2018-06-01') & (bill_20th_new['제안일자'] <= '2019-03-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['윤상현(자유한국당/尹相現)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '외교통일위원회'
cond3 = (bill_20th_new['제안일자'] >= '2019-04-01') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['강석호(자유한국당/姜碩鎬)']"

1.5.8 __국방위원회__

In [24]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '국방위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2017-12-31')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['김영우(새누리당/金榮宇)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '국방위원회'
cond3 = (bill_20th_new['제안일자'] >= '2018-01-01') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['김학용(새누리당/金學容)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '국방위원회'
cond3 = (bill_20th_new['제안일자'] >= '2018-06-01') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['안규백(더불어민주당/安圭伯)']"

1.5.9 __행정안전위원회__

In [25]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '행정안전위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['유재중(새누리당/柳在仲)']" 

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '행정안전위원회'
cond3 = (bill_20th_new['제안일자'] >= '2018-06-01') & (bill_20th_new['제안일자'] <= '2019-06-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['인재근(더불어민주당/印在謹)']" 

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '행정안전위원회'
cond3 = (bill_20th_new['제안일자'] >= '2019-07-01') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['전혜숙(더불어민주당/全惠淑)']" 

1.5.10 __농림축산식품해양수산위원회__

In [26]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '농림축산식품해양수산위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2017-06-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['김영춘(더불어민주당/金榮春)']" 

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '농림축산식품해양수산위원회'
cond3 = (bill_20th_new['제안일자'] > '2017-06-30') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['설훈(더불어민주당/薛勳)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '농림축산식품해양수산위원회'
cond3 = (bill_20th_new['제안일자'] > '2018-05-29') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['황주홍(국민의당/黃柱洪)']"

1.5.11 __산업통상자원중소벤처기업위원회__

In [27]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '산업통상자원중소벤처기업위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['장병완(국민의당/張秉浣)']" 

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '산업통상자원중소벤처기업위원회'
cond3 = (bill_20th_new['제안일자'] > '2018-05-29') & (bill_20th_new['제안일자'] <= '2019-07-04')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['홍일표(자유한국당/洪日杓)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '산업통상자원중소벤처기업위원회'
cond3 = (bill_20th_new['제안일자'] > '2019-07-04') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['이종구(자유한국당/李鍾九)']"

1.5.12 __보건복지위원회__

In [28]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '보건복지위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['양승조(더불어민주당/梁承晁)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '보건복지위원회'
cond3 = (bill_20th_new['제안일자'] > '2018-05-29') & (bill_20th_new['제안일자'] <= '2019-07-31')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['이명수(자유한국당/李明洙)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '보건복지위원회'
cond3 = (bill_20th_new['제안일자'] > '2019-07-31') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['김세연(자유한국당/金世淵)']"

1.5.13 __환경노동위원회__

In [29]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '환경노동위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['홍영표(더불어민주당/洪永杓)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '환경노동위원회'
cond3 = (bill_20th_new['제안일자'] > '2018-05-29') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['김학용(자유한국당/金學容)']"

1.5.14 __국토교통위원회__

In [30]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '국토교통위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['조정식(더불어민주당/趙正湜)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '국토교통위원회'
cond3 = (bill_20th_new['제안일자'] > '2018-05-29') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['박순자(자유한국당/朴順子)']"

1.5.15 __정보위원회__

In [31]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '정보위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2016-12-31')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['이철우(새누리당/李喆雨)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '정보위원회'
cond3 = (bill_20th_new['제안일자'] >= '2017-01-01') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['강석호(자유한국당/姜碩鎬)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '정보위원회'
cond3 = (bill_20th_new['제안일자'] > '2018-05-29') & (bill_20th_new['제안일자'] <= '2018-12-31')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['이학재(바른미래당/李鶴宰)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '정보위원회'
cond3 = (bill_20th_new['제안일자'] > '2018-12-31') & (bill_20th_new['제안일자'] <= '2020-01-05')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['이혜훈(바른미래당/李惠薰)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '정보위원회'
cond3 = (bill_20th_new['제안일자'] > '2020-01-05') & (bill_20th_new['제안일자'] <= '2020-02-05')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['박주선(바른미래당/朴柱宣)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '정보위원회'
cond3 = (bill_20th_new['제안일자'] > '2020-02-05') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['김민기(더불어민주당/金敏基)']"

1.5.16 __여성가족위원회__

In [32]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '여성가족위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['남인순(더불어민주당/南仁順)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '여성가족위원회'
cond3 = (bill_20th_new['제안일자'] > '2018-05-29') & (bill_20th_new['제안일자'] <= '2019-06-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['전혜숙(더불어민주당/全惠淑)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '여성가족위원회'
cond3 = (bill_20th_new['제안일자'] > '2019-06-30') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['인재근(더불어민주당/印在謹)']"

1.5.17 __예산결산특별위원회__

In [33]:
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '예산결산특별위원회'
cond3 = (bill_20th_new['제안일자'] >= '2016-06-13') & (bill_20th_new['제안일자'] <= '2017-05-31')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['김현미(더불어민주당/金賢美)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '예산결산특별위원회'
cond3 = (bill_20th_new['제안일자'] >= '2017-05-31') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['백재현(더불어민주당/白在鉉)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '예산결산특별위원회회'
cond3 = (bill_20th_new['제안일자'] > '2018-05-29') & (bill_20th_new['제안일자'] <= '2019-03-31')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['안상수(자유한국당/安相洙)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '예산결산특별위원회'
cond3 = (bill_20th_new['제안일자'] > '2019-03-31') & (bill_20th_new['제안일자'] <= '2019-05-31')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['황영철(자유한국당/黃永哲)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '예산결산특별위원회'
cond3 = (bill_20th_new['제안일자'] > '2019-05-31') & (bill_20th_new['제안일자'] <= '2020-05-30')
bill_20th_new.loc[cond1 & cond2 & cond3, '제안자'] = "['김재원(자유한국당/金在原)']"

1.5.18 __특별위원회__

In [34]:
## 1. 국회상임위원회 위원정수에 관한 규칙 개정 특별위원회
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '국회상임위원회 위원정수에 관한 규칙 개정 특별위원회'
bill_20th_new.loc[cond1 & cond2, '제안자'] = "['박완주(더불어민주당/朴完柱)']"

## 2. 저출산·고령화대책 특별위원회
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '저출산·고령화대책 특별위원회'
bill_20th_new.loc[cond1 & cond2, '제안자'] = "['나경원(자유한국당/羅卿瑗)']"

## 3. 평창동계올림픽 및 국제경기대회지원 특별위원회
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '평창동계올림픽 및 국제경기대회지원 특별위원회'
bill_20th_new.loc[cond1 & cond2, '제안자'] = "['황영철(자유한국당/黃永哲)']"

## 4. 정치개혁 특별위원회
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '정치개혁 특별위원회'
cond3 = (bill_20th_new['제안일자'] >= '2017-06-27') & (bill_20th_new['제안일자'] <= '2017-12-31')
bill_20th_new.loc[cond1 & cond2, '제안자'] = "['원혜영(더불어민주당/元惠榮)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '정치개혁 특별위원회'
cond3 = (bill_20th_new['제안일자'] > '2017-12-31') & (bill_20th_new['제안일자'] <= '2019-07-31')
bill_20th_new.loc[cond1 & cond2, '제안자'] = "['심상정(정의당/沈相奵)']"

cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '정치개혁 특별위원회'
cond3 = (bill_20th_new['제안일자'] >= '2017-06-27') & (bill_20th_new['제안일자'] <= '2019-08-31')
bill_20th_new.loc[cond1 & cond2, '제안자'] = "['홍영표(더불어민주당/洪永杓)']"

## 5. 미세먼지 대책 특별위원회
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '미세먼지 대책 특별위원회'
bill_20th_new.loc[cond1 & cond2, '제안자'] = "['전혜숙(더불어민주당/全惠淑)']"

## 6. 헌법개정 및 정치개혁 특별위원회
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '헌법개정 및 정치개혁 특별위원회'
bill_20th_new.loc[cond1 & cond2, '제안자'] = "['김재경(자유한국당/金在庚)']"

## 7. 국회 코로나19 대책 특별위원회
cond1 = bill_20th_new['제안자구분'] == '위원장'
cond2 = bill_20th_new['소관위원회'] == '국회 코로나19 대책 특별위원회'
bill_20th_new.loc[cond1 & cond2, '제안자'] = "['김진표(더불어민주당/金珍杓)']"

1.5.19 __국회의장__

In [35]:
cond1 = bill_20th_new['제안자구분'] == '의장'
cond2 = (bill_20th_new['제안일자'] >= '2016-06-09') & (bill_20th_new['제안일자'] <= '2018-05-29')
bill_20th_new.loc[cond1 & cond2, '제안자'] = "['정세균(더불어민주당/丁世均)']"

cond1 = bill_20th_new['제안자구분'] == '의장'
cond2 = (bill_20th_new['제안일자'] >= '2018-05-30') & (bill_20th_new['제안일자'] <= '2020-05-29')
bill_20th_new.loc[cond1 & cond2, '제안자'] = "['문희상(더불어민주당/文喜相)']"

#### 1.6 제안정당

In [36]:
bill_20th_new['제안자'] = bill_20th_new['제안자'].replace(np.nan,'').replace('[]','')
bill_21th_new['제안자'] = bill_21th_new['제안자'].replace(np.nan,'').replace('[]','')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [37]:
proposer_party = []
## 제안자 칼럼에서 제안정당을 추출
for i in bill_20th_new['제안자']:
    party_list = []
    try:
        proposer_split = i[2:-2].split("', '")
        for j in range(0, len(proposer_split)):
            party = proposer_split[j].split('(')[1].split('/')[0]
            party_list.append(party)
            party_set = set(party_list)
            party_list = list(party_set)
    except:
        party_list.append('')
    
    proposer_party.append(party_list)

bill_20th_new['제안정당'] = proposer_party

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [38]:
## 정당으로 데이터프레임을 생성합니다.
party_list = []
for i in bill_20th_new['제안정당']:
    for j in range(0, len(i)):
        party = i[j]
        party_list.append(party)
        
party_list = list(set(party_list))
print(party_list)

party_df = bill_20th_new['제안정당'].apply(lambda x: pd.Series(x))
party_df.columns = ['제안정당_1','제안정당_2','제안정당_3','제안정당_4','제안정당_5','제안정당_6','제안정당_7','제안정당_8']

## 원핫인코딩을 통해 정당이 칼럼명인 데이터 프레임을 만들고 모두 합칩니다.
df1 = pd.get_dummies(party_df['제안정당_1'])
df2 = pd.get_dummies(party_df['제안정당_2'])
df3 = pd.get_dummies(party_df['제안정당_3'])
df4 = pd.get_dummies(party_df['제안정당_4'])
df5 = pd.get_dummies(party_df['제안정당_5'])
df6 = pd.get_dummies(party_df['제안정당_6'])
df7 = pd.get_dummies(party_df['제안정당_7'])
df8 = pd.get_dummies(party_df['제안정당_8'])

df = df1.add(df2, fill_value=0).add(df3, fill_value=0).add(df4, fill_value=0).add(df5, fill_value=0).add(df6, fill_value=0).add(df7, fill_value=0).add(df8, fill_value=0)

## 원본데이터와 합칩니다.
bill_20th_new = pd.concat([bill_20th_new, df], axis=1)

['', '자유한국당', '바른정당', '우리공화당', '개혁보수신당', '미래통합당', '바른미래당', '미래한국당', '대한애국당', '민생당', '무소속', '정의당', '새로운보수당', '미래를향한전진4.0', '국민의당', '민중당', '새민중정당', '새누리당', '더불어시민당', '민주평화당', '대안신당', '더불어민주당']


In [40]:
proposer_party = []
## 제안자 칼럼에서 제안정당을 추출
for i in bill_21th_new['제안자']:
    party_list = []
    try:
        proposer_split = i[2:-2].split("', '")
        for j in range(0, len(proposer_split)):
            party = proposer_split[j].split('(')[1].split('/')[0]
            party_list.append(party)
            party_set = set(party_list)
            party_list = list(party_set)
    except:
        party_list.append('')
    
    proposer_party.append(party_list)

bill_21th_new['제안정당'] = proposer_party

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [41]:
## 정당으로 데이터프레임을 생성합니다.
party_list = []
for i in bill_21th_new['제안정당']:
    for j in range(0, len(i)):
        party = i[j]
        party_list.append(party)
        
party_list = list(set(party_list))
print(party_list)

['', '미래통합당', '국민의당', '열린민주당', '정의당', '기본소득당', '시대전환', '무소속', '더불어민주당']


In [42]:
party_df = bill_21th_new['제안정당'].apply(lambda x: pd.Series(x))
party_df.columns = ['제안정당_1','제안정당_2','제안정당_3','제안정당_4','제안정당_5','제안정당_6','제안정당_7','제안정당_8']

In [43]:
party_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2730 entries, 0 to 2729
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   제안정당_1  2730 non-null   object
 1   제안정당_2  1032 non-null   object
 2   제안정당_3  285 non-null    object
 3   제안정당_4  91 non-null     object
 4   제안정당_5  28 non-null     object
 5   제안정당_6  4 non-null      object
 6   제안정당_7  1 non-null      object
 7   제안정당_8  1 non-null      object
dtypes: object(8)
memory usage: 170.8+ KB


In [44]:
## 원핫인코딩을 통해 정당이 칼럼명인 데이터 프레임을 만들고 모두 합칩니다.
df1 = pd.get_dummies(party_df['제안정당_1'])
df2 = pd.get_dummies(party_df['제안정당_2'])
df3 = pd.get_dummies(party_df['제안정당_3'])
df4 = pd.get_dummies(party_df['제안정당_4'])
df5 = pd.get_dummies(party_df['제안정당_5'])
df6 = pd.get_dummies(party_df['제안정당_6'])
df7 = pd.get_dummies(party_df['제안정당_7'])
df8 = pd.get_dummies(party_df['제안정당_8'])

df = df1.add(df2, fill_value=0).add(df3, fill_value=0).add(df4, fill_value=0).add(df5, fill_value=0).add(df6, fill_value=0).add(df7, fill_value=0).add(df8, fill_value=0)

## 원본데이터와 합칩니다.
bill_21th_new = pd.concat([bill_21th_new, df], axis=1)

### 1.7 의안번호

In [61]:
## ZZ로 시작하는 의안삭제

startZ_20th = bill_20th_new[bill_20th_new['의안번호'].str.startswith('ZZ')]
startZ_21th = bill_21th_new[bill_21th_new['의안번호'].str.startswith('ZZ')]

bill_20th_new.drop(startZ_20th.index, inplace=True)
bill_20th_new = bill_20th_new.reset_index(drop=True)

bill_21th_new.drop(startZ_21th.index, inplace=True)
bill_21th_new = bill_21th_new.reset_index(drop=True)

In [62]:
bill_20th_new.to_csv('bill_20th_data_final.csv', encoding='utf-8-sig', index=False)

### 2. EDA

In [45]:
from matplotlib import font_manager, rc
import platform

if platform.system() == 'Windows':
    path = 'C:/Windows/Fonts/Malgun.ttf'
    font_name = font_manager.FontProperties(fname = path).get_name()
    rc('font', family = font_name)
elif platform.system() == 'Darwin':
    rc('font', family = 'AppleGothic')
else :
    print('Check your OS system')

#### 2.1 21대 국회

In [46]:
bill_20th.groupby('의결결과')['더불어민주당','정의당','새누리당','바른미래당'].sum()

  """Entry point for launching an IPython kernel.


KeyError: "Columns not found: '새누리당', '바른미래당', '정의당', '더불어민주당'"

__민주당__

In [None]:
bill_20th_theminzu = bill_20th[bill_20th['더불어민주당'] == 1]

__의결결과__ : 
폐기 - 임기만료 폐기, 철회, 폐기, 수정안반영폐기, 대안반영폐기, 심사대상제외, 부결
가결 - 수정가결, 원안가결

In [None]:
pass1 = bill_20th_theminzu['의결결과'] == '수정가결'
pass2 = bill_20th_theminzu['의결결과'] == '원안가결'
discard1 = bill_20th_theminzu['의결결과'] != '수정가결'
discard2 = bill_20th_theminzu['의결결과'] != '원안가결'

bill_20th_pass = bill_20th_theminzu[pass1 | pass2]
bill_20th_discard = bill_20th_theminzu[discard1 & discard2]
bill_20th_discard

In [None]:
bill_20th_theminzu['의결결과'].value_counts()

#### 2.2 20대 국회

In [None]:
bill_20th['제안일자']