In [1]:
import requests
import json
from datetime import date
import pandas as pd
import base64
import igraph

In [2]:
none_legislator_list = set(
['親民黨黨團',
 '司法院',
 '時代力量黨團',
 '1819',
 '民進黨黨團',
 '國民黨黨團',
 '行政院',
 '考試院'])

In [3]:
class Bill:
    def __init__(self, bill, index):
        self.id = index
        self.date = bill['date']
        self.term = bill['term']
        self.session_times = bill['sessionTimes']
        self.session_period = bill['sessionPeriod']
        self.bill_name = bill['billName']
        if type(bill['billProposer']) == str:
            self.bill_proposer = self.parse_names_to_list(bill['billProposer'])
        else:
            self.bill_proposer = bill['billProposer']
        if type(bill['billCosignatory']) == str:
            self.bill_cosignatory = self.parse_names_to_list(bill['billCosignatory'])
        else:
            self.bill_cosignatory = bill['billCosignatory']
    
    def parse_names_to_list(self, names):
        names = names.replace(' ', '').split(';')
        return [name if name != 'KolasYotaka' else '谷辣斯．尤達卡' for name in names]
    
    def parse_date(self, roc_date):
        year = int(roc_date[0:3]) + 1911
        month = int(roc_date[3:5])
        day = int(roc_date[5:])
        return date(year, month, day)
    
    def has_none_legislator(self):
        for name in self.bill_proposer:
            if name in none_legislator_list:
                return True
        for name in self.bill_cosignatory:
            if name in none_legislator_list:
                return True
        return False
    
    def to_dict(self):
        return {
            'id' : self.id,
            'date': self.date,
            'term': self.term,
            'sessionTimes': self.session_times,
            'sessionPeriod': self.session_period,
            'billName': self.bill_name,
            'billProposer': self.bill_proposer,
            'billCosignatory': self.bill_cosignatory,
        }


In [5]:
for i in range(legislator_df.shape[0]):
    # handle some name special case
    dic = legislator_df.iloc[i].to_dict()
    if dic['name'] == '谷辣斯‧尤達卡Kolas‧Yotaka':
        dic['name'] = '谷辣斯．尤達卡'
    if dic['name'] == '高潞‧以用‧巴魕剌Kawlo‧Iyun‧Pacidal':
        dic['name'] = '高潞．以用．巴魕剌'
    if dic['name'] == '廖國棟Sufin‧Siluko':
        dic['name'] = '廖國棟'
    if dic['name'] == '鄭天財Sra‧Kacaw':
        dic['name'] = '鄭天財'
    if dic['name'] == '簡東明Uliw．Qaljupayare':
        dic['name'] = '簡東明'
    legislator_dict_list.append(dic)

NameError: name 'legislator_df' is not defined

In [4]:
class Legislator:
    def __init__(self, legislator, index):
        self.id = index
        self.term = legislator['term']
        self.name = legislator['name']
        self.sex = legislator['sex']
        self.party = legislator['party']
        self.partyGroup = legislator['partyGroup']
        self.areaName = legislator['areaName']
        self.committee = legislator['committee']
        self.degree = legislator['degree']
        self.experience = legislator['experience']
        self.picUrl = legislator['picUrl']
        if 'picBase64' in legislator:
            self.picBase64 = legislator['picBase64']
        else:
            self.picBase64 = None

    def parse_string_to_list(self, committee_string):
        li = list(filter(lambda x: len(x) > 0, committee_string.replace(' ', '').split(';')))
        return li
    
    def fetch_pic(self):
        result = requests.get(self.picUrl)
        self.picBase64 = base64.b64encode(result.content).decode('ascii')
    
    def save_pic_to_jpg(self, file_path):
        img_byte = base64.b64decode(self.picBase64.encode('ascii'))
        with open(file_path, 'wb') as file:
            file.write(img_byte)

    def to_dict(self):
        return {
            'id': self.id,
            'term': self.term,
            'name': self.name,
            'sex' : self.sex,
            'party': self.party,
            'partyGroup': self.partyGroup,
            'areaName': self.areaName,
            'committee': self.committee,
            'degree': self.degree if type(self.degree) is list else [],
            'experience': self.experience,
            'picUrl': self.picUrl,
            'picBase64': self.picBase64
        }

In [5]:
with open('../src/data/legislator.json', 'r') as file:
    legislator_dict_list = json.load(file)

In [6]:
legislator_list = [Legislator(legislator, legislator['id']) for legislator in legislator_dict_list]

In [7]:
with open('..//src/data/bills.json', 'r') as file:
    bill_dict_list = json.load(file)

In [8]:
billing_list = [Bill(dic, index) for index, dic in enumerate(bill_dict_list)]

In [9]:
with open('../src/data/remove-none-legislator-bill.json', 'r') as file:
    remove_none_legislator_bill_dict_list = json.load(file)

In [10]:
remove_none_legislator_bill_list = [Bill(dic, index) for index, dic in enumerate(remove_none_legislator_bill_dict_list)]

In [11]:
legislator_reverse_map = { legislator.name: legislator.id for legislator in legislator_list }

In [12]:
bill_proposer_edges = {}
bill_cosignatory_edges = {}
bill_total_edges = {}

In [13]:
from itertools import combinations

def generate_edge(names_list):
    # convert name to index
    index_list = [legislator_reverse_map[name] for name in names_list]
    index_list.sort()
    return list(combinations(index_list, 2))


In [14]:
class Edge:
    def __init__(self, edge, weight):
        self.edge = edge
        self.weight = weight
    
    def to_dict(self):
        return {
            'edge': list(self.edge),
            'weight': self.weight
        }

In [15]:
class Node:
    def __init__(self, id, weight):
        self.id = id
        self.weight = weight
    
    def to_dict(self):
        return {
            'id': self.id,
            'weight': self.weight,
        }

In [16]:
class Graph:
    def __init__(self, name, nodes=[], edges=[]):
        self.name = name
        self.nodes = self.to_node(nodes, edges)
        self.edges = edges
        self.communities = []
        self.graph = self.to_igraph()
        self.find_communities()


    def to_igraph(self):
        graph = igraph.Graph()
        graph.add_vertices(nodes)
        graph.add_edges([edge.edge for edge in self.edges], attributes={'weight': [edge.weight for edge in self.edges]} )
        return graph
    
    def find_communities(self):
        cls = self.graph.community_label_propagation(weights='weight')
        subgraphs = cls.subgraphs()
        for subgraph in subgraphs:
            self.communities.append([vx['name'] for vx in subgraph.vs])

    def to_node(self, nodes, edges):
        node_list = [ Node(node, 0) for node in nodes ]
        for edge in edges:
            node_list[edge.edge[0]].weight += edge.weight
            node_list[edge.edge[1]].weight += edge.weight
        return node_list

    def to_dict(self):
        return {
            'name' : self.name,
            'nodes': [node.to_dict() for node in self.nodes],
            'edges': [ edge.to_dict() for edge in self.edges],
            'communities': self.communities
        }

In [17]:
for bill in remove_none_legislator_bill_list:
    proposer_edges = generate_edge(bill.bill_proposer)
    cosignatory_edges = generate_edge(bill.bill_cosignatory)
    total_edges = generate_edge(bill.bill_proposer + bill.bill_cosignatory)
    for edge in proposer_edges:
        if edge in bill_proposer_edges:
            bill_proposer_edges[edge] += 1
        else:
            bill_proposer_edges[edge] = 1

    for edge in cosignatory_edges:
        if edge in bill_cosignatory_edges:
            bill_cosignatory_edges[edge] += 1
        else:
            bill_cosignatory_edges[edge] = 1

    for edge in total_edges:
        if edge in bill_total_edges:
            bill_total_edges[edge] += 1
        else:
            bill_total_edges[edge] = 1
    

In [18]:
nodes = [legislator.id for legislator in legislator_list]

In [19]:
bill_proposer_graph = Graph('發起連署人網路圖', nodes, [Edge(key, value) for key, value in bill_proposer_edges.items()])
bill_cosignatory_graph = Graph('共同連署人網路圖', nodes, [Edge(key, value) for key, value in bill_cosignatory_edges.items()])
bill_total_graph = Graph('連署網路圖', nodes, [Edge(key, value) for key, value in bill_total_edges.items()])

In [28]:
for community in bill_total_graph.communities:
    for member in community:
        print('{}({})'.format(legislator_list[member].name, legislator_list[member].party))
    print("===========================")

王金平(中國國民黨)
王育敏(中國國民黨)
王惠美(中國國民黨)
孔文吉(中國國民黨)
江啟臣(中國國民黨)
吳志揚(中國國民黨)
呂玉玲(中國國民黨)
李彥秀(中國國民黨)
李鴻鈞(親民黨)
周陳秀霞(親民黨)
林為洲(中國國民黨)
林德福(中國國民黨)
林麗蟬(中國國民黨)
柯志恩(中國國民黨)
徐志榮(中國國民黨)
徐榛蔚(中國國民黨)
馬文君(中國國民黨)
高金素梅(無黨團結聯盟)
張麗善(中國國民黨)
許淑華(中國國民黨)
許毓仁(中國國民黨)
陳宜民(中國國民黨)
陳怡潔(親民黨)
陳雪生(中國國民黨)
陳超明(中國國民黨)
陳學聖(中國國民黨)
曾銘宗(中國國民黨)
費鴻泰(中國國民黨)
黃昭順(中國國民黨)
楊鎮浯(中國國民黨)
廖國棟(中國國民黨)
蔣乃辛(中國國民黨)
蔣萬安(中國國民黨)
鄭天財(中國國民黨)
盧秀燕(中國國民黨)
賴士葆(中國國民黨)
簡東明(中國國民黨)
顏寬恒(中國國民黨)
羅明才(中國國民黨)
林奕華(中國國民黨)
童惠珍(中國國民黨)
沈智慧(中國國民黨)
王定宇(民主進步黨)
王榮璋(民主進步黨)
尤美女(民主進步黨)
江永昌(民主進步黨)
何欣純(民主進步黨)
余宛如(民主進步黨)
吳玉琴(民主進步黨)
吳秉叡(民主進步黨)
吳思瑤(民主進步黨)
吳焜裕(民主進步黨)
吳琪銘(民主進步黨)
呂孫綾(民主進步黨)
李昆澤(民主進步黨)
李俊俋(民主進步黨)
李應元(民主進步黨)
谷辣斯．尤達卡(民主進步黨)
周春米(民主進步黨)
林岱樺(民主進步黨)
林俊憲(民主進步黨)
林昶佐(無黨籍)
林淑芬(民主進步黨)
林靜儀(民主進步黨)
邱志偉(民主進步黨)
邱議瑩(民主進步黨)
姚文智(民主進步黨)
柯建銘(民主進步黨)
段宜康(民主進步黨)
洪宗熠(民主進步黨)
洪慈庸(無黨籍)
徐永明(時代力量)
徐國勇(民主進步黨)
高志鵬(民主進步黨)
高潞．以用．巴魕剌(時代力量)
張宏陸(民主進步黨)
張廖萬堅(民主進步黨)
莊瑞雄(民主進步黨)
許智傑(民主進步黨)
陳其邁(民主進步黨)
陳明文(民主進步黨)
陳亭妃(民主進步黨)
陳素月(民主進步黨)
陳曼麗(民主進步黨)
陳歐珀(民主進步黨)
陳瑩(民主進步黨)
陳賴素美(民主進步黨)
黃秀芳(民主進步黨)
黃偉哲(民主進步黨)
黃國昌(時代力量)
黃國書(民主進步黨

In [20]:
import pandas as pd


In [23]:
weights = [edge.weight for edge in bill_total_graph.edges]

In [24]:
se = pd.Series(weights)

In [27]:
se.quantile([.25, .5, .75, .80, .85, .90, .95])

0.25      7.00
0.50     35.00
0.75    188.75
0.80    230.00
0.85    277.00
0.90    336.00
0.95    424.00
dtype: float64

In [60]:
with open('../data/graph.json', 'w') as file:
    json.dump(bill_total_graph.to_dict(), file)

In [34]:
pary_set = set()
for legislator in legislator_list:
    pary_set.add(legislator.party)

In [35]:
party_list = list(pary_set)

In [36]:
party_list

['親民黨', '民主進步黨', '無黨團結聯盟', '中國國民黨', '時代力量', '無黨籍']

最多人連署的法案 (非以黨團名義提出之法案)

In [24]:
bill_count_list = [(bill.bill_name, len(bill.bill_cosignatory)) for bill in remove_none_legislator_bill_list]
bill_count_list.sort(key=lambda x: x[1], reverse=True)

In [25]:
print(bill_count_list[:10])
print(bill_count_list[-11:-1])

[('電業法修正草案', 48), ('食品安全衛生管理法部分條文修正草案', 45), ('警察人員人事條例第二十二條附表一修正草案', 43), ('民法親屬編部分條文修正草案', 42), ('行政院組織法第三條及第四條條文修正草案', 41), ('中央行政機關組織基準法第二十九條及第三十條條文修正草案', 41), ('政黨不當取得財產處理條例草案', 41), ('海洋及漁業部組織法草案', 39), ('政黨法草案', 39), ('二零六震災重建特別條例草案', 39)]
[('野生動物保育法第二十一條之一條文修正草案', 10), ('海洋及漁業部水產試驗所組織法草案', 10), ('海洋及漁業部港灣研究所組織法草案', 10), ('國家海洋研究院組織法修正草案', 10), ('放射性物料管理法第二十一條條文修正草案', 10), ('國家運輸安全調查法草案', 9), ('毒品危害防制條例第四條條文修正草案', 9), ('恢復慰安婦名譽及賠償條例草案', 9), ('地方民意代表費用支給及村里長事務補助費補助條例第七條條文修正草案', 9), ('道路交通管理處罰條例第七十八條條文修正草案', 9)]


發起聯署最多/最少的立委


In [26]:
total_proposed_number_dict = {}
for bill in remove_none_legislator_bill_list:
    for name in bill.bill_proposer:
        if name in total_proposed_number_dict:
            total_proposed_number_dict[name] += 1
        else:
            total_proposed_number_dict[name] = 1

In [27]:
total_proposed_number_list = [(name, count) for name, count in total_proposed_number_dict.items()]

In [28]:
total_proposed_number_list.sort(key=lambda x: x[1], reverse=True)

In [29]:
print(total_proposed_number_list[0:10])
print(total_proposed_number_list[-11:-1])

[('莊瑞雄', 308), ('蘇震清', 278), ('林俊憲', 217), ('邱志偉', 192), ('呂孫綾', 186), ('蔡易餘', 181), ('賴瑞隆', 173), ('鄭寶清', 170), ('黃國書', 155), ('顏寬恒', 147)]
[('蔣絜安', 12), ('李鴻鈞', 12), ('李應元', 11), ('黃國昌', 9), ('柯建銘', 7), ('沈智慧', 6), ('何志偉', 5), ('林奕華', 4), ('童惠珍', 4), ('王金平', 4)]


被連署最多/最少的立委

In [30]:
total_sign_number_dict = {}
for bill in remove_none_legislator_bill_list:
    for name in bill.bill_proposer:
        if name in total_sign_number_dict:
            total_sign_number_dict[name] += len(bill.bill_cosignatory)
        else:
            total_sign_number_dict[name] = len(bill.bill_cosignatory)

In [31]:
total_sign_number_list = [(name, count) for name, count in total_sign_number_dict.items()]
total_sign_number_list.sort(key=lambda x: x[1], reverse=True)

In [32]:
print(total_sign_number_list[0:10])
print(total_sign_number_list[-11:-1])

[('莊瑞雄', 5121), ('蘇震清', 4523), ('林俊憲', 3557), ('呂孫綾', 3218), ('蔡易餘', 3149), ('邱志偉', 3081), ('賴瑞隆', 2815), ('鄭寶清', 2748), ('黃國書', 2505), ('陳亭妃', 2431)]
[('李鴻鈞', 218), ('黃國昌', 214), ('蔣絜安', 192), ('李應元', 191), ('柯建銘', 132), ('沈智慧', 107), ('王金平', 95), ('何志偉', 88), ('林奕華', 69), ('童惠珍', 69)]


兩大黨中誰會為對方聯署

In [30]:
two_biggest_party = set(['中國國民黨', '民主進步黨'])

In [31]:
against_party_dict = {}
for bill in remove_none_legislator_bill_list:
    proposer_party_set = set()
    for proposer in bill.bill_proposer:
        proposer_party = legislator_list[legislator_reverse_map[proposer]].party
        if proposer_party in two_biggest_party:
            proposer_party_set.add(proposer_party)
    # proposed by only party
    if len(proposer_party_set) == 1:
        proposer_party = list(proposer_party_set)[0]
        for cosigner in bill.bill_cosignatory:
            legislator = legislator_list[legislator_reverse_map[cosigner]]
            if legislator.party in two_biggest_party and legislator.party != proposer_party:
                if legislator.name in against_party_dict:
                    against_party_dict[legislator.name] += 1
                else:
                    against_party_dict[legislator.name] = 1
        


In [32]:
against_party_list = [(name, times) for name, times in against_party_dict.items()]
against_party_list.sort(key=lambda x: x[1], reverse=True)

In [33]:
against_party_list[:10]

[('柯志恩', 36),
 ('鄭天財', 30),
 ('陳雪生', 27),
 ('許毓仁', 26),
 ('林麗蟬', 20),
 ('曾銘宗', 19),
 ('羅明才', 17),
 ('江永昌', 16),
 ('鄭寶清', 16),
 ('陳超明', 15)]

小黨在連署中的特色

In [None]:
class Question:
    def __init__(self, name):
        self.name = name
        self.legislator_list = []
        self.count_list = []
    
    def to_dict(self):
        return 