In [1]:
import itertools

import numpy as np
import pandas as pd

import hansard
import person

In [2]:
# Year used to build the input file name that is loaded next (edge_df_<year>.csv).
target_year = 1895

In [3]:
# Read the edge list for the selected year; the first CSV column becomes the index.
edge_df_path = f"edge_df_{target_year}.csv"
edge_df = pd.read_csv(edge_df_path, index_col=0)

In [4]:
# Distinct values of the "topic_clean" column, in sorted order.
all_topic_list = sorted({*edge_df["topic_clean"]})

In [5]:
# Distinct values of the "person_clean" column, in sorted order.
all_person_list = sorted(set(edge_df["person_clean"]))

# Split the people with the project's `person` helper (imported in the first
# cell). Later cells read the "official" and "unofficial" entries of the result.
member_dict = person.get_official_unofficial_member_list(all_person_list)

In [6]:


# Per-topic collections of people, built by the project's `hansard` helper
# (imported in the first cell): once unrestricted, then restricted to the
# official and to the unofficial member lists.
topic_person_dict = hansard.get_node_dict(
    edge_df=edge_df,
    node_column="topic_clean",
    count_node_column="person_clean",
)

topic_person_official_dict = hansard.get_node_dict(
    edge_df=edge_df,
    node_column="topic_clean",
    count_node_column="person_clean",
    restrict_count_node_list=member_dict["official"],
)

topic_person_unofficial_dict = hansard.get_node_dict(
    edge_df=edge_df,
    node_column="topic_clean",
    count_node_column="person_clean",
    restrict_count_node_list=member_dict["unofficial"],
)

# One summary row per topic: how many official / unofficial / total people it has.
results_dict_list = []
for topic, person_list in topic_person_dict.items():
    results_dict = {
        "topic": topic,
        # A topic missing from a restricted dict counts as zero people.
        "official": len(topic_person_official_dict.get(topic, [])),
        "unofficial": len(topic_person_unofficial_dict.get(topic, [])),
        "total": len(person_list),
    }

    # Sanity check: the two restricted counts must add up to the unrestricted one.
    assert (
        results_dict["official"] + results_dict["unofficial"] == results_dict["total"]
    ), f"official + unofficial != total for topic {topic!r}"
    results_dict_list.append(results_dict)

# Show the 20 topics with the most people.
(
    pd.DataFrame(results_dict_list)
    .set_index("topic")
    .sort_values("total", ascending=False)
    .head(20)
)




Unnamed: 0_level_0,official,unofficial,total
topic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AN AMENDING ORDINANCE.,5,3,8
THE REMUNERATION OF THE TAIPINGSHAN ARBITRATORS.,5,3,8
THE SANITARY BOARD BY-LAWS.,4,3,7
THE MILITARY CONTRIBUTION.,2,5,7
THE APPROPRIATION BILL.,3,3,6
A PRECAUTIONARY MEASURE.,3,3,6
FINANCE COMMITTEE.,3,3,6
THE RECONSTRUCTION OF TAIPINGSHAN.,3,2,5
MEDICAL OFFICER OF HEALTH ORDINANCE.,3,2,5
THE SUPPLEMENTARY APPROPRIATION BILL.,3,2,5


In [7]:
# Per-person collections of topics (the reverse direction of topic_person_dict).
person_topic_dict = hansard.get_node_dict(
    edge_df=edge_df,
    node_column="person_clean",
    count_node_column="topic_clean",
)

# Number of topics recorded for each person, largest first.
topic_count_by_person = {
    name: len(topics) for name, topics in person_topic_dict.items()
}
pd.Series(topic_count_by_person).sort_values(ascending=False)




The COLONIAL SECRETARY          58
The ATTORNEY GENERAL            45
HIS EXCELLENCY                  18
Hon. T. H. WHITEHEAD            17
HON. E. R. BELILIOS             14
Hon. HO KAI                     12
Hon. C. P. CHATER               11
Hon. A. McCONACHIE              10
The COLONIAL TREASURER           9
The DIRECTOR OF PUBLIC WORKS     8
The CHAIRMAN                     6
Hon. J. J. KESWICK               3
The HARBOUR MASTER               3
Mr. J. J. Bell                   1
dtype: int64

In [8]:
# Row/column labels of the matrix: official members first, then unofficial ones.
use_person_list = member_dict["official"] + member_dict["unofficial"]

# Square float matrix of zeros, labelled by person on both axes.
person_adjacency_matrix = pd.DataFrame(
    np.zeros((len(use_person_list), len(use_person_list))),
    index=use_person_list,
    columns=use_person_list,
)

# For every topic, each pair of distinct people listed under it adds 1.0 to both
# symmetric cells, so a cell ends up holding the number of topics the two people
# share. Writes go through `.at[row, col]`: the chained form `df[col][row] += x`
# does not update the frame when pandas Copy-on-Write is active.
for person_list in topic_person_dict.values():
    for person_a, person_b in itertools.combinations(person_list, 2):
        if person_a != person_b:
            person_adjacency_matrix.at[person_a, person_b] += 1.0
            person_adjacency_matrix.at[person_b, person_a] += 1.0

person_adjacency_matrix




Unnamed: 0,HIS EXCELLENCY,The ATTORNEY GENERAL,The CHAIRMAN,The COLONIAL SECRETARY,The COLONIAL TREASURER,The DIRECTOR OF PUBLIC WORKS,The HARBOUR MASTER,HON. E. R. BELILIOS,Hon. A. McCONACHIE,Hon. C. P. CHATER,Hon. HO KAI,Hon. J. J. KESWICK,Hon. T. H. WHITEHEAD,Mr. J. J. Bell
HIS EXCELLENCY,0.0,6.0,0.0,10.0,3.0,3.0,1.0,5.0,2.0,7.0,6.0,1.0,3.0,1.0
The ATTORNEY GENERAL,6.0,0.0,1.0,32.0,6.0,6.0,3.0,5.0,6.0,6.0,7.0,2.0,2.0,0.0
The CHAIRMAN,0.0,1.0,0.0,2.0,1.0,1.0,0.0,2.0,1.0,2.0,3.0,0.0,2.0,0.0
The COLONIAL SECRETARY,10.0,32.0,2.0,0.0,6.0,4.0,3.0,11.0,8.0,8.0,10.0,3.0,11.0,0.0
The COLONIAL TREASURER,3.0,6.0,1.0,6.0,0.0,2.0,1.0,3.0,1.0,4.0,3.0,0.0,0.0,0.0
The DIRECTOR OF PUBLIC WORKS,3.0,6.0,1.0,4.0,2.0,0.0,1.0,3.0,2.0,3.0,2.0,0.0,2.0,0.0
The HARBOUR MASTER,1.0,3.0,0.0,3.0,1.0,1.0,0.0,1.0,2.0,2.0,1.0,2.0,0.0,0.0
HON. E. R. BELILIOS,5.0,5.0,2.0,11.0,3.0,3.0,1.0,0.0,3.0,6.0,4.0,0.0,1.0,0.0
Hon. A. McCONACHIE,2.0,6.0,1.0,8.0,1.0,2.0,2.0,3.0,0.0,2.0,2.0,2.0,1.0,0.0
Hon. C. P. CHATER,7.0,6.0,2.0,8.0,4.0,3.0,2.0,6.0,2.0,0.0,7.0,1.0,2.0,0.0


In [9]:









# Square float matrix of zeros, labelled by topic on both axes.
topic_adjacency_matrix = pd.DataFrame(
    np.zeros((len(all_topic_list), len(all_topic_list))),
    index=all_topic_list,
    columns=all_topic_list,
)

# For every person, each pair of distinct topics listed under them adds 1.0 to
# both symmetric cells, so a cell ends up holding the number of people the two
# topics share. Iterating over `.values()` avoids a top-level loop variable named
# `person`, which would rebind the imported `person` module. Writes go through
# `.at[row, col]`: the chained form `df[col][row] += x` does not update the frame
# when pandas Copy-on-Write is active.
for topic_list in person_topic_dict.values():
    for topic_a, topic_b in itertools.combinations(topic_list, 2):
        if topic_a != topic_b:
            topic_adjacency_matrix.at[topic_a, topic_b] += 1.0
            topic_adjacency_matrix.at[topic_b, topic_a] += 1.0

topic_adjacency_matrix



Unnamed: 0,A PERSONAL EXPLANATION.,A POINT OF ORDER.,A PRECAUTIONARY MEASURE.,A RECORD REVENUE.,ADOPTION OF FINANCIAL MINUTES.,AFFORESTATION DEPARTMENT.,AN AMENDING ORDINANCE.,AN ORDINANCE REPEALED.,APPOINTMENT OF A MEDICAL OFFICER OF HEALTH.,APPOINTMENT OF COLONIAL SECRETARY.,...,THE SANITARY BOARD BY-LAWS.,THE SANITARY COMMITTEES ORDINANCE.,THE STAMP ORDINANCE.,"THE STRIKE ""WEAK AND CRIMINAL"" TO ABANDON PROCEEDINGS.",THE SUPPLEMENTARY APPROPRIATION BILL.,THE SUPREME COURT FUNDS ORDINANCE.,THE TRADE MARKS ORDINANCE.,THE WATER ORDINANCE.,THE WATER SUPPLY.,THE WEARING OF NAVAL AND MILITARY UNIFORMS.
A PERSONAL EXPLANATION.,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,...,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0
A POINT OF ORDER.,1.0,0.0,2.0,1.0,1.0,1.0,2.0,1.0,0.0,2.0,...,2.0,1.0,0.0,1.0,2.0,0.0,1.0,1.0,1.0,1.0
A PRECAUTIONARY MEASURE.,1.0,2.0,0.0,1.0,1.0,1.0,6.0,2.0,1.0,2.0,...,5.0,2.0,1.0,1.0,4.0,1.0,2.0,2.0,2.0,2.0
A RECORD REVENUE.,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,...,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0
ADOPTION OF FINANCIAL MINUTES.,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
THE SUPREME COURT FUNDS ORDINANCE.,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
THE TRADE MARKS ORDINANCE.,1.0,1.0,2.0,1.0,0.0,1.0,2.0,2.0,1.0,1.0,...,2.0,2.0,1.0,0.0,1.0,1.0,0.0,2.0,0.0,2.0
THE WATER ORDINANCE.,1.0,1.0,2.0,1.0,1.0,1.0,3.0,2.0,1.0,1.0,...,2.0,3.0,1.0,0.0,1.0,1.0,2.0,0.0,0.0,2.0
THE WATER SUPPLY.,0.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,...,2.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0
