In [301]:
import pandas as pd
import numpy as np

# 決議の情報を作成
def create_resolution_info(r) :
    # ファイル名に変換
    r_f = r.replace('/', '_')
    # タブがファイルに出てこないことが前提
    d = pd.read_csv(r_f + '.csv', header=None, sep='\t')

    # title
    d1 = d[d[0].str.contains('245')]
    d2 = d1[0].str.split('\$\$', expand=True)
    title = d2.iat[0, 1].strip('a')

    # resolution
    d1 = d[d[0].str.contains('791')]
    d2 = d1[0].str.split('\$\$', expand=True)
    resolution = d2.iat[0, 1].strip('a')

    # note
    d1 = d[d[0].str.contains('591')]
    d2 = d1[0].str.split('\$\$', expand=True)
    note = d2.iat[0, 1].strip('a')

    # vote date
    d1 = d[d[0].str.contains('269')]
    d2 = d1[0].str.split('\$\$', expand=True)
    vote_date = d2.iat[0, 1].strip('a')

    return [title, resolution, note, vote_date]

def create_vote_result(r) :
    # ファイル名に変換
    r_f = r.replace('/', '_')
    d = pd.read_csv(r_f + '.csv', header=None, sep='\t')

    # Voting resultのみ残す
    d1 = d[d[0].str.contains('967__')]

    # $$で区切る
    d2 = d1[0].str.split('\$\$', expand=True)

    # 1・2列からa・country_abb列を作る（a, c文字を取り除くだけ）
    d2['a'] = d2[1].str.strip('a')
    d2['country_abb'] = d2[2].str.strip('c')

    # 3列がdから始まっていればvoteに代入
    d2.loc[d2[3].str.startswith('d'), 'vote'] = d2[3].str.strip('d')
    d2 = d2.fillna('')

    # 3・4列がeから始まっていればcountryに代入
    d2.loc[d2[3].str.startswith('e'), 'country'] = d2[3].str.strip('e')
    d2.loc[d2[4].str.startswith('e'), 'country'] = d2[4].str.strip('e')
    # 3列がeから始まっていれば投票なしとしてvoteにXを代入
    d2.loc[d2[3].str.startswith('e'), 'vote'] = 'X'

    # resolution
    d2['resolution'] = r

    d3 = d2[['resolution', 'country', 'country_abb', 'vote']]
    return d3

vr = pd.DataFrame(index=[], columns=['resolution', 'country', 'country_abb', 'vote'])
rr = pd.DataFrame(index=[], columns=['title', 'resolution', 'note', 'vote_date'])

# データの作成

# 決議
res = [
    'A/RES/77/243',
    'A/RES/77/200',
    'A/RES/77/193',
    'A/RES/77/204',
    'A/RES/77/205',
    'A/RES/77/206',
    'A/RES/77/208',
    'A/RES/77/212',
    'A/RES/77/214',
    'A/RES/77/215',
#    'A/RES/ES-11/5'
]


for r in res :
    # 決議の情報
    res_temp = create_resolution_info(r)
    s = pd.Series(res_temp, index=rr.columns)
    rr = rr.append(s, ignore_index=True)

    # 投票の情報
    vr_temp = create_vote_result(r)
    vr = pd.concat([vr, vr_temp])

# カンマはデータに出てくるのでタブ区切りとした
rr.to_csv('resolution_info.csv', index=False, sep='\t')
vr.to_csv('vote_result.csv', index=False, sep='\t')

In [302]:
# Yes, Noをカウント
vr2 = vr.groupby(['resolution', 'vote']).count()
vr2


Unnamed: 0_level_0,Unnamed: 1_level_0,country,country_abb
resolution,vote,Unnamed: 2_level_1,Unnamed: 3_level_1
A/RES/77/193,A,8,8
A/RES/77/193,N,1,1
A/RES/77/193,X,14,14
A/RES/77/193,Y,170,170
A/RES/77/200,A,59,59
A/RES/77/200,N,3,3
A/RES/77/200,X,16,16
A/RES/77/200,Y,115,115
A/RES/77/204,A,10,10
A/RES/77/204,N,50,50


In [311]:
# 国の一覧を作る．重複排除
vr3 = vr[['country', 'country_abb']].drop_duplicates()
vr3.to_csv('country.csv', index=False, sep='\t')
vr3.count()

country        193
country_abb    193
dtype: int64

In [313]:
# 国（の略称）の組を作る
c_pair = []
for row1 in vr3.itertuples() :
    for row2 in vr3.itertuples() :
        if row1[0] < row2[0] :
            c_pair.append((row1[2], row2[2]))

# 決議の一覧を作る


18528

In [370]:
p = ['JPN', 'USA', 'A/RES/77/243']

d = pd.DataFrame(columns=['c1', 'c2', 'id'])
d.loc['A/RES/77/243'] = ['JPN', 'USA' , True]

# voteが一致していたらTrue，不一致はFalseを返す
(vr[(vr.country_abb == p[0]) & (vr.resolution == res)]).vote
(vr[(vr.country_abb == p[1]) & (vr.resolution == res)]).vote

# マージする
d2 = pd.merge(vr, vr, on='resolution')

# 国の組み合わせを限定する
d3 = d2[d2.country_x > d2.country_y]
d3['agree'] = (d3.vote_x == d3.vote_y)
d4 = d3.groupby(['country_abb_x', 'country_abb_y'], as_index=False).sum()
#print(d4.count()) # 18528件
d4.to_csv('agree.csv', index=False, sep='\t')
d4.groupby(['agree']).count()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d3['agree'] = (d3.vote_x == d3.vote_y)


Unnamed: 0_level_0,country_abb_x,country_abb_y
agree,Unnamed: 1_level_1,Unnamed: 2_level_1
0,842,842
1,794,794
2,2237,2237
3,4029,4029
4,583,583
5,422,422
6,583,583
7,1372,1372
8,2158,2158
9,2689,2689


In [368]:
193*193/2

18624.5