In [29]:
import pandas as pd
from itertools import combinations
from collections import defaultdict
import csv

In [101]:
with open('filtered_data_2016.csv', 'r', encoding='utf-8') as file:
    reader = csv.reader(file, delimiter=' ', quotechar='"')
    data = list(reader)


In [112]:
papers2016 = pd.DataFrame(data, columns=['Year', 'Title', 'Conference', 'Authors'])
papers2016.head()

Unnamed: 0,Year,Title,Conference,Authors
0,2016,Separating-Plane Factorization Models: Scalabl...,CIKM,"Haolan Chen,Di Niu,Kunfeng Lai,Yu Xu,Masoud Ar..."
1,2016,Joint Collaborative Ranking with Social Relati...,CIKM,"Dimitrios Rafailidis,Fabio Crestani"
2,2016,Probabilistic Approaches to Controversy Detect...,CIKM,"Myungha Jang,John Foley,Shiri Dori-Hacohen,Jam..."
3,2016,Online Food Recipe Title Semantics: Combining ...,CIKM,"Tomasz Kusmierczyk,Kjetil Nørvåg"
4,2016,Bus Routes Design and Optimization via Taxi Da...,CIKM,"Seong-Ping Chuah,Huayu Wu 0001,Yu Lu 0003,Lian..."


In [113]:
coauthor_counts = defaultdict(int)

In [114]:
for _, row in papers2016.iterrows():
    authors = row['Authors'].split(',')
    print("Authors:", authors)
    for pair in combinations(authors, 2):
        sorted_pair = tuple(sorted(pair))  # Ensure the pair is always in the same order
        coauthor_counts[sorted_pair] += 1

Authors: ['Haolan Chen', 'Di Niu', 'Kunfeng Lai', 'Yu Xu', 'Masoud Ardakani']
Authors: ['Dimitrios Rafailidis', 'Fabio Crestani']
Authors: ['Myungha Jang', 'John Foley', 'Shiri Dori-Hacohen', 'James Allan']
Authors: ['Tomasz Kusmierczyk', 'Kjetil Nørvåg']
Authors: ['Seong-Ping Chuah', 'Huayu Wu 0001', 'Yu Lu 0003', 'Liang Yu', 'Stéphane Bressan']
Authors: ['Minwei Feng', 'Bing Xiang', 'Bowen Zhou']
Authors: ['Ragunathan Mariappan', 'Balaji Peddamuthu', 'Preethi R. Raajaratnam', 'Sandipan Dandapat', 'Neeta Pande', 'Shourya Roy']
Authors: ['Lei Shi 0002', 'Hanghang Tong', 'Chaoli Wang 0001', 'Leman Akoglu']
Authors: ['Dongwoo Kim 0002', 'Lexing Xie', 'Cheng Soon Ong']
Authors: ['Lei Zhang 0034', 'Michael Färber 0001', 'Achim Rettinger']
Authors: ['Sangwoo Kim', 'Jake Y. Chen', 'Vincenzo Cutello', 'Doheon Lee']
Authors: ['Md. Saiful Islam 0003', 'Chengfei Liu', 'J. Wenny Rahayu', 'Tarique Anwar']
Authors: ['Mostafa Dehghani 0001', 'Hosein Azarbonyad', 'Jaap Kamps', 'Djoerd Hiemstra', 'Maa

In [115]:
print("Co-authorship pairs and counts:")
print(coauthor_counts)

Co-authorship pairs and counts:
defaultdict(<class 'int'>, {('Di Niu', 'Haolan Chen'): 1, ('Haolan Chen', 'Kunfeng Lai'): 1, ('Haolan Chen', 'Yu Xu'): 1, ('Haolan Chen', 'Masoud Ardakani'): 1, ('Di Niu', 'Kunfeng Lai'): 1, ('Di Niu', 'Yu Xu'): 1, ('Di Niu', 'Masoud Ardakani'): 1, ('Kunfeng Lai', 'Yu Xu'): 1, ('Kunfeng Lai', 'Masoud Ardakani'): 1, ('Masoud Ardakani', 'Yu Xu'): 1, ('Dimitrios Rafailidis', 'Fabio Crestani'): 1, ('John Foley', 'Myungha Jang'): 1, ('Myungha Jang', 'Shiri Dori-Hacohen'): 1, ('James Allan', 'Myungha Jang'): 1, ('John Foley', 'Shiri Dori-Hacohen'): 1, ('James Allan', 'John Foley'): 2, ('James Allan', 'Shiri Dori-Hacohen'): 1, ('Kjetil Nørvåg', 'Tomasz Kusmierczyk'): 1, ('Huayu Wu 0001', 'Seong-Ping Chuah'): 1, ('Seong-Ping Chuah', 'Yu Lu 0003'): 1, ('Liang Yu', 'Seong-Ping Chuah'): 1, ('Seong-Ping Chuah', 'Stéphane Bressan'): 1, ('Huayu Wu 0001', 'Yu Lu 0003'): 1, ('Huayu Wu 0001', 'Liang Yu'): 1, ('Huayu Wu 0001', 'Stéphane Bressan'): 2, ('Liang Yu', 'Yu Lu 0

In [116]:
coauthor_data = {
    'from': [],
    'to': [],
    'weight': []
}

In [117]:
for (author1, author2), count in coauthor_counts.items():
    coauthor_data['from'].append(author1)
    coauthor_data['to'].append(author2)
    coauthor_data['weight'].append(count)

In [118]:
coauthors2016 = pd.DataFrame(coauthor_data)

In [119]:
coauthors2016

Unnamed: 0,from,to,weight
0,Di Niu,Haolan Chen,1
1,Haolan Chen,Kunfeng Lai,1
2,Haolan Chen,Yu Xu,1
3,Haolan Chen,Masoud Ardakani,1
4,Di Niu,Kunfeng Lai,1
...,...,...,...
5559,Suhas Ranganath,Xia Hu,1
5560,Jiliang Tang,Suhas Ranganath,1
5561,Suhang Wang,Suhas Ranganath,1
5562,Huan Liu 0001,Suhas Ranganath,1


In [126]:
output_file_path = 'coauthorship2016_graph.csv'
coauthors2016.to_csv(output_file_path, index=False)

## Doing the same for 2017

In [142]:
papers2017 = pd.read_csv('filtered_data_2017.csv', 
                         delimiter=' ', 
                         quotechar='"', 
                         header=None, 
                         names=['Year', 'Title', 'Conference', 'Authors'])


In [143]:
papers2017.head()

Unnamed: 0,Year,Title,Conference,Authors
0,2017,Public Transportation Mode Detection from Cell...,CIKM,"Guanyao Li,Chun-Jie Chen,Sheng-Yun Huang,Ai-Jo..."
1,2017,A Temporal Attentional Model for Rumor Stance ...,CIKM,"Amir Pouran Ben Veyseh,Javid Ebrahimi,Dejing D..."
2,2017,BoostVHT: Boosting Distributed Streaming Decis...,CIKM,"Theodore Vasiloudis,Foteini Beligianni,Gianmar..."
3,2017,Exploiting User Consuming Behavior for Effecti...,CIKM,"Shen Liu,Hongyan Liu"
4,2017,Automatic Navbox Generation by Interpretable C...,CIKM,"Chenhao Xie 0002,Lihan Chen,Jiaqing Liang,Kezu..."


In [144]:
coauthor_counts2017 = defaultdict(int)

In [145]:
for _, row in papers2017.iterrows():
    authors = row['Authors'].split(',')
    print("Authors:", authors)
    for pair in combinations(authors, 2):
        sorted_pair = tuple(sorted(pair))  # Ensure the pair is always in the same order
        coauthor_counts2017[sorted_pair] += 1

Authors: ['Guanyao Li', 'Chun-Jie Chen', 'Sheng-Yun Huang', 'Ai-Jou Chou', 'Xiaochuan Gou', 'Wen-Chih Peng', 'Chih-Wei Yi']
Authors: ['Amir Pouran Ben Veyseh', 'Javid Ebrahimi', 'Dejing Dou', 'Daniel Lowd']
Authors: ['Theodore Vasiloudis', 'Foteini Beligianni', 'Gianmarco De Francisci Morales']
Authors: ['Shen Liu', 'Hongyan Liu']
Authors: ['Chenhao Xie 0002', 'Lihan Chen', 'Jiaqing Liang', 'Kezun Zhang', 'Yanghua Xiao', 'Hanghang Tong', 'Haixun Wang', 'Wei Wang 0009']
Authors: ['Rupesh Gupta', 'Guanfeng Liang', 'Rómer Rosales']
Authors: ['Quan Yuan 0001', 'Jingbo Shang', 'Xin Cao 0001', 'Chao Zhang 0014', 'Xinhe Geng', 'Jiawei Han 0001']
Authors: ['Zhe Chen', 'Sasha Dadiomov', 'Richard Wesley', 'Gang Xiao', 'Daniel Cory', 'Michael J. Cafarella', 'Jock D. Mackinlay']
Authors: ['Sridevi Baskaran', 'Alexander Keller 0003', 'Fei Chiang', 'Lukasz Golab', 'Jaroslaw Szlichta']
Authors: ['Baichuan Zhang', 'Mohammad Al Hasan']
Authors: ['Hoang Dung Vu', 'Kok-Soon Chai', 'Bryan Keating', 'Nuris

In [146]:
print("Co-authorship pairs and counts for 2017:")
print(coauthor_counts2017)

Co-authorship pairs and counts for 2017:
defaultdict(<class 'int'>, {('Chun-Jie Chen', 'Guanyao Li'): 1, ('Guanyao Li', 'Sheng-Yun Huang'): 1, ('Ai-Jou Chou', 'Guanyao Li'): 1, ('Guanyao Li', 'Xiaochuan Gou'): 1, ('Guanyao Li', 'Wen-Chih Peng'): 1, ('Chih-Wei Yi', 'Guanyao Li'): 1, ('Chun-Jie Chen', 'Sheng-Yun Huang'): 1, ('Ai-Jou Chou', 'Chun-Jie Chen'): 1, ('Chun-Jie Chen', 'Xiaochuan Gou'): 1, ('Chun-Jie Chen', 'Wen-Chih Peng'): 1, ('Chih-Wei Yi', 'Chun-Jie Chen'): 1, ('Ai-Jou Chou', 'Sheng-Yun Huang'): 1, ('Sheng-Yun Huang', 'Xiaochuan Gou'): 1, ('Sheng-Yun Huang', 'Wen-Chih Peng'): 1, ('Chih-Wei Yi', 'Sheng-Yun Huang'): 1, ('Ai-Jou Chou', 'Xiaochuan Gou'): 1, ('Ai-Jou Chou', 'Wen-Chih Peng'): 1, ('Ai-Jou Chou', 'Chih-Wei Yi'): 1, ('Wen-Chih Peng', 'Xiaochuan Gou'): 1, ('Chih-Wei Yi', 'Xiaochuan Gou'): 1, ('Chih-Wei Yi', 'Wen-Chih Peng'): 1, ('Amir Pouran Ben Veyseh', 'Javid Ebrahimi'): 1, ('Amir Pouran Ben Veyseh', 'Dejing Dou'): 1, ('Amir Pouran Ben Veyseh', 'Daniel Lowd'): 1, ('

In [151]:
coauthor_data2017 = {
    'from': [],
    'to': [],
    'weight': []
}


In [152]:
for (author1, author2), count in coauthor_counts2017.items():
    coauthor_data2017['from'].append(author1)
    coauthor_data2017['to'].append(author2)
    coauthor_data2017['weight'].append(count)

In [153]:
coauthors2017 = pd.DataFrame(coauthor_data2017)

In [154]:
coauthors2017

Unnamed: 0,from,to,weight
0,Chun-Jie Chen,Guanyao Li,1
1,Guanyao Li,Sheng-Yun Huang,1
2,Ai-Jou Chou,Guanyao Li,1
3,Guanyao Li,Xiaochuan Gou,1
4,Guanyao Li,Wen-Chih Peng,1
...,...,...,...
6512,Diyi Yang,Robert E. Kraut,1
6513,Robert E. Kraut,Zheng Yao,1
6514,Mohammad Rana,Prakruthi Karuna,1
6515,Hemant Purohit,Prakruthi Karuna,1


In [191]:
output_file_path = 'coauthorship2017_graph.csv'
coauthors2017.to_csv(output_file_path, index=False)

## Doing the same for 2018

In [171]:
papers2018 = pd.read_csv('filtered_data_2018.csv', 
                         delimiter=' ', 
                         quotechar='"', 
                         header=None, 
                         names=['Year', 'Title', 'Conference', 'Authors'])

In [172]:
papers2018.head()

Unnamed: 0,Year,Title,Conference,Authors
0,2018,Deep Semantic Hashing with Multi-Adversarial T...,CIKM,"Bingning Wang,Kang Liu 0001,Jun Zhao 0001"
1,2018,Ready for Use: Subject-Independent Movement In...,CIKM,"Dalin Zhang,Lina Yao,Kaixuan Chen,Sen Wang 0001"
2,2018,Rumor Detection with Hierarchical Social Atten...,CIKM,"Han Guo,Juan Cao,Yazi Zhang,Junbo Guo,Jintao Li"
3,2018,Online Learning for Non-Stationary A/B Tests.,CIKM,"Andrés Muñoz Medina,Sergei Vassilvitskii,Dong Yin"
4,2018,Relevance Estimation with Multiple Information...,CIKM,"Junqi Zhang,Yiqun Liu 0001,Shaoping Ma,Qi Tian"


In [173]:
coauthor_counts2018 = defaultdict(int)

In [174]:
for _, row in papers2018.iterrows():
    authors = row['Authors'].split(',')
    print("Authors:", authors)
    for pair in combinations(authors, 2):
        sorted_pair = tuple(sorted(pair))  # Ensure the pair is always in the same order
        coauthor_counts2018[sorted_pair] += 1

Authors: ['Bingning Wang', 'Kang Liu 0001', 'Jun Zhao 0001']
Authors: ['Dalin Zhang', 'Lina Yao', 'Kaixuan Chen', 'Sen Wang 0001']
Authors: ['Han Guo', 'Juan Cao', 'Yazi Zhang', 'Junbo Guo', 'Jintao Li']
Authors: ['Andrés Muñoz Medina', 'Sergei Vassilvitskii', 'Dong Yin']
Authors: ['Junqi Zhang', 'Yiqun Liu 0001', 'Shaoping Ma', 'Qi Tian']
Authors: ['Zhuoren Jiang', 'Liangcai Gao', 'Ke Yuan', 'Zheng Gao', 'Zhi Tang', 'Xiaozhong Liu']
Authors: ['Tiezheng Ge', 'Liqin Zhao', 'Guorui Zhou', 'Keyu Chen', 'Shuying Liu', 'Huiming Yi', 'Zelin Hu', 'Bochao Liu', 'Peng Sun', 'Haoyu Liu', 'Pengtao Yi', 'Sui Huang', 'Zhiqiang Zhang 0011', 'Xiaoqiang Zhu', 'Yu Zhang', 'Kun Gai']
Authors: ['Nicola Ferro 0001', 'Claudio Lucchese', 'Maria Maistro', 'Raffaele Perego 0001']
Authors: ['Hongtao Wang 0002', 'Pan Su', 'Miao Zhao', 'Hongmei Wang', 'Gang Li']
Authors: ['Mohammad Aliannejadi', 'Hamed Zamani', 'Fabio Crestani', 'W. Bruce Croft']
Authors: ['Kan Ren', 'Yuchen Fang', 'Weinan Zhang 0001', 'Shuhao L

AttributeError: 'float' object has no attribute 'split'

In [175]:
coauthor_data2018 = {
    'from': [],
    'to': [],
    'weight': []
}
for (author1, author2), count in coauthor_counts2018.items():
    coauthor_data2018['from'].append(author1)
    coauthor_data2018['to'].append(author2)
    coauthor_data2018['weight'].append(count)

In [188]:
coauthors2018 = pd.DataFrame(coauthor_data2018)

In [189]:
coauthors2018

Unnamed: 0,from,to,weight
0,Bingning Wang,Kang Liu 0001,1
1,Bingning Wang,Jun Zhao 0001,1
2,Jun Zhao 0001,Kang Liu 0001,1
3,Dalin Zhang,Lina Yao,1
4,Dalin Zhang,Kaixuan Chen,1
...,...,...,...
3501,Hongxia Yang,Minghui Wu,1
3502,Hongxia Yang,Jing Ying,1
3503,Kevin Chen-Chuan Chang,Minghui Wu,1
3504,Jing Ying,Kevin Chen-Chuan Chang,1


In [192]:
output_file_path = 'coauthorship2018_graph.csv'
coauthors2018.to_csv(output_file_path, index=False)

## Doing the same 2019

In [212]:
papers2019 = pd.read_csv('filtered_data_2019.csv', 
                         delimiter=' ', 
                         quotechar='"', 
                         header=None, 
                         names=['Year', 'Title', 'Conference', 'Authors'])

In [213]:
coauthor_counts2019 = defaultdict(int)

In [214]:
for _, row in papers2019.iterrows():
    authors = row['Authors'].split(',')
    print("Authors:", authors)
    for pair in combinations(authors, 2):
        sorted_pair = tuple(sorted(pair))  # Ensure the pair is always in the same order
        coauthor_counts2019[sorted_pair] += 1

Authors: ['Siteng Huang', 'Donglin Wang', 'Xuehan Wu', 'Ao Tang']
Authors: ['Ioana Giurgiu', 'Anika Schumann']
Authors: ['Ye Liu 0006', 'Chenwei Zhang', 'Xiaohui Yan', 'Yi Chang 0001', 'Philip S. Yu']
Authors: ['Jing Ma', 'Qiuchen Zhang', 'Jian Lou 0001', 'Joyce C. Ho', 'Li Xiong 0001', 'Xiaoqian Jiang']
Authors: ['Florian Adriaens', 'Çigdem Aslay', 'Tijl De Bie', 'Aristides Gionis', 'Jefrey Lijffijt']
Authors: ['Chen Qu', 'Liu Yang 0005', 'Minghui Qiu', 'Yongfeng Zhang', 'Cen Chen', 'W. Bruce Croft', 'Mohit Iyyer']
Authors: ['Shihao Zou', 'Zhonghua Li', 'Mohammad Akbari 0001', 'Jun Wang 0012', 'Peng Zhang 0002']
Authors: ['Thanasis Vergoulis', 'Serafeim Chatzopoulos', 'Ilias Kanellos', 'Panagiotis Deligiannis', 'Christos Tryfonopoulos', 'Theodore Dalamagas']
Authors: ['Betty van Aken', 'Benjamin Winter', 'Alexander Löser', 'Felix A. Gers']
Authors: ['Suppanut Pothirattanachaikul', 'Takehiro Yamamoto', 'Yusuke Yamamoto', 'Masatoshi Yoshikawa']
Authors: ['Teng Xiao', 'Shangsong Liang', 

In [215]:
coauthor_data2019 = {
    'from': [],
    'to': [],
    'weight': []
}
for (author1, author2), count in coauthor_counts2019.items():
    coauthor_data2019['from'].append(author1)
    coauthor_data2019['to'].append(author2)
    coauthor_data2019['weight'].append(count)

In [216]:
coauthors2019 = pd.DataFrame(coauthor_data2019)

In [217]:
coauthors2019

Unnamed: 0,from,to,weight
0,Donglin Wang,Siteng Huang,1
1,Siteng Huang,Xuehan Wu,1
2,Ao Tang,Siteng Huang,1
3,Donglin Wang,Xuehan Wu,1
4,Ao Tang,Donglin Wang,2
...,...,...,...
12019,Niloy Ganguly,Surya Teja Gora,1
12020,Bivas Mitra,Surya Teja Gora,1
12021,Niloy Ganguly,Ravi Sundaram,1
12022,Bivas Mitra,Ravi Sundaram,1


In [218]:
output_file_path = 'coauthorship2019_graph.csv'
coauthors2019.to_csv(output_file_path, index=False)

## Doing the same 2020

In [220]:
papers2020 = pd.read_csv('filtered_data_2020.csv', 
                         delimiter=' ', 
                         quotechar='"', 
                         header=None, 
                         names=['Year', 'Title', 'Conference', 'Authors'])

In [221]:
coauthor_counts2020 = defaultdict(int)

In [222]:
for _, row in papers2020.iterrows():
    authors = row['Authors'].split(',')
    print("Authors:", authors)
    for pair in combinations(authors, 2):
        sorted_pair = tuple(sorted(pair))  # Ensure the pair is always in the same order
        coauthor_counts2020[sorted_pair] += 1

Authors: ['Senzhang Wang', 'Hao Miao', 'Hao Chen', 'Zhiqiu Huang']
Authors: ['Fabiola Santore', 'Eduardo Cunha de Almeida', 'Wagner Hugo Bonat', 'Eduardo H. M. Pena', 'Luiz Eduardo S. de Oliveira']
Authors: ['Wei Shen 0005', 'Xiaonan He', 'Chuheng Zhang', 'Qiang Ni', 'Wanchun Dou', 'Yan Wang']
Authors: ['Shanshan Huang', 'Kenny Q. Zhu', 'Qianzi Liao', 'Libin Shen', 'Yinggong Zhao']
Authors: ['Jose F. Rodrigues', 'Jean Louis Pépin', 'Lorraine Goeuriot', 'Sihem Amer-Yahia']
Authors: ['Tian Tian', 'Yudong Liu', 'Xiaoyu Yang', 'Yuefei Lyu', 'Xi Zhang', 'Binxing Fang']
Authors: ['Ebrahim Bagheri', 'Huan Liu 0001', 'Kai Shu', 'Fattane Zarrinkalam']
Authors: ['Yuhan Quan', 'Jingtao Ding', 'Depeng Jin', 'Jianbo Yang', 'Xing Zhou', 'Yong Li 0008']
Authors: ['Fernando Diaz 0001', 'Bhaskar Mitra', 'Michael D. Ekstrand', 'Asia J. Biega', 'Ben Carterette']
Authors: ['Zhichun Guo', 'Wenhao Yu', 'Chuxu Zhang', 'Meng Jiang 0001', 'Nitesh V. Chawla']
Authors: ['Yang Yang', 'Junmei Hao', 'Canjia Li', 'Z

In [223]:
coauthor_data2020 = {
    'from': [],
    'to': [],
    'weight': []
}
for (author1, author2), count in coauthor_counts2020.items():
    coauthor_data2020['from'].append(author1)
    coauthor_data2020['to'].append(author2)
    coauthor_data2020['weight'].append(count)

In [224]:
coauthors2020 = pd.DataFrame(coauthor_data2020)

In [225]:
coauthors2020

Unnamed: 0,from,to,weight
0,Hao Miao,Senzhang Wang,1
1,Hao Chen,Senzhang Wang,2
2,Senzhang Wang,Zhiqiu Huang,1
3,Hao Chen,Hao Miao,1
4,Hao Miao,Zhiqiu Huang,1
...,...,...,...
13602,Dheeraj Reddy Pailla,Karandeep Singh Juneja,1
13603,Dheeraj Reddy Pailla,Ponnurangam Kumaraguru,1
13604,Karandeep Singh Juneja,Shwetanshu Singh,1
13605,Ponnurangam Kumaraguru,Shwetanshu Singh,1


In [226]:
output_file_path = 'coauthorship2020_graph.csv'
coauthors2020.to_csv(output_file_path, index=False)