In [1]:
import pandas as pd
import re

In [2]:
# Load the 2012 and 2021 paper tables; later cells filter them on the
# 'category' column.
df_2012, df_2021 = (
    pd.read_csv(csv_name)
    for csv_name in (
        'database_miccai_2012_with_cat.csv',
        'database_miccai_2021_with_cat.csv',
    )
)

In [3]:
#removing unnecessary index column
# errors="ignore" keeps this cell re-runnable: on a second execution the
# column is already gone and a plain drop would raise KeyError.
df_2012 = df_2012.drop(columns="Unnamed: 0", errors="ignore")
df_2021 = df_2021.drop(columns="Unnamed: 0", errors="ignore")

In [4]:
#finding the references
def find_reference(year, place, snippet_length=8000):
    """Extract the text that follows each "References" heading in a text file.

    Parameters
    ----------
    year : int
        Selects how a heading is recognised. For 2012 the word "References"
        must be followed by a blank line and a first entry numbered ``1`` or
        ``[1]``; for 2021 every occurrence of "References" counts.
    place : str
        Path of the UTF-8 encoded text file to scan.
    snippet_length : int, optional
        Number of characters kept starting at each heading (default 8000).

    Returns
    -------
    list of str
        One snippet per heading, in order of appearance in the file.
        Each snippet is a fixed-size window, so it may extend past the end
        of the reference list it starts at.

    Raises
    ------
    ValueError
        If ``year`` is neither 2012 nor 2021.
    """
    heading_patterns = {
        # Raw strings: "\[" in a normal string literal is an invalid escape.
        2012: r"References\n\n(?:1|\[1\])",
        2021: r"References",
    }
    if year not in heading_patterns:
        # Previously an unsupported year failed later with an unbound local.
        raise ValueError(
            f"unsupported year {year!r}; expected one of {sorted(heading_patterns)}"
        )

    with open(place, "r", encoding='utf-8') as part:
        article = part.read()

    # finditer yields matches left to right, so the starts are already sorted.
    starts = [match.start() for match in re.finditer(heading_patterns[year], article)]
    return [article[start:start + snippet_length] for start in starts]


In [5]:
# Extract the reference snippets of both proceedings.
# NOTE(review): these are absolute paths on one specific machine; the cell
# only runs where the files exist at exactly these locations.
ref_list2012, ref_list2021 = (
    find_reference(proceedings_year, txt_path)
    for proceedings_year, txt_path in (
        (2012, r"C:\Users\chris\Desktop\Value-Analysis-Thesis\combining proceedings txt\miccai_2012_full_txt.txt"),
        (2021, r"C:\Users\chris\Desktop\Value-Analysis-Thesis\combining proceedings txt\miccai_2021_full_txt.txt"),
    )
)

In [6]:
def create_ref_dic(ref_list, max_year=2023):
    """Collect the parenthesised four-digit years found in each snippet.

    Parameters
    ----------
    ref_list : list of str
        Text snippets to scan.
    max_year : int, optional
        Exclusive upper bound; a four-digit number is kept only when it is
        strictly smaller than this value (default 2023). This discards
        numbers that are clearly not publication years.

    Returns
    -------
    dict
        Maps the position of each snippet in ``ref_list`` (0, 1, 2, ...) to
        the list of year strings found in it, in order of appearance. A
        snippet without any match maps to an empty list.
    """
    # Raw string: "\(" in a normal string literal is an invalid escape.
    year_pattern = re.compile(r"\(([0-9]{4})\)")

    year = {}
    for position, element in enumerate(ref_list):
        year[position] = [
            num for num in year_pattern.findall(element) if int(num) < max_year
        ]
    return year


In [7]:
def add_references(df, ref_list_all):
    """Attach reference-year statistics to the classification papers of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Paper table with a 'category' column. Only rows whose category is
        exactly 'category: classification' are kept.
    ref_list_all : list of str
        Reference snippets, passed to ``create_ref_dic``. The years found in
        the snippet at position ``k`` are attributed to the row whose index
        label is ``k``.
        NOTE(review): this assumes the row labels of ``df`` line up with the
        order of the snippets -- confirm against how both were produced.

    Returns
    -------
    pandas.DataFrame
        The classification rows with their former index label in an 'index'
        column, plus 'Number of references', 'Oldest reference',
        'Newest reference' and 'Range of references'. Rows for which no
        snippet exists or no year was found get 0 references and NaN for
        the other three columns instead of raising.
    """
    df_class = df[df['category'] == 'category: classification']
    row_list = df_class.index

    ref_dic = create_ref_dic(ref_list_all)

    num_ref = []
    oldest_list = []
    newest_list = []
    range_list = []
    missing = float("nan")

    for element in row_list:
        # A missing dict entry or an empty year list used to crash here.
        years = sorted(int(num) for num in ref_dic.get(element, []))
        num_ref.append(len(years))
        if years:
            oldest_list.append(years[0])
            newest_list.append(years[-1])
            range_list.append(years[-1] - years[0])
        else:
            oldest_list.append(missing)
            newest_list.append(missing)
            range_list.append(missing)

    ref = pd.DataFrame({
        'index': row_list,
        'Number of references': num_ref,
        'Oldest reference': oldest_list,
        'Newest reference': newest_list,
        'Range of references': range_list,
    })

    # reset_index() exposes the former row labels as an 'index' column,
    # which is the key shared with ``ref``.
    return df_class.reset_index().merge(ref, on='index')

    

In [8]:
# Build the classification-only tables with reference statistics for both years.
df_2012class, df_2021class = (
    add_references(papers, ref_snippets)
    for papers, ref_snippets in (
        (df_2012, ref_list2012),
        (df_2021, ref_list2021),
    )
)

In [9]:
# Write both enriched tables to CSV in the working directory.
# index=True is the to_csv default, spelled out here: the row index is
# written as an extra unnamed first column.
df_2012class.to_csv(path_or_buf="database_miccai_2012_with_ref.csv", index=True)
df_2021class.to_csv(path_or_buf="database_miccai_2021_with_ref.csv", index=True)

In [9]:
# Show the 2021 classification papers whose 'Part of publication' is 5.
in_part_five = df_2021class['Part of publication'] == 5
df_2021class.loc[in_part_five]

Unnamed: 0,index,Title,Authors,Page numbers,DOI,Year of publication,Part of publication,category,Number of references,Oldest reference,Newest reference,Range of references
94,255,CA-Net: Leveraging Contextual Features for Lun...,"Mingzhou Liu, Fandong Zhang, Xinwei Sun, Yizho...",23-32,/chapter/10.1007/978-3-030-87240-3_3,2021,5,category: classification,22,1991,2020,29
95,257,DAE-GCN: Identifying Disease-Related Features ...,"Churan Wang, Xinwei Sun, Fandong Zhang, Yizhou...",43-52,/chapter/10.1007/978-3-030-87240-3_5,2021,5,category: classification,14,1996,2020,24
96,258,Enhanced Breast Lesion Classification via Know...,"Kun Chen, Yuanfan Guo, Canqian Yang, Yi Xu, Ru...",53-63,/chapter/10.1007/978-3-030-87240-3_6,2021,5,category: classification,29,2003,2021,18
97,259,Multiple Meta-model Quantifying for Medical Vi...,"Tuong Do, Binh X. Nguyen, Erman Tjiputra, Minh...",64-74,/chapter/10.1007/978-3-030-87240-3_7,2021,5,category: classification,45,1987,2021,34
98,262,A Coherent Cooperative Learning Framework Base...,"Xinxin Shan, Ying Wen, Qingli Li, Yue Lu, Haib...",96-106,/chapter/10.1007/978-3-030-87240-3_10,2021,5,category: classification,35,2010,2020,10
...,...,...,...,...,...,...,...,...,...,...,...,...
153,327,A Structural Causal Model for MR Images of Mul...,"Jacob C. Reinhold, Aaron Carass, Jerry L. Prince",782-792,/chapter/10.1007/978-3-030-87240-3_75,2021,5,category: classification,34,2008,2021,13
154,328,$$\mathsf {EMA}$$ EMA : Auditing Data Removal...,"Yangsibo Huang, Xiaoxiao Li, Kai Li",793-803,/chapter/10.1007/978-3-030-87240-3_76,2021,5,category: classification,19,1996,2020,24
155,329,AnaXNet: Anatomy Aware Multi-label Finding Cla...,"Nkechinyere N. Agu, Joy T. Wu, Hanqing Chao, I...",804-813,/chapter/10.1007/978-3-030-87240-3_77,2021,5,category: classification,34,2013,2021,8
156,330,Projection-Wise Disentangling for Fair and Int...,"Xianjing Liu, Bo Li, Esther E. Bron, Wiro J. N...",814-823,/chapter/10.1007/978-3-030-87240-3_78,2021,5,category: classification,21,2003,2021,18


In [13]:
# Preview the first 20 rows of the 2012 classification table.
df_2012class.iloc[:20]

Unnamed: 0,index,Title,Authors,Page numbers,DOI,Year of publication,Part of publication,category,Number of references,Oldest reference,Newest reference,Range of references
0,4,Surgical Gesture Classification from Video Data,"Benjamín Béjar Haro, Luca Zappella, René Vidal",34-41,/chapter/10.1007/978-3-642-33415-3_5,2012.0,1,category: classification,26,1999,2012,13
1,7,Efficient Optic Cup Detection from Intra-image...,"Yanwu Xu, Jiang Liu, Stephen Lin, Dong Xu, Car...",58-65,/chapter/10.1007/978-3-642-33415-3_8,2012.0,1,category: classification,16,1992,2011,19
2,9,Thoracic Abnormality Detection with Data Adapt...,"Yang Song, Weidong Cai, Yun Zhou, Dagan Feng",74-81,/chapter/10.1007/978-3-642-33415-3_10,2012.0,1,category: classification,12,2004,2011,7
3,10,Domain Transfer Learning for MCI Conversion Pr...,"Bo Cheng, Daoqiang Zhang, Dinggang Shen",82-90,/chapter/10.1007/978-3-642-33415-3_11,2012.0,1,category: classification,15,2001,2012,11
4,12,Incremental Kernel Ridge Regression for the Pr...,"Binbin Pan, James J. Xia, Peng Yuan, Jaime Gat...",99-106,/chapter/10.1007/978-3-642-33415-3_13,2012.0,1,category: classification,11,1985,2009,24
5,13,Fuzzy Multi-class Statistical Modeling for Eff...,"Jose George, Kathleen Vunckx, Elke Van de Cast...",107-114,/chapter/10.1007/978-3-642-33415-3_14,2012.0,1,category: classification,12,1977,2011,34
6,14,Structure and Context in Prostatic Gland Segme...,"Kien Nguyen, Anindya Sarkar, Anil K. Jain",115-123,/chapter/10.1007/978-3-642-33415-3_15,2012.0,1,category: classification,10,1992,2012,20
7,15,Quantitative Characterization of Trabecular Bo...,"Yinxiao Liu, Punam K. Saha, Ziyue Xu",124-131,/chapter/10.1007/978-3-642-33415-3_16,2012.0,1,category: classification,19,1985,2011,26
8,16,"Genetic, Structural and Functional Imaging Bio...","Nikhil Singh, Angela Y. Wang, Preethi Sankaran...",132-140,/chapter/10.1007/978-3-642-33415-3_17,2012.0,1,category: classification,10,1994,2012,18
9,17,Robust MR Spine Detection Using Hierarchical L...,"Yiqiang Zhan, Dewan Maneesh, Martin Harder, Xi...",141-148,/chapter/10.1007/978-3-642-33415-3_18,2012.0,1,category: classification,9,1989,2012,23
