In [29]:
import os
import re

import pandas as pd

In [30]:
#reading in files
df_2012 = pd.read_csv('database_miccai_2012_with_cat.csv')
df_2021 = pd.read_csv('database_miccai_2021_with_cat.csv')

In [31]:
#removing unnecessary index column
df_2012 = df_2012.drop("Unnamed: 0", axis = 1)
df_2021 = df_2021.drop("Unnamed: 0", axis = 1)

In [32]:
#finding the references
def find_reference(year, place):
    reference_list = []
    with open(place, "r", encoding = 'utf-8') as part:
        article = part.read()
        if year == 2012:
            references = [i.start() for i in re.finditer("References\n\n1", article)]
            other_ref = [i.start() for i in re.finditer("References\n\n\[1\]", article)]
            all_ref = references + other_ref
            all_ref.sort()
            
        if year == 2021:
            all_ref = [i.start() for i in re.finditer("References", article)]
        for index in all_ref: 
            reference_list.append(article[index:index+8000])

    return reference_list


In [33]:
#saving the references
ref_list2012 = find_reference(2012, r"C:\Users\chris\Desktop\Value-Analysis-Thesis\combining proceedings txt\miccai_2012_full_txt.txt")
ref_list2021 = find_reference(2021, r"C:\Users\chris\Desktop\Value-Analysis-Thesis\combining proceedings txt\miccai_2021_full_txt.txt")

In [34]:
def create_ref_dic(ref_list):
    year={}
    i=0
    for element in ref_list:
        year[i] = []
        references = [i.start() for i in re.finditer("\([0-9][0-9][0-9][0-9]\)", element)]
        for index in references:
            num = element[index+1:index+5]
            if int(num) < 2023: #eliminating some mistakes with numbers that are clearly not years
                year.get(i).append(num)
        i+=1
    return year


In [35]:
def add_references(df, ref_list_all):
    
    
    row_list = df[df['category'] == 'category: classification'].index
    df_class = df[df['category'] == 'category: classification']
    
    ref_dic = create_ref_dic(ref_list_all)
    
    oldest_list = []
    newest_list = []
    range_list = []
    class_dic = {}
    num_ref = []

    for element in row_list:
        ref_dic.get(element).sort()
        class_dic[element] = ref_dic.get(element) #need this otherwise the sort is forgotten
        ref_list = class_dic.get(element)
        oldest_list.append(int(ref_list[0]))
        newest_list.append(int(ref_list[len(ref_list)-1]))
        range_list.append(int(ref_list[len(ref_list)-1])- int(ref_list[0]))
        num_ref.append(len(ref_list))
   
    
    data = {'index': row_list,
            'Number of references': num_ref,
            'Oldest reference': oldest_list,
            'Newest reference' : newest_list,
            'Range of references': range_list
    } 
    
    ref = pd.DataFrame(data)
    
    df_class = df_class.reset_index()
    
    df_class = df_class.merge(ref, on = 'index')
 
    
    return df_class 

    

In [36]:
df_2012class = add_references(df_2012, ref_list2012)

df_2021class = add_references(df_2021, ref_list2021)

In [38]:
#saving updated database to csv
df_2012class.to_csv("database_miccai_2012_with_ref.csv")
df_2021class.to_csv("database_miccai_2021_with_ref.csv")

In [37]:
df_2021class.head(10)

Unnamed: 0,index,Title,Authors,Page numbers,DOI,Year of publication,Part of publication,category,Number of references,Oldest reference,Newest reference,Range of references
0,17,Improved Brain Lesion Segmentation with Anatom...,"Chenghao Liu, Xiangzhu Zeng, Kongming Liang, Y...",186-195,/chapter/10.1007/978-3-030-87193-2_18,2021,1,category: classification,17,1986,2021,35
1,18,CarveMix: A Simple Data Augmentation Method fo...,"Xinru Zhang, Chenghao Liu, Ni Ou, Xiangzhu Zen...",196-205,/chapter/10.1007/978-3-030-87193-2_19,2021,1,category: classification,16,2000,2021,21
2,25,Partially-Supervised Learning for Vessel Segme...,"Yanyu Xu, Xinxing Xu, Lei Jin, Shenghua Gao, R...",271-281,/chapter/10.1007/978-3-030-87193-2_26,2021,1,category: classification,26,1989,2020,31
3,44,Residual Feedback Network for Breast Lesion Se...,"Ke Wang, Shujun Liang, Yu Zhang",471-481,/chapter/10.1007/978-3-030-87193-2_45,2021,1,category: classification,28,2007,2021,14
4,55,Modality-Aware Mutual Learning for Multi-modal...,"Yao Zhang, Jiawei Yang, Jiang Tian, Zhongchao ...",589-599,/chapter/10.1007/978-3-030-87193-2_56,2021,1,category: classification,30,2010,2021,11
5,61,TUN-Det: A Novel Network for Thyroid Ultrasoun...,"Atefeh Shahroudnejad, Xuebin Qin, Sharanya Bal...",656-667,/chapter/10.1007/978-3-030-87193-2_62,2021,1,category: classification,39,2001,2021,20
6,63,On the Relationship Between Calibrated Predict...,"Teodora Popordanoska, Jeroen Bertels, Dirk Van...",678-688,/chapter/10.1007/978-3-030-87193-2_64,2021,1,category: classification,28,1928,2021,93
7,69,SSLP: Spatial Guided Self-supervised Learning ...,"Jiajun Li, Tiancheng Lin, Yi Xu",3-12,/chapter/10.1007/978-3-030-87196-3_1,2021,2,category: classification,33,2010,2020,10
8,72,Imbalance-Aware Self-supervised Learning for 3...,"Hongwei Li, Fei-Fei Xue, Krishna Chaitanya, Sh...",36-46,/chapter/10.1007/978-3-030-87196-3_4,2021,2,category: classification,25,2006,2021,15
9,73,Self-supervised Visual Representation Learning...,"Pengshuai Yang, Zhiwei Hong, Xiaoxu Yin, Cheng...",47-57,/chapter/10.1007/978-3-030-87196-3_5,2021,2,category: classification,31,2006,2021,15
