### Packages

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import functions    # my own functions which are used in more notebooks

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import numpy as np
import tqdm
import math
import kds
import pickle
from tqdm import tqdm
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV, RepeatedStratifiedKFold
import gensim
print((gensim.__version__))  
from gensim.models import Word2Vec
import gensim.downloader as api
from gensim.models import KeyedVectors
warnings.filterwarnings('ignore')
from sklearn.metrics import roc_curve, auc, roc_auc_score,classification_report, accuracy_score,precision_score,recall_score
from re import search
from sklearn.ensemble import RandomForestClassifier
from re import search

4.2.0


## Upload cleaned abstracts

In [3]:
only_english = True

In [4]:
df_all = pd.read_csv("outputs/df_sw_tok_low_punc_lemm_v7.csv").rename(columns = {'doi_x':'doi'})

In [5]:
len(df_all)

476175

In [6]:
df_all.head(2)

Unnamed: 0.1,Unnamed: 0,Year,Month,abstract,doi,cord_uid,journal,license,authors,len,language,abstract_cleaned
0,0,2001.0,7.0,OBJECTIVE: This retrospective chart review des...,10.1186/1471-2334-1-6,ug7v899j,BMC Infect Dis,no-cc,"Madani, Tariq A; Al-Ghamdi, Aisha A",1158,en,retrospective chart review describes epidemiol...
1,1,2000.0,8.0,Inflammatory diseases of the respiratory tract...,10.1186/rr14,02tnwd4m,Respir Res,no-cc,"Vliet, Albert van der; Eiserich, Jason P; Cros...",718,en,inflammatory disease respiratory tract commonl...


## Parameters

In [7]:
# sady experimentů

params1 = {"train_year_of_citations": 2017,
          "test_year_of_citations": 2018,
          "min_train_year_published":2016,
          "max_train_year_published":2017,
          "min_test_year_published":2018,
          "max_test_year_published":2018,
          "embeddings_from_year": 2016,
          "embeddings_to_year": 2017}


params2 = {"train_year_of_citations": 2018,
          "test_year_of_citations": 2019,
          "min_train_year_published":2017,
          "max_train_year_published":2018,
          "min_test_year_published":2019,
          "max_test_year_published":2019,
          "embeddings_from_year": 2017,
          "embeddings_to_year": 2018}

params3 = {"train_year_of_citations": 2019,
          "test_year_of_citations": 2020,
          "min_train_year_published":2018,
          "max_train_year_published":2019,
          "min_test_year_published":2020,
          "max_test_year_published":2020,
          "embeddings_from_year": 2018,
          "embeddings_to_year": 2019}


params4 = {"train_year_of_citations": 2020,
          "test_year_of_citations": 2021,
          "min_train_year_published":2019,
          "max_train_year_published":2020,
          "min_test_year_published":2020,
          "max_test_year_published":2020,
          "embeddings_from_year": 2019,
          "embeddings_to_year": 2020}

In [8]:
list_experiments = [params1, params2, params3, params4 ]

In [9]:
for params in list_experiments:

    train_year_of_citations = params["train_year_of_citations"]
    test_year_of_citations = params["test_year_of_citations"]  
    min_train_year_published = params["min_train_year_published"]
    max_train_year_published = params["max_train_year_published"]
    min_test_year_published = params["min_test_year_published"]
    max_test_year_published = params["max_test_year_published"]
    embeddings_from_year = params["embeddings_from_year"]
    embeddings_to_year = params["embeddings_to_year"]
    classifier = "lr"  # or "rf"

    # Add target flag for dataframe
    train_df = functions.add_target_opencitatins_marginal(target_year = train_year_of_citations,
                                                      df = df_all,target_col_name="target")
    test_df = functions.add_target_opencitatins_marginal(target_year = test_year_of_citations, 
                                                     df = df_all,target_col_name = "target")

    # Train test split based on year of publication
    train_df = train_df[(train_df['Year']<=max_train_year_published) & (train_df['Year']>= min_train_year_published)]
    train_df = train_df.set_index("doi")
    print(len(train_df))

    test_df = test_df[(test_df['Year']<=max_test_year_published) & (test_df['Year']>= min_test_year_published)] 
    test_df = test_df.set_index("doi")
    print(len(test_df))


#######################################################################################################################
############                                           BOW                                               #####################################################
########################################################################################################################


    cvec = CountVectorizer(analyzer = "word", 
                       tokenizer = None, 
                       ngram_range=(1,1), 
                       binary= True,
                       min_df = 2,
                      ) 

    matrix_bow_train = cvec.fit_transform(train_df['abstract_cleaned'])
    tokens_bow_train = cvec.get_feature_names_out()

    with open('outputs/classifier/train_'+str(train_year_of_citations)+'/tokens_bow_'+str(train_year_of_citations)+'.data', 'wb') as filehandle:
        pickle.dump(tokens_bow_train, filehandle)
    
    matrix_bow_train_pd = pd.DataFrame.sparse.from_spmatrix(matrix_bow_train, columns = tokens_bow_train)
    matrix_bow_train_pd = matrix_bow_train_pd[sorted(matrix_bow_train_pd.columns)]
    print(matrix_bow_train.shape)

    # for testing we need to have the same features like in training, not new, not less ! 
    matrix_bow_test = cvec.fit_transform(test_df['abstract_cleaned'])
    tokens_bow_test = cvec.get_feature_names_out()
    matrix_bow_test_pd = pd.DataFrame.sparse.from_spmatrix(matrix_bow_test, columns = tokens_bow_test)
    print(len(matrix_bow_test_pd))
    not_in_test = np.setdiff1d(tokens_bow_train,tokens_bow_test)
    columns_in_both = list(set(tokens_bow_train) & set(tokens_bow_test))
    matrix_bow_test_pd_without_new_col = matrix_bow_test_pd[columns_in_both]

    for col in tqdm(not_in_test): 
        matrix_bow_test_pd_without_new_col[col] =  np.nan
    print(len(matrix_bow_test_pd_without_new_col.columns))

    matrix_bow_test_pd = matrix_bow_test_pd_without_new_col.fillna(0)
    matrix_bow_test_pd = matrix_bow_test_pd[sorted(matrix_bow_test_pd.columns)]

    X_train_bow, X_test_bow, y_train_bow, y_test_bow = train_test_split(matrix_bow_train_pd, train_df.target, test_size=0.2, random_state=1,stratify= train_df["target"])
    splitted_train_features = y_test_bow.reset_index().merge(train_df, on="doi",how="left")

    # classifier
    cf_bow_splitted_train = LogisticRegression(penalty = "none",random_state = 0).fit(X_train_bow, y_train_bow)
    pickle.dump(cf_bow_splitted_train, open('outputs/classifier/train_'+str(train_year_of_citations)+'/lreg_bow_'+str(train_year_of_citations)+'.sav', 'wb'))
    cf_bow_splitted_train_reg = LogisticRegression(random_state = 0).fit(X_train_bow, y_train_bow)
    pickle.dump(cf_bow_splitted_train_reg, open('outputs/classifier/train_'+str(train_year_of_citations)+'/lreg_reg_bow_'+str(train_year_of_citations)+'.sav', 'wb'))


    y_pred_bow_splitted_train = cf_bow_splitted_train.predict_proba(X_test_bow)[:,1]
    y_pred_bow_whole_train = cf_bow_splitted_train.predict_proba(matrix_bow_train_pd)[:,1]
    y_pred_bow_test = cf_bow_splitted_train.predict_proba(matrix_bow_test_pd)[:,1]


    matrix_pred_real = [ 
             ('predictions whole train set', y_pred_bow_whole_train, train_df.target),
             ('predictions splitted train set', y_pred_bow_splitted_train,y_test_bow),
             ('predictions tested new month',y_pred_bow_test,test_df.target),
             ]

    # results
    results_train = pd.DataFrame(zip(list( y_pred_bow_whole_train),list(train_df.target), list(train_df.Year), list(train_df.OpenCitations)),columns=["y_pred","real","Year","OpenCitations"])
    results_splitted_train = pd.DataFrame(zip(list( y_pred_bow_splitted_train),list(y_test_bow), list(splitted_train_features.Year), list(splitted_train_features.OpenCitations)),columns=["y_pred","real","Year","OpenCitations"])
    results_test = pd.DataFrame(zip(list( y_pred_bow_test),list(test_df.target), list(test_df.Year), list(test_df.OpenCitations)),columns=["y_pred","real","Year","OpenCitations"])


    # Top x% with highest OpenCitations + x% of lowest citations
    list_df = []
    for results in [results_train,results_splitted_train, results_test]:
        auc_list=[]
        perc_list = [0.19,0.17,0.15,0.13,0.11,0.09,0.07,0.05,0.03,0.01] 
        for perc in perc_list:
            auc_list.append(roc_auc_score(functions.x_first_last_val(results,perc).real.values, 
                                      functions.x_first_last_val(results,perc).y_pred.values
                                     )
                       )
        auc_per_perc = pd.DataFrame(zip(auc_list,perc_list),columns=["auc","perc"])
        list_df.append(auc_per_perc)

    res_list = []
    perc_list_1 = [1.0] + perc_list
    print(perc_list_1)
    
    for frac in perc_list_1:
        if frac == 1: 
            df_res = functions.resulted_matrics_table(matrix_pred_real,frac_articles=1)

        if frac <1:
            y_real_train = functions.x_first_last_val(results_train,frac).real.values
            probs_train =functions.x_first_last_val(results_train,frac).y_pred.values
            y_real_train_spl = functions.x_first_last_val(results_splitted_train,frac).real.values
            probs_train_spl =functions.x_first_last_val(results_splitted_train,frac).y_pred.values
            y_real_test = functions.x_first_last_val(results_test,frac).real.values
            probs_test =functions.x_first_last_val(results_test,frac).y_pred.values

            matrix_pred_real_2 = [ 
             ('predictions whole train set', probs_train, y_real_train),
             ('predictions tested new month',probs_test,  y_real_test),
              ('predictions splitted train set', probs_train_spl,y_real_train_spl)
             ]
        
            df_res  = functions.resulted_matrics_table(matrix_pred_real_2,frac_articles=frac)
        df_res["perc"] = frac
        res_list.append(df_res)    
     
    # output - accuracy on test set of BOW
    res_all = pd.concat(res_list).pivot(index = "perc",columns='dataset_of_predictions', values=["AUC","Accuracy","Precision","Recall"])
    res_all.to_csv('outputs/classifier/train_'+str(train_year_of_citations)+'/res_all_bow_'+str(train_year_of_citations)+'.csv')
    
    high_score_art_lr_bow = pd.DataFrame(list(zip(list(y_pred_bow_test), list(test_df.target.values), list(test_df.abstract.values),list(test_df.index),list(test_df.OpenCitations.values),list(test_df.Year.values) )),columns =["score","target","abstract","doi","OpenCitations","Year"])
    high_score_art_lr_bow = high_score_art_lr_bow.sort_values("score",ascending=False)
    #high_score_art_lr_bow.to_csv('outputs/classifier/train_'+str(train_year_of_citations)+'/'+"score_art_lr_bow.csv")


####################################################################################################################
########################################               WORD2VEC                            #########################
####################################################################################################################

    if only_english:
        model_w2v = gensim.models.Word2Vec.load("outputs/w2v/w2v_published_between_"+str(embeddings_from_year) + " and "+ str(embeddings_to_year)+".model")

    if not only_english:
        model_w2v = gensim.models.Word2Vec.load("outputs/w2v/w2v_published_between_"+str(embeddings_from_year) + " and "+ str(embeddings_to_year)+".model")

    train_df["abstract_tokenized"] = functions.tokenized_column(train_df.reset_index()["abstract_cleaned"])
    df_X_train_avg = functions.transform_to_document_vector(text_col_tokenized = train_df.reset_index().abstract_tokenized,model = model_w2v,index_col_list = list(train_df.index),agg_func = "avg").fillna(0)
    df_X_train_sum = functions.transform_to_document_vector(text_col_tokenized = train_df.reset_index().abstract_tokenized,model = model_w2v,index_col_list = list(train_df.index),agg_func = "sum").fillna(0)

    test_df["abstract_tokenized"] = functions.tokenized_column(test_df.reset_index()["abstract_cleaned"])
    df_X_test_avg = functions.transform_to_document_vector(text_col_tokenized = test_df.reset_index().abstract_tokenized,model = model_w2v,index_col_list = list(test_df.index),agg_func = "avg").fillna(0)
    df_X_test_sum = functions.transform_to_document_vector(text_col_tokenized = test_df.reset_index().abstract_tokenized,model = model_w2v,index_col_list = list(test_df.index),agg_func = "sum").fillna(0)


    # USE SAME SET FOR COMPARING LIKE IN BOW !!!
    X_train_w2v_avg = df_X_train_avg.reset_index().rename({"index":"doi"},axis="columns").merge(y_train_bow.reset_index()[["doi"]],how="right",on="doi").set_index("doi")
    X_test_w2v_avg = df_X_train_avg.reset_index().rename({"index":"doi"},axis="columns").merge(y_test_bow.reset_index()[["doi"]],how="right",on="doi").set_index("doi")

    X_train_w2v_sum = df_X_train_sum.reset_index().rename({"index":"doi"},axis="columns").merge(y_train_bow.reset_index()[["doi"]],how="right",on="doi").set_index("doi")
    X_test_w2v_sum = df_X_train_sum.reset_index().rename({"index":"doi"},axis="columns").merge(y_test_bow.reset_index()[["doi"]],how="right",on="doi").set_index("doi")

    y_train_w2v = y_train_bow
    y_test_w2v = y_test_bow

    if classifier == "lr":
        cf_w2v_splitted_train_avg = LogisticRegression(penalty = "none",random_state = 0).fit(X_train_w2v_avg, y_train_w2v)
        pickle.dump(cf_w2v_splitted_train_avg, open('outputs/classifier/train_'+str(train_year_of_citations)+'/'+'lreg_w2v_avg_'+str(train_year_of_citations)+'.sav', 'wb'))
        cf_w2v_splitted_train_avg_reg = LogisticRegression(random_state = 0).fit(X_train_w2v_avg, y_train_w2v)
        pickle.dump(cf_w2v_splitted_train_avg_reg, open('outputs/classifier/train_'+str(train_year_of_citations)+'/'+'lreg_reg_w2v_avg_'+str(train_year_of_citations)+'.sav', 'wb'))
        
    if classifier == "rf":
        cf_w2v_splitted_train_avg = RandomForestClassifier(random_state=0).fit(X_train_w2v_avg, y_train_w2v)
        pickle.dump(cf_w2v_splitted_train_avg, open('outputs/classifier/train_'+str(train_year_of_citations)+'/'+'rf_w2v_avg_'+str(train_year_of_citations)+'.sav', 'wb'))
    
    y_pred_w2v_splitted_train_avg = cf_w2v_splitted_train_avg.predict_proba(X_test_w2v_avg)[:,1]
    y_pred_w2v_whole_train_avg = cf_w2v_splitted_train_avg.predict_proba(df_X_train_avg)[:,1]
    y_pred_w2v_test_avg = cf_w2v_splitted_train_avg.predict_proba(df_X_test_avg)[:,1]

    splitted_train_features = y_test_w2v.reset_index().merge(train_df, on="doi",how="left")

    results_train_w2v_avg = pd.DataFrame(zip(list( y_pred_w2v_whole_train_avg),list(train_df.target), list(train_df.Year), list(train_df.OpenCitations)),columns=["y_pred","real","Year","OpenCitations"])
    results_splitted_train_w2v_avg = pd.DataFrame(zip(list( y_pred_w2v_splitted_train_avg),list(y_test_w2v), list(splitted_train_features.Year), list(splitted_train_features.OpenCitations)),columns=["y_pred","real","Year","OpenCitations"])
    results_test_w2v_avg = pd.DataFrame(zip(list( y_pred_w2v_test_avg),list(test_df.target), list(test_df.Year), list(test_df.OpenCitations)),columns=["y_pred","real","Year","OpenCitations"])

    matrix_pred_real_w2v = [ 
             ('predictions whole train set', y_pred_w2v_whole_train_avg, train_df.target),
             ('predictions splitted train set', y_pred_w2v_splitted_train_avg,y_test_w2v),
             ('predictions tested new month',y_pred_w2v_test_avg,test_df.target),
             ]

    # Top X% with highest OpenCitations + X% of lowest citations

    list_df_avg = []
    for results in [results_train_w2v_avg,results_splitted_train_w2v_avg, results_test_w2v_avg]:
        auc_list=[]
    
        perc_list = [0.19,0.17,0.15,0.13,0.11,0.09,0.07,0.05,0.03,0.01]     
        for perc in perc_list:
            auc_list.append(roc_auc_score(functions.x_first_last_val(results,perc).real.values, functions.x_first_last_val(results,perc).y_pred.values) )
        auc_per_perc = pd.DataFrame(zip(auc_list,perc_list),columns=["auc","perc"])
        list_df_avg.append(auc_per_perc)
    
    res_list = []
    perc_list_1 = [1.0] + perc_list
    print(perc_list_1)

    for frac in perc_list_1:
        if frac == 1: 
            df_res = functions.resulted_matrics_table(matrix_pred_real_w2v,frac_articles=1)

        if frac <1:
            y_real_train = functions.x_first_last_val(results_train_w2v_avg,frac).real.values
            probs_train =functions.x_first_last_val(results_train_w2v_avg,frac).y_pred.values
            y_real_train_spl = functions.x_first_last_val(results_splitted_train_w2v_avg,frac).real.values
            probs_train_spl =functions.x_first_last_val(results_splitted_train_w2v_avg,frac).y_pred.values
            y_real_test = functions.x_first_last_val(results_test_w2v_avg,frac).real.values
            probs_test =functions.x_first_last_val(results_test_w2v_avg,frac).y_pred.values

            matrix_pred_real_2 = [ 
             ('predictions whole train set', probs_train, y_real_train),
             ('predictions tested new month',probs_test,  y_real_test),
              ('predictions splitted train set', probs_train_spl,y_real_train_spl)
             ]
    
            df_res  = functions.resulted_matrics_table(matrix_pred_real_2,frac_articles=frac)
        df_res["perc"] = frac
        res_list.append(df_res)    
      
    res_all = pd.concat(res_list).pivot(index = "perc",columns='dataset_of_predictions', values=["AUC","Accuracy","Precision","Recall"] )
    res_all.to_csv('outputs/classifier/train_'+str(train_year_of_citations)+'/res_all_w2v_avg_'+str(train_year_of_citations)+'.csv')

    high_score_art_lr_w2v_avg = pd.DataFrame(list(zip(list(y_pred_w2v_test_avg), list(test_df.target.values), list(test_df.abstract.values),list(test_df.index),list(test_df.OpenCitations.values),list(test_df.Year.values) )),columns =["score","target","abstract","doi","OpenCitations","Year"])
    high_score_art_lr_w2v_avg = high_score_art_lr_w2v_avg.sort_values("score",ascending=False)
    #high_score_art_lr_w2v_avg.to_csv('outputs/classifier/train_'+str(train_year_of_citations)+'/'+'score_art_lr_w2v_avg.csv')

target
0    7126
1    5640
Name: count, dtype: int64
target
0    7957
1    6454
Name: count, dtype: int64
2329
799
(2329, 9431)
799


100%|██████████| 4430/4430 [00:02<00:00, 1555.99it/s]


9431
[1.0, 0.19, 0.17, 0.15, 0.13, 0.11, 0.09, 0.07, 0.05, 0.03, 0.01]


100%|██████████| 2329/2329 [00:08<00:00, 266.07it/s]
100%|██████████| 2329/2329 [00:08<00:00, 268.66it/s]
100%|██████████| 799/799 [00:03<00:00, 248.95it/s]
100%|██████████| 799/799 [00:03<00:00, 249.14it/s]


[1.0, 0.19, 0.17, 0.15, 0.13, 0.11, 0.09, 0.07, 0.05, 0.03, 0.01]
target
0    7957
1    6454
Name: count, dtype: int64
target
0    9189
1    7444
Name: count, dtype: int64
2393
1028
(2393, 9637)
1028


100%|██████████| 3991/3991 [00:03<00:00, 1158.32it/s]


9637
[1.0, 0.19, 0.17, 0.15, 0.13, 0.11, 0.09, 0.07, 0.05, 0.03, 0.01]


100%|██████████| 2393/2393 [00:09<00:00, 264.56it/s]
100%|██████████| 2393/2393 [00:09<00:00, 261.93it/s]
100%|██████████| 1028/1028 [00:04<00:00, 243.56it/s]
100%|██████████| 1028/1028 [00:04<00:00, 245.04it/s]


[1.0, 0.19, 0.17, 0.15, 0.13, 0.11, 0.09, 0.07, 0.05, 0.03, 0.01]
target
0    9189
1    7444
Name: count, dtype: int64
target
0    22317
1    17775
Name: count, dtype: int64
2816
20620
(2816, 10616)
20620


100%|██████████| 945/945 [00:01<00:00, 876.04it/s]


10616
[1.0, 0.19, 0.17, 0.15, 0.13, 0.11, 0.09, 0.07, 0.05, 0.03, 0.01]


100%|██████████| 2816/2816 [00:11<00:00, 255.20it/s]
100%|██████████| 2816/2816 [00:11<00:00, 252.91it/s]
100%|██████████| 20620/20620 [01:25<00:00, 240.12it/s]
100%|██████████| 20620/20620 [01:25<00:00, 241.40it/s]


[1.0, 0.19, 0.17, 0.15, 0.13, 0.11, 0.09, 0.07, 0.05, 0.03, 0.01]
target
0    22317
1    17775
Name: count, dtype: int64
target
0    8576
1    4996
Name: count, dtype: int64
23150
8645
(23150, 28050)
8645


100%|██████████| 11874/11874 [00:30<00:00, 383.88it/s]


28050
[1.0, 0.19, 0.17, 0.15, 0.13, 0.11, 0.09, 0.07, 0.05, 0.03, 0.01]


100%|██████████| 23150/23150 [01:29<00:00, 259.68it/s]
100%|██████████| 23150/23150 [01:28<00:00, 261.60it/s]
100%|██████████| 8645/8645 [00:32<00:00, 268.72it/s]
100%|██████████| 8645/8645 [00:32<00:00, 270.08it/s]


[1.0, 0.19, 0.17, 0.15, 0.13, 0.11, 0.09, 0.07, 0.05, 0.03, 0.01]


### 1) Table with AUC

In [10]:
list_train_years = [2017,2018,2019,2020]
perc = ["0.05","1.0"]

fin_list = []
for train_year in list_train_years:
    
    bow = pd.read_csv('outputs/classifier/train_'+str(train_year)+'/'+"res_all_bow_"+str(train_year)+".csv")
    bow.columns = bow.iloc[0]
    bow = bow.iloc[2:]
    auc_bow = bow.iloc[:, 0:4]
    auc_bow = auc_bow[auc_bow["dataset_of_predictions"].isin(perc)]
    auc_bow["Train/Test years"] = str(train_year-1)+"+"+str(train_year)+"/"+str(train_year+1)
    auc_bow["model"] = "bow"
    
    w2v = pd.read_csv('outputs/classifier/train_'+str(train_year)+'/'+"res_all_w2v_avg_"+str(train_year)+".csv")
    w2v.columns = w2v.iloc[0]
    w2v = w2v.iloc[2:]
    auc_w2v = w2v.iloc[:, 0:4]
    auc_w2v = auc_w2v[auc_w2v["dataset_of_predictions"].isin(perc)]
    auc_w2v["Train/Test years"] = str(train_year-1)+"+"+str(train_year)+"/"+str(train_year+1)
    auc_w2v["model"] = "w2v"
    
    fin = pd.concat([auc_bow,auc_w2v]).pivot(index=["Train/Test years","dataset_of_predictions"],columns = "model",
                                             values=["predictions splitted train set","predictions tested new month","predictions whole train set"])
    fin_list.append(fin)

In [11]:
pd.concat(fin_list)

Unnamed: 0_level_0,Unnamed: 1_level_0,predictions splitted train set,predictions splitted train set,predictions tested new month,predictions tested new month,predictions whole train set,predictions whole train set
Unnamed: 0_level_1,model,bow,w2v,bow,w2v,bow,w2v
Train/Test years,dataset_of_predictions,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2016+2017/2018,0.05,0.5954631379962193,0.7145557655954631,0.755,0.7687499999999999,0.9809007134363852,0.8195600475624256
2016+2017/2018,1.0,0.5489338076148532,0.6110891536333352,0.5621389275286037,0.5839943857538503,0.9360476414284294,0.6932079405393519
2017+2018/2019,0.05,0.7934027777777778,0.7239583333333334,0.5922722029988465,0.6778162245290273,0.9354166666666668,0.7775
2017+2018/2019,1.0,0.6126668808452916,0.6295066752338117,0.5481855350559098,0.5808220320036983,0.947599682389802,0.6637065968496502
2018+2019/2020,0.05,0.7538265306122449,0.7270408163265306,0.5160612665939766,0.4671657756023033,0.9641869121271566,0.7525275388561943
2018+2019/2020,1.0,0.596755143510287,0.6064516129032258,0.506827404654271,0.4869826714004774,0.9356083763317274,0.6702756788040334
2019+2020/2021,0.05,0.6447588436385256,0.8343304102259215,0.7339731224279837,0.761016803840878,0.9320250208059278,0.8319425428273988
2019+2020/2021,1.0,0.5723010887772194,0.6752306989492919,0.6493484744929164,0.622964436036656,0.9125345414619968,0.6714612204043875
