## This notebook compares the performance of our models with Baly's original models

In [79]:
import concurrent.futures
import importlib
from multiprocessing import Manager, Pool

import pandas as pd

import classification as nmrc

# Reload so that edits to the local classification module are picked up
# without restarting the kernel.
importlib.reload(nmrc)

In [2]:
# Root folder of the News-Media_Reliability data, plus the corpus file and
# the feature folder that live directly underneath it.
news_media_data_folder_location = '../News-Media_Reliability/data/'
news_media_corpus_file = '{}corpus.csv'.format(news_media_data_folder_location)
news_media_feature_location = '{}features/'.format(news_media_data_folder_location)

In [95]:
# Each mimic model has its own feature folder, named after the model's .h5 file.
_mimic_feature_dir = 'features/{}/'
model1_feature_location = _mimic_feature_dir.format('mimic_model_original.h5')
model2_feature_location = _mimic_feature_dir.format('mimic_model_complex.h5')
model3_feature_location = _mimic_feature_dir.format('mimic_model_complex_attention.h5')
model4_feature_location = _mimic_feature_dir.format('mimic_model_complex_attention_cosine_loss.h5')

In [98]:
def run_classification(result_list, name, feature_location, corpus = news_media_corpus_file
                       , task = 'bias', features = ['has_wiki'] ):
    """Run one classification experiment and record its scores.

    Calls ``nmrc.Classification(corpus, features, task, feature_location)``
    and stores items 0-3 of its return value under the keys ``F1``,
    ``Accuracy``, ``MAE`` and ``MAE_U`` respectively.

    Args:
        result_list: List-like collector that the summary dict is appended
            to (the notebook passes a ``Manager().list()`` proxy). Pass
            ``None`` to skip recording and only use the return value.
        name: Label stored under ``'name'`` to identify the model.
        feature_location: Feature folder handed to ``nmrc.Classification``.
        corpus: Corpus file handed to ``nmrc.Classification``.
        task: Task name handed to ``nmrc.Classification``.
        features: Sequence of feature names; stored comma-joined under
            ``'features'``. The default list is shared between calls and is
            not modified by this function.

    Returns:
        dict: The summary with keys ``name``, ``feature_location``, ``task``,
        ``features``, ``F1``, ``Accuracy``, ``MAE`` and ``MAE_U``.
    """
    result = nmrc.Classification(corpus, features, task, feature_location)
    result_dict = {'name':name
                    ,'feature_location':feature_location
                    ,'task':task
                    ,'features':",".join(features)
                    ,'F1':result[0]
                    ,'Accuracy':result[1]
                    ,'MAE':result[2]
                    ,'MAE_U':result[3]}

    if result_list is not None:
        # Record the outcome in the shared collector; the results table is
        # built from this list, so without the append it stays empty.
        result_list.append(result_dict)
        # Progress report (previously unreachable because it followed the return).
        print('Completed {} classifications'.format(len(result_list)))

    return result_dict
    
    

In [118]:
# Shared list proxy; it is passed as the first argument of every
# classification task.
result_list = Manager().list()

tasks = ['bias', 'fact']

# Feature sets to evaluate. Other candidates that can be listed here:
# ['wikicontent'], ['wikitoc'], ['wikicategories'], or all five combined
# ['has_wiki', 'wikicontent', 'wikisummary', 'wikitoc', 'wikicategories'].
features = [
    ['has_wiki'],
    ['wikisummary'],
]

# feature_locations[i] is the feature folder used for model_names[i].
feature_locations = [
    news_media_feature_location,
    model1_feature_location,
    model2_feature_location,
    model3_feature_location,
    model4_feature_location,
]
model_names = [
    'original',
    'original_replica',
    'complex',
    'complex_with_attention',
    'complex_with_attention_cosine_loss',
]

corpus = news_media_corpus_file
processes = []
parameters_list = []
no_of_cpu = 4  # on gcp this can be increased to match capacity

def run_classifier():
    """Run every (model, task, feature set) combination in a process pool.

    Reads the module-level ``model_names``, ``feature_locations``, ``tasks``,
    ``features``, ``corpus``, ``result_list`` and ``no_of_cpu``. One argument
    tuple per combination is stored in the module-level ``parameters_list``
    and dispatched to ``run_classification`` through ``Pool.starmap``.

    Raises:
        ValueError: If there are fewer feature locations than model names.
        Exception: Whatever a worker raised; ``starmap`` re-raises it here
            instead of letting it go unnoticed.
    """
    if len(feature_locations) < len(model_names):
        raise ValueError('every model name needs a matching feature location')

    # Rebuild the task list in place so that re-running this cell does not
    # queue every experiment a second time.
    del parameters_list[:]
    for model_name, feature_location in zip(model_names, feature_locations):
        for task in tasks:
            for feature_list in features:
                print('corpus',corpus)
                print('feature_list',feature_list)
                print('task',task)
                print('feature_location',feature_location)

                parameters_list.append(
                    (result_list, model_name, feature_location, corpus, task, feature_list)
                )

    with Pool(processes=no_of_cpu) as pool:
        # starmap blocks until all tasks are done and propagates the first
        # worker exception, so failures are no longer silently dropped.
        pool.starmap(run_classification, parameters_list)


run_classifier()

corpus ../News-Media_Reliability/data/corpus.csv
feature_list ['has_wiki']
task bias
feature_location ../News-Media_Reliability/data/features/
corpus ../News-Media_Reliability/data/corpus.csv
feature_list ['wikisummary']
task bias
feature_location ../News-Media_Reliability/data/features/
corpus ../News-Media_Reliability/data/corpus.csv
feature_list ['has_wiki']
task fact
feature_location ../News-Media_Reliability/data/features/
corpus ../News-Media_Reliability/data/corpus.csv
feature_list ['wikisummary']
task fact
feature_location ../News-Media_Reliability/data/features/
corpus ../News-Media_Reliability/data/corpus.csv
feature_list ['has_wiki']
task bias
feature_location features/mimic_model_original.h5/
corpus ../News-Media_Reliability/data/corpus.csv
feature_list ['wikisummary']
task bias
feature_location features/mimic_model_original.h5/
corpus ../News-Media_Reliability/data/corpus.csv
feature_list ['has_wiki']
task fact
feature_location features/mimic_model_original.h5/
corpus ../N

In [119]:
# Slice the shared list to get the recorded result dicts and tabulate them.
result_df = pd.DataFrame(data=result_list[:])
# The feature folder path is left out of the table used for display.
display_result_df = result_df.drop(columns=['feature_location'])

In [120]:
# Order rows by task, then by F1, both in descending order.
display_result_df = display_result_df.sort_values(['task', 'F1'], ascending=[False, False])

In [121]:
# Show the comparison table (feature_location column dropped), sorted by task and F1.
display_result_df

Unnamed: 0,Accuracy,F1,MAE,MAE_U,features,name,task
4,58.91182,51.879459,0.535647,0.520988,wikisummary,original,fact
1,59.099437,43.527845,0.566604,0.625173,has_wiki,original,fact
12,50.65666,28.190525,0.725141,0.970357,wikisummary,complex,fact
5,50.844278,28.082098,0.725141,0.971731,wikisummary,original_replica,fact
16,50.750469,28.076046,0.724203,0.97037,wikisummary,complex_with_attention,fact
18,50.750469,28.076046,0.724203,0.97037,wikisummary,complex_with_attention_cosine_loss,fact
0,50.844278,22.470978,0.731707,1.0,has_wiki,original_replica,fact
6,50.844278,22.470978,0.731707,1.0,has_wiki,complex,fact
11,50.844278,22.470978,0.731707,1.0,has_wiki,complex_with_attention,fact
15,50.844278,22.470978,0.731707,1.0,has_wiki,complex_with_attention_cosine_loss,fact


In [110]:
# has_wiki - bias
# Filter on the feature set as well as the task so the table matches its label
# even when several feature sets were run.
display_result_df[(display_result_df['task'] == 'bias')
                  & (display_result_df['features'] == 'has_wiki')]

Unnamed: 0,Accuracy,F1,MAE,MAE_U,features,name,task
1,26.829268,14.327093,1.631332,2.137366,has_wiki,original,bias
0,26.078799,10.185631,1.400563,1.738304,has_wiki,original_replica,bias
4,26.078799,10.185631,1.400563,1.738304,has_wiki,complex,bias
5,26.078799,10.185631,1.400563,1.738304,has_wiki,complex_with_attention,bias
8,26.078799,10.185631,1.400563,1.738304,has_wiki,complex_with_attention_cosine_loss,bias


In [111]:
# has_wiki - fact
# Filter on the feature set as well as the task so the table matches its label
# even when several feature sets were run.
display_result_df[(display_result_df['task'] == 'fact')
                  & (display_result_df['features'] == 'has_wiki')]

Unnamed: 0,Accuracy,F1,MAE,MAE_U,features,name,task
3,59.099437,43.527845,0.566604,0.625173,has_wiki,original,fact
2,50.844278,22.470978,0.731707,1.0,has_wiki,original_replica,fact
6,50.844278,22.470978,0.731707,1.0,has_wiki,complex,fact
7,50.844278,22.470978,0.731707,1.0,has_wiki,complex_with_attention,fact
9,50.844278,22.470978,0.731707,1.0,has_wiki,complex_with_attention_cosine_loss,fact


In [115]:
# wikisummary - bias
# Filter on the feature set as well as the task so the table matches its label
# even when several feature sets were run.
display_result_df[(display_result_df['task'] == 'bias')
                  & (display_result_df['features'] == 'wikisummary')]

Unnamed: 0,Accuracy,F1,MAE,MAE_U,features,name,task
1,37.429644,30.023712,1.470919,1.97697,wikisummary,original,bias
4,29.924953,16.903545,1.340525,1.665883,wikisummary,complex,bias
5,29.268293,16.80035,1.349906,1.670594,wikisummary,complex_with_attention,bias
8,29.268293,16.80035,1.349906,1.670594,wikisummary,complex_with_attention_cosine_loss,bias
0,29.831144,16.518796,1.345216,1.675122,wikisummary,original_replica,bias


In [116]:
# wikisummary - fact
# Filter on the feature set as well as the task so the table matches its label
# even when several feature sets were run.
display_result_df[(display_result_df['task'] == 'fact')
                  & (display_result_df['features'] == 'wikisummary')]

Unnamed: 0,Accuracy,F1,MAE,MAE_U,features,name,task
3,58.91182,51.879459,0.535647,0.520988,wikisummary,original,fact
6,50.844278,28.436421,0.723265,0.968498,wikisummary,complex,fact
7,50.938086,28.384771,0.722326,0.968453,wikisummary,complex_with_attention,fact
9,50.938086,28.384771,0.722326,0.968453,wikisummary,complex_with_attention_cosine_loss,fact
2,50.938086,28.105398,0.725141,0.972418,wikisummary,original_replica,fact
