please please please be the final version

# import stuff

In [64]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import spacy
from hpsklearn import svc
from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK
from sklearn.base import clone
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# prep data

In [2]:
# initial dataset stuff
# The spaCy pipeline is loaded once at module level and reused by embed_dataset();
# loading it inside the function would repeat the slow load on every call.
# NOTE(review): "spacy-twitter" is presumably a locally installed pipeline with twitter GloVe vectors - confirm.
nlp = spacy.load("spacy-twitter")

# function for glove embeddings
def embed_dataset(dataset_text):
    """
    embeds every text in dataset_text with the module-level spaCy pipeline `nlp`.
    returns a plain python list holding one vector (itself a list) per text, in input order.
    """
    vectors = []
    for text in dataset_text:
        vectors.append(nlp(text).vector)
    # go through a numpy array first so every vector is converted to nested python lists
    return np.array(vectors).tolist()

# function to load dataset from folder. Also embeds the text.
def get_dataset(name):
    """
    loads a dataset and embeds the text. text must be in a column named "text".
    datasets are in the folder datasets/
    name must be a string that matches the csv file in datasets (without the .csv extension)

    returns the dataframe with the "Unnamed: 0" column renamed to "entry" and an extra
    column "e_text" holding the embedding of each row's text.
    """
    # os.path.join instead of a hard-coded "\\" so the path also resolves outside Windows
    dataset = pd.read_csv(os.path.join("datasets", f"{name}.csv"))
    # the entry label never carries over into the csv, so restore it here
    dataset = dataset.rename(columns={"Unnamed: 0": "entry"})
    dataset['e_text'] = embed_dataset(dataset['text'])
    return dataset

# train models

In [82]:
from hpsklearn import HyperoptEstimator
from hyperopt import hp

#function to optimize model with hyperopt
def optimize_model(model, X_train, y_train, evals):
    """
    runs a hyperopt search for one model and returns the fitted HyperoptEstimator.

    model: an hpsklearn model definition - list on github. variables can be preset or given a range with hp
    X_train and y_train: training set
    evals: number of trials to do
    """
    estimator_settings = dict(
        classifier=model,
        preprocessing=[],   # no preprocessing steps are searched
        max_evals=evals,
        trial_timeout=120,
        algo=tpe.suggest,
        verbose=False,
    )
    tuned = HyperoptEstimator(**estimator_settings)
    tuned.fit(X_train, y_train, random_state=42)
    return tuned

def evaluate_model(model, X_test, y_test):
    """
    prints the accuracy and macro F1 of a fitted model on a test set (as percentages with
    2 decimals) and shows its confusion matrix.

    model: a fitted model with a predict method
    X_test and y_test: test set
    """
    pred_y = model.predict(X_test)
    acc_mod = accuracy_score(y_test, pred_y)
    print("Accuracy:", float("{0:.2f}".format(acc_mod*100)), "%")
    f1_mod = f1_score(y_test, pred_y, average="macro")
    print("F1:", float("{0:.2f}".format(f1_mod*100)), "%")
    cm = confusion_matrix(y_test, pred_y)
    # confusion_matrix orders its rows/columns by sorted label value, so the negative class
    # (False / 0 / "false") comes first - the tick labels have to follow that order
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["false", "true"])
    disp.plot()
    plt.show()

In [100]:
# get breakdown of categories with confidence level, as well as dataset coverage
def get_category_info(dataset_name, dataset, confidence, size_limit):
    """
    get breakdown of categories with confidence level, as well as dataset coverage.

    dataset_name: name used to locate <dataset_name>_cats/<dataset_name>_categories_organised.json
    dataset: the dataset the categories refer to (only its length is used)
    confidence: an entry counts for a category only if its score is above this value
    size_limit: categories with fewer qualifying entries than this are skipped

    returns (info, percentages):
      info: list of (category, number_of_qualifying_entries) for every kept category
      percentages: {False: % of slots not covered by any kept category, True: % covered}.
                   both keys are always present, so percentages[True] is safe even when nothing is covered.
    """
    # os.path.join instead of a hard-coded "\\" so the path also resolves outside Windows
    file_name = os.path.join(f"{dataset_name}_cats", f"{dataset_name}_categories_organised.json")
    with open(file_name) as f:  # closes the file even if the json is malformed
        data = json.load(f)
    info = []
    # NOTE(review): the +2 head-room presumably allows for entry ids beyond len(dataset) - confirm
    dataset_coverage = np.zeros([len(dataset)+2], dtype=bool)
    for category, entries in data.items():
        cat_entries = [int(i) for i, score in entries.items() if score > confidence]
        if len(cat_entries) < size_limit:
            print(f"Skipped category: {category} due to low numbers")
            continue
        info.append((category, len(cat_entries)))
        # mark every entry of this category as covered
        dataset_coverage[cat_entries] = True
    total = len(dataset_coverage)
    covered = int(np.count_nonzero(dataset_coverage))
    percentages = {False: (total - covered) * 100 / total, True: covered * 100 / total}
    return info, percentages

In [None]:
def train_models_v2(dataset_name, train_set, confidence, size_limit, model_list):
    """
    trains a set of pre-tuned models in each category. returns the best model for each category, in the form {'category': [modelscore, modelname, fittedmodel]}
    categories that are skipped, or for which no model could be trained, are left out of the result.

    dataset_name: a string with the name of the training set. used for calling the category file
    train_set: the training set to use. needs a 'target' column and an 'e_text' column; rows are selected by index label
    confidence: the confidence required to consider an entry part of a category
    size_limit: the number of entries needed in a category to consider that category for training
    model_list: the list of models to train. in the form [("model_name1", best_params1, search_space1, model1), ("model_name2", best_params2, search_space2, model2), etc]
                best_params is the dict returned by hyperopt's fmin for search_space; model is an unfitted sklearn estimator
    """
    # os.path.join instead of a hard-coded "\\" so the path also resolves outside Windows
    file_name = os.path.join(f"{dataset_name}_cats", f"{dataset_name}_categories_organised.json")
    with open(file_name) as f:
        data = json.load(f)
    category_models = {} #this will be returned
    for category, entries in data.items():
        cat_entries = [int(i) for i, score in entries.items() if score > confidence]

        # skip category if size of category is below limit
        if len(cat_entries) < size_limit:
            print(f"Skipped category: {category} due to low numbers")
            continue

        category_data = train_set.filter(axis=0, items=cat_entries)

        #split validation set
        X = category_data.drop('target', axis=1)
        y = category_data["target"]
        try:
            X_train, X_val, y_train, y_val = train_test_split(X, y, train_size=0.8, random_state=42, stratify=y)
        except ValueError as error:
            # e.g. too few rows, or a class too small for a stratified split
            print(f"Skipped category: {category} because it could not be split ({error})")
            continue

        # skip category if split only has one class
        if len(np.unique(y_train)) <= 1:
            print(f"Skipped category: {category} due to class issues")
            continue

        X_train_text = np.array(list(X_train['e_text']))
        X_val_text = np.array(list(X_val['e_text']))

        trained_models = []
        # train models from list
        for model_name, best_params, search_space, model in model_list:
            try:
                # fit a fresh copy: refitting the shared instance would overwrite the model
                # already stored for earlier categories
                candidate = clone(model)
                # space_eval turns the raw fmin result into real parameter values
                candidate.set_params(**space_eval(search_space, best_params))
                candidate.fit(X_train_text, y_train)
            except Exception as error:
                print(f"Error training {model_name} in category {category}, skipping ({error!r})")
                continue
            trained_models.append((model_name, candidate))

        # nothing trained -> leave the category out instead of storing a placeholder
        # that would break prediction later
        if not trained_models:
            print(f"Skipped category: {category} because no model could be trained")
            continue

        #get the best model (first one wins on ties)
        scored = [(fitted.score(X_val_text, y_val), model_name, fitted) for model_name, fitted in trained_models]
        best_model = max(scored, key=lambda item: item[0])

        #add best model to list
        category_models[category] = list(best_model)
    return category_models

In [78]:
def train_models(dataset_name, train_set, confidence, size_limit, model_list, evals=15):
    """
    trains a set of models in each category. returns the best model for each category, in the form {'category': [modelscore, modelname, fittedmodel]}
    categories that are skipped, or for which no model could be trained, are left out of the result.

    dataset_name: a string with the name of the training set. used for calling the category file
    train_set: the training set to use. needs a 'target' column and an 'e_text' column; rows are selected by index label
    confidence: the confidence required to consider an entry part of a category
    size_limit: the number of entries needed in a category to consider that category for training
    model_list: the list of models to train. in the form [("model_name1", model1), ("model_name2", model2), etc]
    evals: number of hyperopt trials per model and category (default 15, the value that used to be hard-coded)
    """
    # os.path.join instead of a hard-coded "\\" so the path also resolves outside Windows
    file_name = os.path.join(f"{dataset_name}_cats", f"{dataset_name}_categories_organised.json")
    with open(file_name) as f:
        data = json.load(f)
    category_models = {} #this will be returned
    for category, entries in data.items():
        cat_entries = [int(i) for i, score in entries.items() if score > confidence]

        # skip category if size of category is below limit
        if len(cat_entries) < size_limit:
            print(f"Skipped category: {category} due to low numbers")
            continue

        category_data = train_set.filter(axis=0, items=cat_entries)

        #split validation set
        X = category_data.drop('target', axis=1)
        y = category_data["target"]
        try:
            X_train, X_val, y_train, y_val = train_test_split(X, y, train_size=0.8, random_state=42, stratify=y)
        except ValueError as error:
            # e.g. too few rows, or a class too small for a stratified split
            print(f"Skipped category: {category} because it could not be split ({error})")
            continue

        # skip category if split only has one class
        if len(np.unique(y_train)) <= 1:
            print(f"Skipped category: {category} due to class issues")
            continue

        X_train_text = np.array(list(X_train['e_text']))
        X_val_text = np.array(list(X_val['e_text']))

        trained_models = []
        # train models from list
        for model_name, model in model_list:
            try:
                optimized = optimize_model(model, X_train_text, y_train, evals)
                # refit the winning learner on the category's training split
                fitted = optimized.best_model()['learner'].fit(X_train_text, y_train)
            except Exception as error:
                print(f"Error training {model_name} in category {category}, skipping ({error!r})")
                continue
            trained_models.append((model_name, fitted))

        # nothing trained -> leave the category out instead of storing a placeholder
        # that would break prediction later
        if not trained_models:
            print(f"Skipped category: {category} because no model could be trained")
            continue

        #get the best model (first one wins on ties)
        scored = [(fitted.score(X_val_text, y_val), model_name, fitted) for model_name, fitted in trained_models]
        best_model = max(scored, key=lambda item: item[0])

        #add best model to list
        category_models[category] = list(best_model)
    return category_models

# voting

In [13]:
def predict_points(trained_models, test_cat_file, X_test):
    """
    predict points using the trained models. returns a list with one True/False prediction per row of X_test

    trained_models: the models trained in each category, in the form {category: [modelscore, modelname, fittedmodel]}
    test_cat_file: the filepath to the category file (json, keyed by the row index as a string)
    X_test: the test set X values. needs an 'e_text' column; its index is used to look up the categories

    every model whose category matches one of the point's main categories votes with its
    validation score as weight. a point with no usable model, or a tie, is predicted False.
    """
    #load category data
    with open(test_cat_file) as f:  # closes the file even if the json is malformed
        category_data = json.load(f)

    final_predictions = []

    for index, row in X_test.iterrows():
        test_point = np.array([row['e_text']])  # one-row batch for model.predict
        point_categories = category_data[str(index)]

        # get weights of each point's topic
        # NOTE(review): the summed weights are collected but the vote below only uses the
        # model scores - confirm whether the weights were meant to take part in the vote
        topic_weights = {}
        for category, weight in point_categories.items():
            # category paths presumably look like "/Main/Sub"; element 1 is the main category - confirm
            main_category = category.split("/")[1]
            if main_category not in trained_models:
                continue
            topic_weights[main_category] = topic_weights.get(main_category, 0) + weight

        # every matching category model votes with its validation score
        truefalse_scores = {True: 0, False: 0}
        for category in topic_weights:
            modelscore, _modelname, model = trained_models[category]
            prediction = model.predict(test_point)[0]
            truefalse_scores[prediction] += modelscore

        #determine final prediction (False unless True strictly wins)
        final_predictions.append(bool(truefalse_scores[True] > truefalse_scores[False]))
    return final_predictions

# evaluate results

In [31]:
def check_score(test, pred):
    """
    scores predictions against the true labels.

    test: the true labels
    pred: the predicted labels
    returns (accuracy, macro_f1), both as percentages rounded to 2 decimals
    """
    acc = accuracy_score(test, pred)
    f1 = f1_score(test, pred, average="macro")
    # the confusion-matrix plot that used to live here was disabled, so the stray
    # plt.show() (nothing was drawn) is gone as well; evaluate_model() still plots one
    return float("{0:.2f}".format(acc*100)), float("{0:.2f}".format(f1*100))

# pipeline

get dataset -> train models on dataset -> make predictions and vote -> evaluate results

In [113]:
def train_and_evaluate(train_set, test_set, confidence, size_limit, model_list):
    """
    Train on one dataset and evaluate on its own held-out split plus the other datasets.
    Returns (dataset_coverage, trained_models, final_results):
      dataset_coverage: percentage of the training dataset covered by the kept categories
      trained_models: {category: [modelscore, modelname, fittedmodel]} as returned by train_models
      final_results: [(dataset_name, (accuracy, f1)), ...] - the in-domain result first, then one per test set

    train_set: training set. In the form ["dataset_name", file_reference_name, dataset]
    test_set: testing set(s). In the form [("dataset_name1", file_reference_name1, dataset1), ("dataset_name2", file_reference_name2, dataset2), etc]
    confidence: confidence threshold to consider a training point
    size_limit: size threshold to consider training a category
    model_list: list of models to train, in the form [("model_name1", model1), ("model_name2", model2), etc]
    """
    #prep dataset
    dataset_name, ref_name, dataset = train_set[0], train_set[1], train_set[2]
    X = dataset.drop("target", axis=1)
    y = dataset["target"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42, stratify=y)
    training_set = pd.concat([X_train, y_train], axis=1)

    #get dataset coverage
    # NOTE(review): this looks the category file up by dataset_name while train_models below
    # uses ref_name - confirm that both names resolve to the intended file
    info, coverage = get_category_info(dataset_name, dataset, confidence, size_limit)
    # the True key is missing when no entry is covered by a kept category
    dataset_coverage = coverage.get(True, 0.0)

    #train models
    trained_models = train_models(ref_name, training_set, confidence, size_limit, model_list)

    #make predictions on test set
    test_cat_file = f"{ref_name}_categories.json"
    predictions = predict_points(trained_models, test_cat_file, X_test)
    final_results = [(dataset_name, check_score(y_test, predictions))]

    #make predictions on out of domain test sets
    # separate loop names so the training set's ref_name is not overwritten
    for set_name, set_ref_name, set_data in test_set:
        set_cat_file = f"{set_ref_name}_categories.json"
        set_X = set_data.drop("target", axis=1)
        set_y = set_data["target"]
        set_predictions = predict_points(trained_models, set_cat_file, set_X)
        final_results.append((set_name, check_score(set_y, set_predictions)))

    return dataset_coverage, trained_models, final_results

In [114]:
def run_tests(tests, confidence, size_limit, model_list):
    """
    runs the full pipeline once per dataset: each dataset in turn is the training set and
    all the others are the out of domain test sets.

    tests: list of datasets, each in the form ["dataset_name", file_reference_name, dataset]
    confidence: confidence threshold to consider a training point
    size_limit: size threshold to consider training a category
    model_list: list of models to train, in the form [("model_name1", model1), ("model_name2", model2), etc]

    returns (coverage, trained_models, test_results), three lists with one
    ("training_dataset_name", value) pair per run.
    """
    coverage = []
    trained_models = []
    test_results = []
    for i in tqdm(range(len(tests))):
        train = tests[i]
        others = tests[:i] + tests[i + 1:]
        # train_and_evaluate returns three values; unpacking only two raised a ValueError
        run_coverage, models, results = train_and_evaluate(train, others, confidence, size_limit, model_list)
        coverage.append((train[0], run_coverage))
        trained_models.append((train[0], models))
        test_results.append((train[0], results))
    return coverage, trained_models, test_results

# Prep Models

In [54]:
from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK
from sklearn.ensemble import RandomForestClassifier

# load PHEME and make a stratified 80/20 split for the random forest search below
pheme = get_dataset("pheme")
X = pheme.drop("target", axis=1)
y = pheme["target"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42, stratify=y
)
# stack the per-row embeddings into 2D arrays
X_train_text = np.array(list(X_train['e_text']))
X_test_text = np.array(list(X_test['e_text']))

def objective(search_space):
    """
    hyperopt objective for the random forest: fits a forest with the sampled parameters on the
    module-level training split and returns the negated accuracy as the loss to minimise.

    search_space: one sampled point of the search space (a dict of RandomForestClassifier parameters)
    """
    # NOTE(review): the score is computed on X_test_text / y_test, so the test split is what guides the tuning
    forest = RandomForestClassifier(random_state=42, **search_space)
    forest.fit(X_train_text, y_train)
    accuracy = accuracy_score(y_test, forest.predict(X_test_text))
    return {'loss': -accuracy, 'status': STATUS_OK}

# new search space
search_space = {
    'n_estimators': hp.randint('n_estimators', 200, 1000),
    'max_depth': hp.randint('max_depth', 10, 200),
    'min_samples_split': hp.uniform('min_samples_split', 0, 1),
    'min_samples_leaf': hp.randint('min_samples_leaf', 1, 10),
    'criterion': hp.choice('criterion', ['gini', 'entropy']),
    'max_features': hp.choice('max_features', ['sqrt', 'log2']),
}

# implement Hyperopt
trials = Trials()
search_settings = dict(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=2,
    timeout=20,
    trials=trials,
)
best_params = fmin(**search_settings)

# space_eval maps the raw fmin result back onto actual parameter values
print(space_eval(search_space, best_params))
print(best_params)
print(trials.best_trial['result']['loss'])

100%|██████████| 20/20 [30:26<00:00, 91.34s/trial, best loss: -0.8178988326848249] 


In [77]:
from hpsklearn import sgd_classifier, k_neighbors_classifier, logistic_regression, svc, random_forest_classifier, mlp_classifier, gaussian_nb


# models to tune per category, in the form [("model_name", hpsklearn_model), ...]
model_list = [
    ("SVC", svc("SVC", random_state=42)),
    ("KNN", k_neighbors_classifier("knn")),
    # the penalty options are a list, not a set: set iteration order is not stable between
    # runs, so the option behind each hp.choice index would change from run to run
    ("Logistic Regression", logistic_regression("LR", random_state=42, solver="saga", penalty=hp.choice("penalty", [None, "l1", "l2"]))),
    ("Random Forest", random_forest_classifier("Random Forest", random_state=42)),
    ("MLP", mlp_classifier("MLP", random_state=42)),
    ("Gaussian NB", gaussian_nb("GNB")),
    ("SGD", sgd_classifier("SGD", random_state=42)),
]

In [74]:
from sklearn.linear_model import SGDClassifier

# baseline on the full PHEME split; despite the variable name this is an SGDClassifier with default settings
MNB = SGDClassifier()
MNB.fit(X_train_text, y_train)
pred = MNB.predict(X_test_text)
print(check_score(y_test, pred))

(78.29, 77.22)


# run pipelines

In [45]:
# load and embed every dataset used by the pipelines
pheme = get_dataset("pheme")
twitter = get_dataset("twitter")
# NOTE(review): row 1491 is presumably where the twitter15 part ends and twitter16 starts - confirm
twitter15, twitter16 = twitter.iloc[:1491], twitter.iloc[1491:]
# NOTE(review): the reason for dropping rows 1933 and 3564 is not visible here - confirm
weibo = get_dataset("weibo").drop([1933, 3564])

In [79]:
# each entry: [display name, file reference name, dataset]
tests = [
    ["PHEME", "pheme", pheme],
    ["twitter15", "twitter", twitter15],
    ["twitter16", "twitter", twitter16],
]

In [115]:
# each entry: [display name, file reference name, dataset]
tests2 = [
    ["PHEME", "pheme", pheme],
    ["twitterFULL", "twitter", twitter],
    ["WEIBO", "weibo", weibo],
]

In [35]:
# run 1: confidence above 0.2, categories need at least 200 entries
coverage, models1, results1 = run_tests(tests2, confidence=0.2, size_limit=200, model_list=model_list)

100%|██████████| 1/1 [00:02<00:00,  2.97s/trial, best loss: 0.23050847457627122]
100%|██████████| 1/1 [00:01<00:00,  1.92s/trial, best loss: 0.22372881355932206]
100%|██████████| 1/1 [00:02<00:00,  2.02s/trial, best loss: 0.24745762711864405]
100%|██████████| 1/1 [00:02<00:00,  2.28s/trial, best loss: 0.4148606811145511]
100%|██████████| 1/1 [00:01<00:00,  1.78s/trial, best loss: 0.28792569659442724]
100%|██████████| 1/1 [00:02<00:00,  2.42s/trial, best loss: 0.2662538699690402]




  0%|          | 0/1 [00:00<?, ?trial/s, best loss=?]



100%|██████████| 1/1 [00:01<00:00,  1.78s/trial, best loss: 0.23703703703703705]
100%|██████████| 1/1 [00:01<00:00,  1.70s/trial, best loss: 0.19999999999999996]
100%|██████████| 1/1 [00:01<00:00,  1.86s/trial, best loss: 0.20740740740740737]




100%|██████████| 1/1 [00:01<00:00,  1.75s/trial, best loss: 0.128]
100%|██████████| 1/1 [00:01<00:00,  1.83s/trial, best loss: 0.11199999999999999]
100%|██████████| 1/1 [00:01<00:00,  1.82s/trial, best loss: 0.11199999999999999]
100%|██████████| 1/1 [00:02<00:00,  2.03s/trial, best loss: 0.28402366863905326]
100%|██████████| 1/1 [00:01<00:00,  1.74s/trial, best loss: 0.3195266272189349]
100%|██████████| 1/1 [00:01<00:00,  1.78s/trial, best loss: 0.28402366863905326]
100%|██████████| 1/1 [00:01<00:00,  1.73s/trial, best loss: 0.17142857142857137]
100%|██████████| 1/1 [00:01<00:00,  1.72s/trial, best loss: 0.17142857142857137]
100%|██████████| 1/1 [00:02<00:00,  2.11s/trial, best loss: 0.1785714285714286]
Skipped category: Books & Literature due to low numbers
Skipped category: Reference due to low numbers
Skipped category: Jobs & Education due to low numbers
Skipped category: Health due to low numbers
Skipped category: Business & Industrial due to low numbers
Skipped category: Autos & V

In [47]:
# run 2: confidence above 0.5, no minimum category size
coverage, models2, results2 = run_tests(tests2, confidence=0.5, size_limit=0, model_list=model_list)

  0%|          | 0/3 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:03<00:00,  3.87s/trial, best loss: 0.29491525423728815]
100%|██████████| 1/1 [00:02<00:00,  2.07s/trial, best loss: 0.21525423728813564]
100%|██████████| 1/1 [00:01<00:00,  1.80s/trial, best loss: 0.25254237288135595]
100%|██████████| 1/1 [00:02<00:00,  2.19s/trial, best loss: 0.2538699690402477]
100%|██████████| 1/1 [00:01<00:00,  1.80s/trial, best loss: 0.23529411764705888]
100%|██████████| 1/1 [00:02<00:00,  2.29s/trial, best loss: 0.24148606811145512]
100%|██████████| 1/1 [00:01<00:00,  1.74s/trial, best loss: 0.2148148148148148]
100%|██████████| 1/1 [00:01<00:00,  1.71s/trial, best loss: 0.19259259259259254]
100%|██████████| 1/1 [00:01<00:00,  1.70s/trial, best loss: 0.20740740740740737]
100%|██████████| 1/1 [00:01<00:00,  1.73s/trial, best loss: 0.128]
100%|██████████| 1/1 [00:01<00:00,  1.71s/trial, best loss: 0.128]
100%|██████████| 1/1 [00:01<00:00,  1.87s/trial, best loss: 0.10399999999999998]
100%|██████████| 1/1 [00:02<00:00,  2.29s/trial, best loss

 33%|███▎      | 1/3 [01:20<02:41, 80.74s/it]

100%|██████████| 1/1 [00:01<00:00,  1.71s/trial, best loss: 0.2954545454545454]
100%|██████████| 1/1 [00:01<00:00,  1.87s/trial, best loss: 0.2954545454545454]
100%|██████████| 1/1 [00:01<00:00,  1.79s/trial, best loss: 0.2727272727272727]
100%|██████████| 1/1 [00:01<00:00,  1.73s/trial, best loss: 0.22033898305084743]
100%|██████████| 1/1 [00:01<00:00,  1.70s/trial, best loss: 0.3389830508474576]
100%|██████████| 1/1 [00:01<00:00,  1.96s/trial, best loss: 0.23728813559322037]








100%|██████████| 1/1 [00:01<00:00,  1.70s/trial, best loss: 0.27118644067796616]
100%|██████████| 1/1 [00:01<00:00,  1.73s/trial, best loss: 0.11864406779661019]
100%|██████████| 1/1 [00:01<00:00,  1.68s/trial, best loss: 0.27118644067796616]
100%|██████████| 1/1 [00:01<00:00,  1.79s/trial, best loss: 0.4285714285714286]
100%|██████████| 1/1 [00:01<00:00,  1.73s/trial, best loss: 0.2941176470588235]
100%|██████████| 1/1 [00:01<00:00,  1.75s/trial, best loss: 0.2773109243697479]
Skipped category: Food & Drink due to low numbers
100%|██████████| 1/1 [00:01<00:00,  1.84s/trial, best loss: 0.25]
100%|██████████| 1/1 [00:01<00:00,  1.82s/trial, best loss: 0.18548387096774188]
100%|██████████| 1/1 [00:01<00:00,  1.99s/trial, best loss: 0.29032258064516125]




100%|██████████| 1/1 [00:01<00:00,  1.86s/trial, best loss: 0.6666666666666667]
100%|██████████| 1/1 [00:02<00:00,  2.12s/trial, best loss: 0.36111111111111116]
100%|██████████| 1/1 [00:02<00:00,  2.02s/trial, best loss: 0.5]
Skipped category: Internet & Telecom due to low numbers
Skipped category: Computers & Electronics due to low numbers
Skipped category: Health due to low numbers
Skipped category: Pets & Animals due to low numbers
Skipped category: Reference due to low numbers
Skipped category: Adult due to low numbers
Skipped category: Business & Industrial due to low numbers
Skipped category: Books & Literature due to low numbers
Skipped category: Jobs & Education due to low numbers
Skipped category: Shopping due to low numbers
Skipped category: Beauty & Fitness due to low numbers
Skipped category: Autos & Vehicles due to low numbers
Skipped category: Science due to low numbers
Skipped category: Finance due to low numbers
Skipped category: Travel & Transportation due to low numbe

 67%|██████▋   | 2/3 [02:28<01:13, 73.21s/it]

100%|██████████| 1/1 [00:01<00:00,  1.76s/trial, best loss: 0.38181818181818183]
100%|██████████| 1/1 [00:01<00:00,  1.71s/trial, best loss: 0.2545454545454545]
100%|██████████| 1/1 [00:01<00:00,  1.89s/trial, best loss: 0.1636363636363637]




100%|██████████| 1/1 [00:01<00:00,  1.77s/trial, best loss: 0.4558823529411765]
100%|██████████| 1/1 [00:01<00:00,  1.67s/trial, best loss: 0.19117647058823528]
100%|██████████| 1/1 [00:02<00:00,  2.01s/trial, best loss: 0.13235294117647056]




Skipped category: Reference due to low numbers




100%|██████████| 1/1 [00:01<00:00,  1.72s/trial, best loss: 0.25806451612903225]
100%|██████████| 1/1 [00:01<00:00,  1.84s/trial, best loss: 0.16129032258064513]
100%|██████████| 1/1 [00:01<00:00,  1.81s/trial, best loss: 0.19354838709677424]
Skipped category: Sports due to low numbers
Skipped category: Games due to low numbers
Skipped category: Travel & Transportation due to low numbers




100%|██████████| 1/1 [00:01<00:00,  1.70s/trial, best loss: 0.2666666666666667]
100%|██████████| 1/1 [00:01<00:00,  1.67s/trial, best loss: 0.09999999999999998]
100%|██████████| 1/1 [00:01<00:00,  1.74s/trial, best loss: 0.16666666666666663]
Skipped category: Online Communities due to low numbers




100%|██████████| 1/1 [00:01<00:00,  1.71s/trial, best loss: 0.41666666666666663]
100%|██████████| 1/1 [00:01<00:00,  1.81s/trial, best loss: 0.18055555555555558]
100%|██████████| 1/1 [00:01<00:00,  1.83s/trial, best loss: 0.19444444444444442]




Skipped category: Science due to low numbers





100%|██████████| 1/1 [00:01<00:00,  1.72s/trial, best loss: 0.21621621621621623]
100%|██████████| 1/1 [00:01<00:00,  1.81s/trial, best loss: 0.14414414414414412]
100%|██████████| 1/1 [00:01<00:00,  1.76s/trial, best loss: 0.2792792792792793]
Skipped category: Shopping due to low numbers
Skipped category: Finance due to low numbers
Skipped category: Real Estate due to low numbers
Skipped category: Jobs & Education due to low numbers
100%|██████████| 1/1 [00:01<00:00,  1.74s/trial, best loss: 0.2142857142857143]
100%|██████████| 1/1 [00:01<00:00,  1.70s/trial, best loss: 0.34285714285714286]
100%|██████████| 1/1 [00:01<00:00,  1.77s/trial, best loss: 0.24285714285714288]
Skipped category: Business & Industrial due to low numbers
Skipped category: Computers & Electronics due to low numbers
Skipped category: Internet & Telecom due to low numbers
Skipped category: Hobbies & Leisure due to low numbers
Skipped category: Books & Literature due to low numbers
Skipped category: Beauty & Fitness 

100%|██████████| 3/3 [03:13<00:00, 64.64s/it]


In [None]:
# run 3: confidence above 0.3, categories need at least 50 entries
coverage, models3, results3 = run_tests(tests2, confidence=0.3, size_limit=50, model_list=model_list)

In [None]:
# run 4: same settings as run 2 (confidence above 0.5, no minimum category size)
coverage, models4, results4 = run_tests(tests2, confidence=0.5, size_limit=0, model_list=model_list)

In [52]:
# NOTE(review): `results` is not assigned at module level in any visible cell (the runs store
# results1..results4) - confirm which run the output below belongs to
results

[('PHEME',
  [('PHEME', (81.4, 79.98)),
   ('twitterFULL', (55.24, 52.99)),
   ('WEIBO', (50.0, 37.38))]),
 ('twitterFULL',
  [('twitterFULL', (73.59, 73.56)),
   ('PHEME', (54.72, 54.52)),
   ('WEIBO', (50.39, 50.35))]),
 ('WEIBO',
  [('WEIBO', (73.74, 73.55)),
   ('PHEME', (51.11, 48.08)),
   ('twitterFULL', (53.55, 53.29))])]

In [None]:
def organise_results(list_of_results):
    for results in list_of_results:
        for training_set, metrics in results:
            