# Functions used to compute the different scores

They can be moved to a util.py file later.

In [5]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
import pyarrow.feather as feather
import matplotlib.pyplot as plt
import numpy as np

In [6]:
# function for utils later to get the average weights of articles from a DataFrame containing path information

def calculate_avg_article_weights(df, count_cutoff=30, scaling=None):
    """
    Calculate the average weights of articles from a DataFrame containing path information.

    Parameters:
        df (pd.DataFrame): Input DataFrame with the following columns:
            - 'simplified_path': List of articles in the path
            - 'simplified_path_length': Length of the simplified path
            - 'distance': Distance associated with the path
        scaling (str): Type of scaling to use. Options are 'minmax', 'standard', and 'robust' or None
        count_cutoff (int): Minimum number of appearances for an article to be considered

    Returns:
        pd.DataFrame: A DataFrame containing:
            - 'article': Article name
            - 'n_appearances': Number of times the article appeared in paths
            - 'weighted_avg': Weighted average of distances for the article
    """
    # Copy and preprocess the DataFrame
    df = df[['simplified_path', 'simplified_path_length', 'distance']].copy()
    df['simplified_path'] = df['simplified_path'].apply(lambda l: l[1:-1])  # Remove start and end articles

    # Calculate weight for each path
    df['weight'] = df['distance'] / df['simplified_path_length']

    # Initialize an empty DataFrame to store results
    avg_article_weight_df = pd.DataFrame(columns=['article', 'n_appearances', 'weighted_avg'])
    avg_article_weight_df.set_index('article', inplace=True)

    # Iterate through each row to calculate weights
    for _, row in df.iterrows():
        weight = row['weight']
        simplified_path = row['simplified_path']

        for article in simplified_path:
            if article not in avg_article_weight_df.index:
                avg_article_weight_df.loc[article] = [0, 0]

            # Update counts and weighted sums
            avg_article_weight_df.at[article, 'n_appearances'] += 1
            avg_article_weight_df.at[article, 'weighted_avg'] += weight

    # Calculate the weighted average by dividing weighted sum by counts
    avg_article_weight_df['weighted_avg'] = avg_article_weight_df['weighted_avg'] / avg_article_weight_df['n_appearances']

    # Filter out articles that appear less than the cutoff
    avg_article_weight_df = avg_article_weight_df[avg_article_weight_df['n_appearances'] >= count_cutoff]

    # Normalize the weighted average
    if scaling is not None:

        if scaling == 'minmax':
            scaler = MinMaxScaler()
        elif scaling == 'standard':
            scaler = StandardScaler()
        elif scaling == 'robust':
            scaler = RobustScaler()

        avg_article_weight_df[scaling] = scaler.fit_transform(avg_article_weight_df[['weighted_avg']])


    print(f"Number of unique articles after weighting: {avg_article_weight_df.shape[0]}")

    return avg_article_weight_df#.reset_index()


# ------------------------------------------------


# code a function that returns the ratio of the number of times an article appears in unfinished paths over the total number of times it appears

def ratio_unfinished(in_df, count_cutoff=30, scaling=None):
    """
    Calculate the ratio of the number of times an article appears in unfinished paths over the total number of times it appears.

    Parameters:
        df (pd.DataFrame): Input DataFrame with the following columns:
            - 'simplified_path': List of articles in the path
        count_cutoff (int): Minimum number of appearances for an article to be considered
        scaling (str): Type of scaling to use. Options are 'minmax', 'standard', and 'robust' or None

    Returns:
        pd.Series: A Series containing the ratio for each article
    """
    # Copy and preprocess the DataFrame
    df = in_df[['simplified_path', 'finished']].copy()
    df['simplified_path'] = df['simplified_path'].apply(lambda l: l[1:-1])  # Remove start and end articles

    # Initialize a dictionary to store counts
    article_counts = {}
    unfinished_counts = {}

    # Iterate through each row to calculate counts
    for _, row in df.iterrows():
        simplified_path = row['simplified_path']
        finished = row['finished']

        for article in simplified_path:
            article_counts[article] = article_counts.get(article, 0) + 1
        
        if not finished:
            for article in simplified_path:
                unfinished_counts[article] = unfinished_counts.get(article, 0) + 1

    # Convert the dictionary to a Series
    article_counts = pd.Series(article_counts)
    unfinished_counts = pd.Series(unfinished_counts)

    ratio = unfinished_counts / article_counts

    ratio_df = pd.DataFrame({
    'n_appearances': article_counts,
    'unfinished_counts': unfinished_counts,
    'unfinished_ratio': ratio
    }).fillna(0)

    # cut off
    ratio_df = ratio_df[ratio_df['n_appearances'] >= count_cutoff]

    # scaling
    if scaling is not None:
        if scaling == 'minmax':
            scaler = MinMaxScaler()
        elif scaling == 'standard':
            scaler = StandardScaler()
        elif scaling == 'robust':
            scaler = RobustScaler()
        
        ratio_df[scaling] = -scaler.fit_transform(ratio_df[['unfinished_ratio']])

    #print(f"Number of unique articles: {len(article_counts)}")
    print(f"Ratio of unfinished over finished paths: {1-df['finished'].mean()}")
    return ratio_df


# ------------------------------------------------


# code a function that counts the number of dead ends an article has (difference between full path list content and simplified path list content)

def calculate_detour_ratios(in_df, count_cutoff=1, scaling=None):
    """
    Calculate the detour ratio for articles based on the full path and simplified path.

    Parameters:
        in_df (pd.DataFrame): Input DataFrame with the following columns:
            - 'full_path': List of articles in the full path
            - 'simplified_path': List of articles in the simplified path
        count_cutoff (int): Minimum number of detours for an article to be considered.
        scaling (str): Type of scaling to use. Options are 'minmax', 'standard', and 'robust' or None.

    Returns:
        pd.DataFrame: A DataFrame containing the detour ratio and scaled values for each article.
    """
    # Copy and preprocess the DataFrame
    df = in_df[['full_path', 'simplified_path']].copy()
    df['simplified_path'] = df['simplified_path'].apply(lambda l: l[1:-1])  # Remove start and end articles
    df['full_path'] = df['full_path'].apply(lambda l: l[1:-1])  # Remove start and end articles

    # Initialize dictionaries to store counts
    detour_counts = {}
    total_counts = {}

    # Iterate through each row to calculate detour counts and total appearances
    for _, row in df.iterrows():
        full_path = row['full_path']
        simplified_path = row['simplified_path']

        # Count total appearances for articles in the full path
        for article in full_path:
            total_counts[article] = total_counts.get(article, 0) + 1

        # Find detour articles by subtracting the simplified path from the full path
        detour_articles = set(full_path) - set(simplified_path)
        for article in detour_articles:
            detour_counts[article] = detour_counts.get(article, 0) + 1

    # Convert counts to Series
    detour_counts = pd.Series(detour_counts)
    total_counts = pd.Series(total_counts)

    # Fill missing detour counts with 0 for articles with no detours
    detour_counts = detour_counts.reindex(total_counts.index, fill_value=0)

    # Calculate detour ratio
    detour_ratios = detour_counts / total_counts

    # Create a DataFrame with detour counts and ratios
    detour_df = pd.DataFrame({
        'detour_count': detour_counts,
        'total_count': total_counts,
        'detour_ratio': detour_ratios
    }).loc[detour_ratios.index]

    # Filter out articles with detour ratio less than the count_cutoff
    detour_df = detour_df[detour_df['total_count'] >= count_cutoff]

    if scaling is not None:
        # normalize
        if scaling == 'minmax':
            scaler = MinMaxScaler()
        elif scaling == 'standard':
            scaler = StandardScaler()
        elif scaling == 'robust':
            scaler = RobustScaler()

        detour_df[scaling] = -scaler.fit_transform(detour_df[['detour_ratio']])

    print(f"Number of unique articles after detour ratio calculation: {len(detour_df)}")

    return detour_df



# ------------------------------------------------

def calc_avg_article_time(df, count_cutoff=30, scaling=None):
    """
    Calculate the average speed of articles from a DataFrame containing path information.

    Parameters:
        df (pd.DataFrame): Input DataFrame with the following columns:
            - 'simplified_path': List of articles in the path
            - 'durationInSec': Duration associated with the path
        count_cutoff (int): Minimum number of appearances for an article to be considered
        scaling (str): Type of scaling to use. Options are 'minmax', 'standard', and 'robust' or None.

    Returns:
        pd.DataFrame: A DataFrame containing:
            - 'article': Article name
            - 'n_appearances': Number of times the article appeared in paths
            - 'avg_speed': Average speed of the article
    """
    # Copy and preprocess the DataFrame
    df = df[['simplified_path', 'durationInSec']].copy()

    df['simplified_path'] = df['simplified_path'].apply(lambda l: l[1:-1])  # Remove start and end articles

    # Initialize an empty DataFrame to store results
    avg_article_speed_df = pd.DataFrame(columns=['article', 'n_appearances', 'avg_speed'])
    avg_article_speed_df.set_index('article', inplace=True)

    # Iterate through each row to calculate speeds
    for _, row in df.iterrows():
        speed = row['durationInSec']
        simplified_path = row['simplified_path']

        for article in simplified_path:
            if article not in avg_article_speed_df.index:
                avg_article_speed_df.loc[article] = [0, 0]

            # Update counts and sums
            avg_article_speed_df.at[article, 'n_appearances'] += 1
            avg_article_speed_df.at[article, 'avg_speed'] += speed

    # Calculate the average speed by dividing sum by counts
    avg_article_speed_df['avg_speed'] = avg_article_speed_df['avg_speed'] / avg_article_speed_df['n_appearances']

    # Filter out articles that appear less than the cutoff
    avg_article_speed_df = avg_article_speed_df[avg_article_speed_df['n_appearances'] >= count_cutoff]

    if scaling is not None:
        # Normalize the average speed
        if scaling == 'minmax':
            scaler = MinMaxScaler()
        elif scaling == 'standard':
            scaler = StandardScaler()
        elif scaling == 'robust':
            scaler = RobustScaler()
        
        avg_article_speed_df[scaling] = -scaler.fit_transform(avg_article_speed_df[['avg_speed']])

    print(f"Number of unique articles after time calc: {avg_article_speed_df.shape[0]}")

    return avg_article_speed_df#.reset_index()


# COMMENT: could consider really computing the speed instead of the duration. speed = distance / time and then sum up and average.

### And a function for data filtering based on time aspect

In [7]:
def filter_duration(df):
    """
    Filter the DataFrame based on the distance and duration bounds using the IQR method. And downsample to one IpAdress per identifier.

    Parameters:
        df (pd.DataFrame): Input DataFrame with the following columns:
            - 'distance': Distance associated with the path
            - 'durationInSec': Duration associated with the path

    Returns:
        pd.DataFrame: Filtered DataFrame
    """
    filtered_dfs = []  # List to hold filtered data for each distance group

    for d in range(1, int(df['distance'].max()) + 1):
        # Filter the DataFrame for the current distance group
        df_d = df[df['distance'] == d]

        # Compute IQR for 'durationInSec'
        Q1 = df_d['durationInSec'].quantile(0.25)
        Q3 = df_d['durationInSec'].quantile(0.75)
        IQR = Q3 - Q1

        # Calculate upper bound based on IQR
        upper_bound = Q3 + 1.5 * IQR

        # Keep only rows within the upper bound
        filtered_df_d = df_d[df_d['durationInSec'] <= upper_bound]

        # Append filtered group to the list
        filtered_dfs.append(filtered_df_d)

    # Concatenate all filtered groups
    filtered_df = pd.concat(filtered_dfs, ignore_index=True)
    
    # downsample data to one IpAdress per identifier
    downsampled_df = filtered_df.groupby(['hashedIpAddress', 'identifier']).sample(n=1, random_state=42)

    # Calculate the number of removed rows
    removed = df.shape[0] - downsampled_df.shape[0]

    # Print the result
    print(f"In sampling a total of {removed} samples were removed, "
        f"which represents {removed / df.shape[0] * 100:.3f}% of the original data.",
        f"{df.shape[0]} samples remain.")

    return downsampled_df

## Make a composite df with all the different scores

In [8]:
filtered_paths = feather.read_feather('Data/dataframes/filtered_paths.feather')

In [9]:
finished_paths = filtered_paths[filtered_paths['finished']]

# downsample data to one IpAdress per identifier
# this way players can't just learn paths and then play them as fast as possible
finished_paths = finished_paths.groupby(['hashedIpAddress', 'identifier']).sample(n=1, random_state=42)

weight_df = calculate_avg_article_weights(finished_paths, count_cutoff=30, scaling='standard')
time_df = calc_avg_article_time(filter_duration(finished_paths), count_cutoff=30, scaling='standard')
unfinished_atio_df = ratio_unfinished(filtered_paths, count_cutoff=30, scaling='standard')
detour_ratio_df = calculate_detour_ratios(finished_paths, count_cutoff=30, scaling='standard')

  avg_article_weight_df.at[article, 'weighted_avg'] += weight


Number of unique articles after weighting: 820
In sampling a total of 2471 samples were removed, which represents 5.437% of the original data. 45451 samples remain.
Number of unique articles after time calc: 776
Ratio of unfinished over finished paths: 0.1762317738926128
Number of unique articles after detour ratio calculation: 871


In [26]:
# Combine the metrics into a composite score
composite_df = pd.DataFrame(index=weight_df.index)
composite_df['weight_avg_scaled'] = weight_df['standard']
composite_df['weight_avg'] = weight_df['weighted_avg']

composite_df['unf_ratio_scaled'] = unfinished_atio_df['standard']
composite_df['unfinished_ratio'] = unfinished_atio_df['unfinished_ratio']

composite_df['detour_ratio_scaled'] = detour_ratio_df['standard']
composite_df['detour_ratio'] = detour_ratio_df['detour_ratio']

composite_df['avg_speed_scaled'] = time_df['standard']
composite_df['avg_speed'] = time_df['avg_speed']

composite_df

# | article | weighted_avg | avg_speed | unfinished_ratio | detour_ratio |
# |---------|bigger better |small better|   small better  | small better |

Unnamed: 0_level_0,weight_avg_scaled,weight_avg,unf_ratio_scaled,unfinished_ratio,detour_ratio_scaled,detour_ratio,avg_speed_scaled,avg_speed
article,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Philosophy,0.924846,0.643279,0.213956,0.186321,0.146164,0.056338,0.245149,132.319489
Mathematics,0.893903,0.641757,0.896721,0.127168,1.029241,0.013699,0.200423,133.090239
Arithmetic,1.100646,0.651925,0.953803,0.122222,-0.221156,0.074074,-0.575692,146.464789
North_Africa,-0.605665,0.568005,-0.090040,0.212658,0.155223,0.055901,-0.060291,137.583039
Africa,0.907139,0.642408,0.529661,0.158969,0.973283,0.016401,0.828117,122.273364
...,...,...,...,...,...,...,...,...
United_States_Senate,-1.257078,0.535967,-1.073610,0.297872,-2.311367,0.175000,1.575148,109.400000
Cheese,2.737321,0.732421,0.492802,0.162162,0.644868,0.032258,,
Nobel_Peace_Prize,0.919959,0.643039,0.569058,0.155556,0.193466,0.054054,0.610105,126.030303
Triassic,-0.615929,0.567500,-0.299084,0.230769,-1.645677,0.142857,,


In [27]:
# rank by highest wieght (remember weight for an article is the average of (distance / simplified_path_length) over all the paths it appears in)
composite_df.sort_values(by='weight_avg', ascending=False)

Unnamed: 0_level_0,weight_avg_scaled,weight_avg,unf_ratio_scaled,unfinished_ratio,detour_ratio_scaled,detour_ratio,avg_speed_scaled,avg_speed
article,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Achilles,4.811790,0.834448,0.850784,0.131148,1.312944,0.000000,2.045308,101.297872
J._K._Rowling,4.044191,0.796696,1.006611,0.117647,0.949604,0.017544,1.277557,114.528302
Mario,3.390951,0.764568,0.859010,0.130435,0.277426,0.050000,-0.830807,150.861111
Harry_Potter,3.188972,0.754634,0.565727,0.155844,1.312944,0.000000,0.005016,136.457627
Lead,3.156074,0.753016,0.492802,0.162162,-3.047129,0.210526,,
...,...,...,...,...,...,...,...,...
Anatomy,-2.276821,0.485813,-2.486595,0.420290,-0.569815,0.090909,-0.853373,151.250000
Irrigation,-2.391556,0.480171,-1.989156,0.377193,-0.646143,0.094595,0.076705,135.222222
Gas,-2.539718,0.472884,-0.473747,0.245902,-3.601376,0.237288,0.609033,126.048780
Atheism,-2.545097,0.472619,0.265926,0.181818,-0.412918,0.083333,,


In [30]:
# sort by unfinished ratio (ratio of the number of times an article appears in unfinished paths over the total number of times it appears)
composite_df.sort_values(by='unf_ratio_scaled', ascending=False)

Unnamed: 0_level_0,weight_avg_scaled,weight_avg,unf_ratio_scaled,unfinished_ratio,detour_ratio_scaled,detour_ratio,avg_speed_scaled,avg_speed
article,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Frog,1.556368,0.674339,2.364533,0.000000,0.644868,0.032258,,
Australian_Green_Tree_Frog,1.008014,0.647369,2.364533,0.000000,0.795185,0.025000,-0.458453,144.444444
List_of_countries,0.528985,0.623810,2.096106,0.023256,1.312944,0.000000,0.741666,123.763158
Periodic_table,1.216158,0.657606,2.081005,0.024564,1.089883,0.010771,0.671913,124.965187
Kuwait,-0.397505,0.578243,2.043912,0.027778,0.193466,0.054054,0.504872,127.843750
...,...,...,...,...,...,...,...,...
Fiction,-0.996283,0.548793,-3.030255,0.467391,-1.593772,0.140351,-1.217438,157.523810
The_Simpsons,0.802556,0.637264,-3.148226,0.477612,-0.988206,0.111111,,
Sport,-0.963159,0.550422,-3.551640,0.512563,-2.288856,0.173913,-0.649759,147.741176
Mexico_City,-0.744954,0.561154,-3.581520,0.515152,-0.311397,0.078431,0.022545,136.155556


In [31]:
# sort by detour ratio (ratio of the number of dead ends an article has (difference between full path list content and simplified path list content))
composite_df.sort_values(by='detour_ratio_scaled', ascending=False)

Unnamed: 0_level_0,weight_avg_scaled,weight_avg,unf_ratio_scaled,unfinished_ratio,detour_ratio_scaled,detour_ratio,avg_speed_scaled,avg_speed
article,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"Detroit,_Michigan",0.848160,0.639507,0.748606,0.140000,1.312944,0.000000,-3.627169,199.050000
Star_Wars,0.208158,0.608031,-0.185519,0.220930,1.312944,0.000000,-1.352424,159.850000
Star_Wars_Episode_IV__A_New_Hope,1.880626,0.690286,0.973890,0.120482,1.312944,0.000000,-1.333928,159.531250
History_of_the_world,-0.448221,0.575748,-1.186956,0.307692,1.312944,0.000000,0.854531,121.818182
Bread,-0.056651,0.595007,1.498857,0.075000,1.312944,0.000000,-0.438278,144.096774
...,...,...,...,...,...,...,...,...
Yellowstone_National_Park,-0.989639,0.549120,-2.118855,0.388430,-3.289356,0.222222,-1.589729,163.939394
Eukaryote,-1.550714,0.521525,-2.631404,0.432836,-3.433178,0.229167,-0.361369,142.771429
Gas,-2.539718,0.472884,-0.473747,0.245902,-3.601376,0.237288,0.609033,126.048780
DVD,-1.089802,0.544194,-2.182449,0.393939,-3.966165,0.254902,-0.013911,136.783784


In [24]:
# sort by speed (so far speed is just avg path time over all the paths an article appears in)
composite_df.sort_values(by='avg_speed_scaled', ascending=False)

Unnamed: 0_level_0,weight_avg_scaled,weight_avg,unf_ratio_scaled,unfinished_ratio,detour_ratio__scaled,detour_ratio,avg_speed_scaled,avg_speed
article,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
North_Korea,0.431010,0.618991,-0.153795,0.218182,-0.890285,0.106383,3.003798,84.780488
Old_English_language,0.054290,0.600463,-1.878974,0.367647,-0.487956,0.086957,2.821436,87.923077
Korea,1.232629,0.658416,0.748606,0.140000,0.842254,0.022727,2.687098,90.238095
Suez_Canal,0.080033,0.601729,0.979452,0.120000,-1.223017,0.122449,2.453527,94.263158
President_of_the_United_States,1.664244,0.679644,1.205954,0.100376,0.936918,0.018156,2.117642,100.051360
...,...,...,...,...,...,...,...,...
Welding,1.771787,0.684933,0.542058,0.157895,1.312944,0.000000,,
List_of_rivers_by_length,1.915145,0.691984,1.006611,0.117647,0.018547,0.062500,,
Oxford,-0.075141,0.594097,-0.456928,0.244444,-1.957111,0.157895,,
Cheese,2.737321,0.732421,0.492802,0.162162,0.644868,0.032258,,


## What now?

It would be cool to **get a composite score that incorporates all the metrics but what weight do we give each individual metric...?**
- PCA
- Weighted average

We can also seperate the game into two main objectives:
- **Reach your target in the least possible amount of clicks**.
    consider the following metrics
    - weighted_avg score
    - detour ratio 
    - maybe unfinished ratio
- **Reach your target as fast as possible**
    only really interested in time metric

Then we can test what article attributes correlate the most with high scores. And if they are similar for clicks and time.

Note that these metrics are more focused on article 'quality'. what I mean by that is that it is not the most important articles in the 'Network' (i.e. those that are the most used by players) that will have the highest scores.


**TO DO**

- code the centered sum of weights function

- explain exactly each metric

- compute composite scores 
     

In [36]:

from sklearn.decomposition import PCA

# Ensure all metrics are such that "bigger is better"
composite1_df = composite_df[['weight_avg_scaled', 'unf_ratio_scaled', 'detour_ratio_scaled']].copy()

# Apply PCA to get a single composite score
pca = PCA(n_components=1)
composite1_df['composite_score'] = pca.fit_transform(composite1_df)
composite1_df.sort_values(by='composite_score', ascending=False)



Unnamed: 0_level_0,weight_avg_scaled,unf_ratio_scaled,detour_ratio_scaled,composite_score
article,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Achilles,4.811790,0.850784,1.312944,4.158275
J._K._Rowling,4.044191,1.006611,0.949604,3.559448
Algebra,2.421177,1.664997,1.312944,3.068307
Harry_Potter,3.188972,0.565727,1.312944,2.952162
Parrot,2.304506,1.180703,1.312944,2.723494
...,...,...,...,...
Sport,-0.963159,-3.551640,-2.288856,-3.903475
DVD,-1.089802,-2.182449,-3.966165,-4.100757
Eukaryote,-1.550714,-2.631404,-3.433178,-4.369271
Optical_fiber,-1.738252,-0.867322,-5.713781,-4.701717


In [38]:
# Ensure all metrics are such that "bigger is better"
composite2_df = composite_df[['weight_avg_scaled', 'detour_ratio_scaled']].copy()

# Apply PCA to get a single composite score
pca = PCA(n_components=1)
composite2_df['composite_score'] = pca.fit_transform(composite2_df)
composite2_df.sort_values(by='composite_score', ascending=False)

Unnamed: 0_level_0,weight_avg_scaled,detour_ratio_scaled,composite_score
article,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Achilles,4.811790,1.312944,4.630274
J._K._Rowling,4.044191,0.949604,3.794620
Harry_Potter,3.188972,1.312944,3.310698
Mario,3.390951,0.277426,2.872191
Algebra,2.421177,1.312944,2.686373
...,...,...,...
Herbivore,-1.964565,-2.680531,-3.204325
DVD,-1.089802,-3.966165,-3.241349
Eukaryote,-1.550714,-3.433178,-3.305899
Gas,-2.539718,-3.601376,-4.208000
