STEP 1: DEFINE THE METHODS USED TO GENERATE:
1) The formatted data used by the sankey generator
2) The basic version of the sankey diagram for a given bias
3) A wrapper to generate these sankey diagrams for the target biases

In [None]:
# define imports
import pandas as pd
import numpy as np
import floweaver as fw
import ipysankeywidget
import lxml.etree as ET
from bs4 import BeautifulSoup
import pickle
import json
import os

# Users ranked within the top TOP_N (in 2016 or 2020) are kept for plotting.
TOP_N = 10
# Ranks above this value are collapsed into a single 'rank > cutoff' label.
cutoff = 50
# The individual media-bias categories, one influencer CSV per category.
biases = ['left extreme', 'left leaning', 'left', 'center', 'right leaning', 'right', 'right extreme', 'fake']
# Hex colour used for each bias category in the diagrams and legend.
palette = {'right': '#8F100B', 'right leaning': '#DB4742', 'center': '#CFDB00', 'left leaning': '#4495DB',
           'left': '#0E538F', 'left extreme': '#082E4F', 'right extreme': '#4F0906', 'fake': '#282828'}
# Maps a plot key (a single bias or one of the merged groupings) to the list of biases it covers.
bias_to_plot_biases = {**{b: [b] for b in biases}, **{'lefts': ['left', 'left leaning'], 'rights': ['right', 'right leaning'], 'fake_extreme_right': ['fake', 'right extreme']}}

# DEFINE PATHS TO DATA; EDIT IF NEEDED
TOP_INFLUENCERS = '../data/influencers'
USER_MAPS = '../data/maps/'
AFFILIATION = '../data/affiliations'

# also, ensure the output directory exists (exist_ok avoids the check-then-create race)
os.makedirs('./output', exist_ok=True)

BEGIN DATA TRANSFORM SECTION

Goal: Take in the top influencer data and create the dataframes for each bias type.

NOTE: Please read the README to find out how to generate the necessary input data to this script.

In [None]:
def load_data():
    """Load the per-bias top-influencer tables and derive the plotting inputs.

    For every entry of ``biases`` the file ``top_influencers_<bias>.csv`` is
    read from ``TOP_INFLUENCERS``. A row for @RealAlexJones is added to the
    'fake' table by hand (see the comment below).

    Returns
    -------
    merged : pandas.DataFrame
        All per-bias tables concatenated, with three extra columns:
        ``position_2016`` / ``position_2020`` (``ceil(log2(rank + 1))``, capped
        at 6, with missing ranks mapped to 6) and ``delta_position``
        (``position_2016 - position_2020``).
    user_by_bias : dict
        Maps each bias, plus the merged groupings 'fake_extreme_right',
        'lefts' and 'rights', to the array of user handles whose 2016 or 2020
        rank is within ``TOP_N``.
    """
    def _top_handles(frame):
        # handles ranked within the top TOP_N in either year
        in_top = (frame.rank_2016 <= TOP_N) | (frame.rank_2020 <= TOP_N)
        return frame[in_top].user_handle.values

    # Here we add back in @RealAlexJones, who was highly ranked in 2016 but whose account was removed before 2020;
    # the removal means it was not pulled into the top influencer dataframes we load below.
    alex_jones = pd.DataFrame([{'user_id': 0, 'CI_2016': 0, 'CI_2020': 0, 'rank_2016': 2.0,
                                'bias': 'fake', 'user_handle': 'RealAlexJones', 'verified': 1}])

    biased_dfs = {}
    user_by_bias = {}
    # first we load the influencer data for each media type
    for bias in biases:
        frame = pd.read_csv(os.path.join(TOP_INFLUENCERS, 'top_influencers_{}.csv'.format(bias)))
        if bias == 'fake':
            frame = pd.concat([frame, alex_jones])
        biased_dfs[bias] = frame
        user_by_bias[bias] = _top_handles(frame)

    # Concatenate the dataframes for each media type into one merged frame.
    # The 'fake' table already carries the @RealAlexJones row, so it is not appended a second time here
    # (the previous version did, which left a duplicated row in the merged frame).
    merged = pd.concat(list(biased_dfs.values()))

    # Here we add 3 custom merged media types. These will display the rank transitions for pairs of media types,
    # which will make the final plot more compact. Only the set of top handles is needed from each pair.
    # (An earlier sort_values call on the combined frame discarded its result and has been dropped.)
    merged_groups = {
        'fake_extreme_right': ('right extreme', 'fake'),
        'lefts': ('left', 'left leaning'),
        'rights': ('right', 'right leaning'),
    }
    for group, members in merged_groups.items():
        combined = pd.concat([biased_dfs[member] for member in members])
        user_by_bias[group] = _top_handles(combined)

    # compute positions and change in position
    for year in ('2016', '2020'):
        position = np.ceil(np.log2(merged['rank_' + year] + 1)).clip(upper=6)
        merged['position_' + year] = position.replace(np.nan, 6)
    merged['delta_position'] = merged.position_2016 - merged.position_2020
    return merged, user_by_bias


Next we create a method called "format_data". This method will ingest the merged data produced in the load step as
well as a set of biases to target and will produce a dataframe formatted for use in the sankey generator.

In [None]:
def format_data(top_merged, plot_biases):
    """Collapse the per-bias rows into one record per user for the sankey generator.

    Iterates over ``top_merged`` and keeps, for every ``user_handle``, a single
    record holding one 2016 rank/bias pair and one 2020 rank/bias pair. Missing
    (NaN) ranks are treated as ``np.inf``.

    Parameters
    ----------
    top_merged : pandas.DataFrame
        Rows providing the columns ``user_handle``, ``rank_2016``,
        ``rank_2020`` and ``bias``.
    plot_biases : list of str
        Biases shown in the target subfigure; used to break ties and to prefer
        an in-plot bias when choosing the 2020 entry.

    Returns
    -------
    pandas.DataFrame
        One row per user with the columns ``user_handle``, ``rank_2016``,
        ``bias_2016``, ``rank_2020``, ``bias_2020``, ``rank_2016_str`` and
        ``rank_2020_str`` (in that order).
    """
    user_to_best = {}
    for i, row in top_merged.iterrows():
        user = row['user_handle']
        # a missing rank is represented as +inf so that any real rank compares as better
        rank_2016 = row['rank_2016'] if not np.isnan(row['rank_2016']) else np.inf
        rank_2020 = row['rank_2020'] if not np.isnan(row['rank_2020']) else np.inf
        if user in user_to_best:
            # check if we should update
            # --- 2016: on a tie, switch bias only to one that is plotted; a strictly better rank always wins
            if rank_2016 == user_to_best[user]['rank_2016']:
                if row['bias'] in plot_biases:
                    user_to_best[user]['rank_2016'] = rank_2016
                    user_to_best[user]['bias_2016'] = row['bias']
            elif rank_2016 < user_to_best[user]['rank_2016']:
                user_to_best[user]['rank_2016'] = rank_2016
                user_to_best[user]['bias_2016'] = row['bias']
            
            # --- 2020: same tie rule, but a strictly better rank does not automatically win (see below)
            if rank_2020 == user_to_best[user]['rank_2020']:
                if row['bias'] in plot_biases:
                    user_to_best[user]['rank_2020'] = rank_2020
                    user_to_best[user]['bias_2020'] = row['bias']
            elif rank_2020 < user_to_best[user]['rank_2020']:
                # a better-ranked row with a plotted bias is taken immediately
                if row['bias'] in plot_biases:
                    user_to_best[user]['rank_2020'] = rank_2020
                    user_to_best[user]['bias_2020'] = row['bias']
                # keep the stored entry (possibly the one just written) when it is a plotted bias ranked <= 10 ...
                # NOTE(review): the literal 10 presumably mirrors TOP_N - confirm before changing either
                if user_to_best[user]['rank_2020'] <= 10 and user_to_best[user]['bias_2020'] in plot_biases:
                    continue
                # ... or when it is a 'center' entry ranked <= 10
                if user_to_best[user]['bias_2020'] == 'center' and user_to_best[user]['rank_2020'] <= 10:
                    continue
                # otherwise the better-ranked row replaces the stored entry regardless of its bias
                user_to_best[user]['rank_2020'] = rank_2020
                user_to_best[user]['bias_2020'] = row['bias']
            else:
                # worse rank: still switch when the stored bias is not plotted but this row's bias is
                # and the row is ranked <= 10
                if user_to_best[user]['bias_2020'] not in plot_biases and rank_2020 <= 10 and row['bias'] in plot_biases:
                    user_to_best[user]['rank_2020'] = rank_2020
                    user_to_best[user]['bias_2020'] = row['bias']
        else:
            # first time this user is seen: the row supplies both the 2016 and the 2020 entry
            user_to_best[user] = {
                'user_handle': user,
                'rank_2016': rank_2016, 
                'bias_2016': row['bias'], 
                'rank_2020': rank_2020, 
                'bias_2020': row['bias']
            }

    new_merged = pd.DataFrame(list(user_to_best.values()))
    # ranks above `cutoff` (including the +inf placeholders) share the label 'rank > <cutoff>'
    rank_to_str = lambda rank: 'rank > {}'.format(cutoff) if (np.isnan(rank) or rank>cutoff) else str(int(rank))
    new_merged['rank_2016_str'] = new_merged.rank_2016.map(rank_to_str)
    new_merged['rank_2020_str'] = new_merged.rank_2020.map(rank_to_str)
    return new_merged


BEGIN SANKEY SECTION:

GOAL: Generate the initial form of the sankey diagram, will contain all the information of each individual subplot
      of figure 4, but will require a little prettifying in a post-processing step to be paper ready.

NOTE:
1) This method must be run in a Jupyter notebook because of the underlying package we use to create the sankey
   diagram, "floweaver". It produces a widget that is only rendered in the notebook. There is no way to convert the
   diagram directly into an svg file; it must be rendered as a widget and then saved as an svg.

In [None]:
def plot_intermediate_sankey(new_merged, bias):
    """Build the intermediate user -> 2020-rank sankey definition for one plot.

    Columns of ``new_merged`` are read by name (``user_handle``,
    ``rank_2016``, ``rank_2020``, ``rank_2020_str``, ``bias_2020``), so the
    frame may carry its columns in any order and may contain extra columns.

    Parameters
    ----------
    new_merged : pandas.DataFrame
        One row per user, as produced by ``format_data``.
    bias : str
        Plot key; used in the name of the intermediate SVG file.

    Returns
    -------
    sdd : floweaver.SankeyDefinition
    flows : pandas.DataFrame
        One flow per user with columns ``source`` (user handle), ``target``
        (2020 rank label), ``type`` (2020 bias) and ``value`` (always 1).
    """
    def rank_to_str(rank):
        if np.isnan(rank) or rank > cutoff:
            return 'rank > {}'.format(cutoff)
        return str(int(rank))

    # one unit-weight flow from every user to the label of their 2020 rank
    flows = pd.DataFrame({
        'source': new_merged['user_handle'].to_numpy(),
        'target': new_merged['rank_2020_str'].to_numpy(),
        'type': new_merged['bias_2020'].to_numpy(),
        'value': 1,
    })
    size = dict(width=1600, height=1200)

    nodes = {
        'users': fw.ProcessGroup(list(new_merged.user_handle.unique())),
        '2020': fw.ProcessGroup([rank_to_str(rank) for rank in new_merged.rank_2020.unique()]),
    }

    # users are listed by their 2016 rank (handle as tie-breaker)
    ordered_users = list(
        new_merged.sort_values(by=['rank_2016', 'user_handle'])
        .drop_duplicates('user_handle', keep='first')
        .user_handle
    )
    # only ranks within the cutoff get their own partition entry; np.unique returns them sorted
    shown_ranks = np.unique(new_merged.loc[new_merged['rank_2020'] <= cutoff, 'rank_2020'])
    sorted_rank_2020 = [rank_to_str(rank) for rank in shown_ranks]
    nodes['users'].partition = fw.Partition.Simple('process', ordered_users)
    nodes['2020'].partition = fw.Partition.Simple('process', sorted_rank_2020)

    ordering = [
        ['users'],
        ['2020']
    ]
    bias_types = fw.Partition.Simple('type', biases)

    bundles = [
        fw.Bundle('users', '2020')
    ]

    # generate our figure svg
    margins = dict(top=0, bottom=0, left=360, right=160)
    sdd = fw.SankeyDefinition(nodes, bundles, ordering, flow_partition=bias_types)
    # NOTE(review): this widget is never returned or displayed from here; the notebook cells weave and
    # display the diagram again themselves - presumably that displayed widget is the one that gets saved.
    fw.weave(sdd, flows, palette=palette).to_widget(**size, margins=margins).auto_save_svg('output/TopUsers_{}_intermediate.svg'.format(bias))
    return sdd, flows


The following section defines some helpful functions and variables for use in post-processing the intermediary sankey
diagrams created in the previous step. This includes pre-defining some shapes for identifying the media type of users,
mapping biases to subfigure titles, getting maps from users to different svg elements, etc.

In [None]:
# define some helper functions for post processing our intermediary svg
# SVG icon markup keyed by media type; the matching element is appended to each user's node.
media_type_to_shape = dict(
    independent='<polygon points="-6.959 10.34,-2.099 9.74,0.001 5,2.161 9.74,6.961 10.34,3.481 14,4.321 19.1,0.001 16.7,-4.259 19.1,-3.419 14.06" stroke="black" stroke-width="2" style="fill:rgb(102,102,102)" transform="translate(-200,0)"/>',  # star
    other='<polygon points="16 13.85641,0 13.85641,8 0" stroke="black" stroke-width="2" style="fill:rgb(102,102,102)" transform="translate(-208,0)"></polygon>',  # triangle
    media='<circle cx="0" cy="10" r="8" stroke="black" stroke-width="2" style="fill:rgb(102,102,102)" transform="translate(-200,0)"/>',  # circle
    political='<rect x="0" y="0" width="16" height="16" stroke="black" stroke-width="2" style="fill:rgb(102,102,102)" transform="translate(-208,0)"/>',  # square
)

# Title printed above each of the three merged subfigures.
bias_to_title = dict(
    lefts='Left/left leaning',
    rights='Right/right leaning',
    fake_extreme_right='Extreme bias right/fake news',
)

# Every '<prefix>_<period>' affiliation code maps to the media type named by its prefix.
affiliation_map = {
    '{}_{}'.format(prefix, period): media_type
    for prefix, media_type in (('media', 'media'), ('polit', 'political'),
                               ('other', 'other'), ('indep', 'independent'))
    for period in ('2020', '2016', 'both')
}


def get_user_to_node(svg_root):
    """Index every ``<g class="node">`` element of the parsed svg by its title.

    The key is the string of the element's first ``<text class="node-title">``
    child; when two nodes share a title the later one wins.
    """
    return {
        group.find_all('text', class_='node-title')[0].string: group
        for group in svg_root.find_all('g', class_='node')
    }

def get_user_to_link(svg_root):
    """Index every ``<g class="link">`` element of the parsed svg by user.

    The user is taken to be the part of the link's ``<title>`` text that
    precedes the first space; a later link for the same user replaces an
    earlier one.
    """
    return {
        group.title.text.split(' ', 1)[0]: group
        for group in svg_root.find_all('g', class_='link')
    }


def create_user_to_media_type():
    """Build the per-user lookups needed when decorating the diagrams.

    Reads the pickled 2016 and 2020 user maps from ``USER_MAPS`` and the two
    affiliation JSON files from ``AFFILIATION``. Only ids present in the
    combined user map (2020 entries override 2016 ones) are kept.

    Returns
    -------
    user_to_fig_text : dict
        User name -> value stored in ``infl_affiliation_map.json``.
    user_to_affiliation : dict
        User name -> media type, obtained by translating the code stored in
        ``infl_affiliation_map_no_handles.json`` through ``affiliation_map``.
    """
    def _read_user_map(filename):
        # NOTE: pickle must only be used on trusted, locally generated files
        with open(os.path.join(USER_MAPS, filename), 'rb') as handle:
            return pickle.load(handle)

    def _read_affiliations(filename):
        # JSON object keys are strings; the user maps are keyed by integer id
        with open(os.path.join(AFFILIATION, filename)) as handle:
            raw = json.load(handle)
        return {int(user_id): value for user_id, value in raw.items()}

    users_2016 = _read_user_map('user_map_2016.pkl')
    users_2020 = _read_user_map('user_map_2020.pkl')
    fig_text_by_id = _read_affiliations('infl_affiliation_map.json')
    code_by_id = _read_affiliations('infl_affiliation_map_no_handles.json')

    users = {**users_2016, **users_2020}

    user_to_fig_text = {
        users[user_id]['name']: fig_text
        for user_id, fig_text in fig_text_by_id.items()
        if user_id in users
    }
    code_by_name = {
        users[user_id]['name']: code
        for user_id, code in code_by_id.items()
        if user_id in users
    }
    user_to_affiliation = {name: affiliation_map[code] for name, code in code_by_name.items()}

    return user_to_fig_text, user_to_affiliation



The final step for each subfigure is to post-process it. This step does the following to the plot:
1) Anonymizes the user handles, as required by the journal
2) Adds icons to indicate which media type each user belonged to
3) Adds bounding rectangles around groups of users by their 2016 rank. In the intermediary step all users were sorted
   by rank, but no visible indication of rank exists. This step bounds all users of the same rank in a grey rectangle
   and then labels the rectangle with the rank obtained in 2016.
4) Adds a gradient to the flows of participants who either changed media type from 2016 to 2020, or whose media type
   for their highest obtained rank in 2016 differed from that of their highest obtained rank in 2020.
   By default, all flow colors are those of the media type of the highest obtained rank in 2020. This change makes the
   color of the flow a gradient from the media type of the highest 2016 rank to the media type of the highest 2020 rank.

In [None]:
def post_process_intermediary_plot(bias, new_merged):
    """Turn the saved intermediate sankey svg for ``bias`` into the final subfigure.

    Reads ``output/TopUsers_<bias>_intermediate.svg``, then:
    replaces each user's node title with the text from ``create_user_to_media_type``,
    appends the media-type icon to the node, recolours each user's link with a
    2016-bias -> 2020-bias gradient, draws a labelled grey rectangle behind the
    users that share a 2016 rank label, restyles all node titles, and adds the
    column headers and the subfigure title.

    Parameters
    ----------
    bias : str
        Plot key; must be a key of ``bias_to_title``.
    new_merged : pandas.DataFrame
        One row per user with ``user_handle``, ``rank_2016_str``, ``bias_2016``
        and ``bias_2020`` (the ``format_data`` output).

    Returns
    -------
    str
        The final svg markup, which is also written to ``output/TopUsers_<bias>.svg``.

    Raises
    ------
    KeyError
        If a user in ``new_merged`` has no node, link, figure text or media type.
    """
    # load the previously saved intermediary svg file
    xml = ET.parse('output/TopUsers_{}_intermediate.svg'.format(bias))
    xmlstr = ET.tostring(xml.getroot(), pretty_print=True, encoding='unicode')
    mysvg = BeautifulSoup(xmlstr, 'lxml')

    user_to_fig_text, user_to_media_type = create_user_to_media_type()
    user_to_node = get_user_to_node(mysvg)
    user_to_link = get_user_to_link(mysvg)
    rect_group = BeautifulSoup('<g class="node_group"> </g>', 'lxml').g.extract()

    diagram_font_weight = "bold"
    diagram_font_size = "18px"
    # tag name of the icon element stored in media_type_to_shape for each media type
    shape_tag = {'political': 'rect', 'independent': 'polygon', 'other': 'polygon', 'media': 'circle'}

    for rank in np.unique(new_merged.rank_2016_str):
        rows_of_concern = new_merged[new_merged.rank_2016_str == rank]
        miny = np.inf
        maxy = -np.inf
        for i, row in rows_of_concern.iterrows():
            user = row['user_handle']

            # go into the svg file and find the node with title of user
            node = user_to_node[user]

            # modify node to include the symbol for given user
            ntext = node.find_all('text', class_='node-title')[0]
            ntext.string.replace_with(user_to_fig_text[user])
            media_type = user_to_media_type[user]
            # the parser is named explicitly so the result does not depend on which parsers are installed
            shape = BeautifulSoup(media_type_to_shape[media_type], 'lxml').find(shape_tag[media_type]).extract()
            node.append(shape)

            # get the translate values, i.e. parse "translate(x,y)"
            node_transform = node['transform'].replace(')', '')
            x, y = node_transform.split('(')[1].split(',')
            x = float(x)
            y = float(y)

            # track the vertical extent covered by the users of this rank
            miny = min(miny, y)
            maxy = max(maxy, y)

            # modify color gradient for link *note we do this because some users changed media type from 2016 to 2020,
            # like CNN, the gradient changes from center to left leaning to indicate this.
            link = user_to_link[user]
            color_2016 = palette[row['bias_2016']]
            color_2020 = palette[row['bias_2020']]
            gradient_id = 'grad_{}'.format(user)
            gradient_xml = '<defs><linearGradient id="{}"><stop offset="0%" stop-color="{}"/><stop offset="100%" stop-color="{}" /></linearGradient></defs>'.format(
                gradient_id, color_2016, color_2020
            )
            gradient = BeautifulSoup(gradient_xml, 'lxml').defs.extract()
            link.insert(0, gradient)
            # drop the inline style so that the gradient fill set below is the one that applies
            del link.path['style']
            link.path['fill'] = "url(#{})".format(gradient_id)

        # we now have the y value and the height of the rectangle, we now create it and move on to the next rank
        height = maxy - miny + 20

        # insert the nodes group, this groups our users by their ranking, adding a box around users of the same rank
        # and labelling the box with the shared rank
        rank_group = BeautifulSoup('<g class="rank_group" transform="translate(-320, {})"></g>'.format(miny), 'lxml').g.extract()
        rect_xml = '<rect width="320" height="{}" x="0" y="0" style="fill:rgb(236,236,236)"></rect>'.format(height)
        rect = BeautifulSoup(rect_xml, 'lxml').rect.extract()
        rank_group.append(rect)
        # add a label to the rank group rect that denotes the 2016 rank of the users in the group
        label_xml = '<text x="0" y="{}" font-size="{}" font-weight="{}">{}</text>'.format(
            (height/2)+7, diagram_font_size, diagram_font_weight, rank)
        # `.text` on a soup object is the text-content accessor, so the <text> tag is looked up with find()
        label = BeautifulSoup(label_xml, 'lxml').find('text')
        rank_group.append(label)
        rect_group.append(rank_group)

    # insert the rank grouping elements into the sankey graph group
    mysvg.find_all('g', class_='sankey')[0].insert(0, rect_group)

    # set the node-title text attributes, this will modify the font size and weight for all ranks and users
    for ntext in mysvg.find_all('text', class_='node-title'):
        ntext['font-size'] = diagram_font_size
        ntext['font-weight'] = diagram_font_weight
        title = ntext.string
        # `.string` is None when the element does not hold exactly one string; skip those instead of failing.
        # NOTE(review): the substring test also matches any user label that happens to contain 'rank'.
        if title is not None and (title == '_' or 'rank' in title):
            title.replace_with('Rank > {}'.format(cutoff))

    # add the header information
    text_xml = '<text x="{}" y="{}" text-anchor="{}" font-size="{}">{}</text>'
    r2016_xml = text_xml.format('48', '0', 'start', '30px', 'Rank 2016') #3%
    user_xml = text_xml.format('360', '0', 'end', '30px', 'User') #
    r2020_xml = text_xml.format('1520', '0', 'end', '30px', 'Rank 2020') #95%
    title_xml = text_xml.format('800', '-35', 'middle', '45px', bias_to_title[bias]) #50%
    header_group = BeautifulSoup('<g class="header" transform="translate(0, 100)">{}{}{}{}</g>'.format(
        r2016_xml, user_xml, title_xml, r2020_xml), 'lxml').g.extract()

    # add the header information to the svg
    mysvg.svg.insert(0, header_group)

    # BeautifulSoup is writing the linearGradient tag incorrectly, so we will correct that before saving
    out = mysvg.svg.extract().prettify()
    out = out.replace("lineargradient", "linearGradient")

    # save file; the encoding is fixed so non-ASCII characters in the markup do not depend on the platform default
    with open('output/TopUsers_{}.svg'.format(bias), "w", encoding='utf-8') as file:
        file.write('<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">')
        file.write(str(out))
    return out

Next we will generate the intermediary sankey plots for each of the target bias groupings (lefts, rights, fake_extreme_right)

In [None]:

# plot lefts
bias = 'lefts'
merged, user_by_bias = load_data()
# keep only the users selected for this grouping, and only the columns format_data reads
top_merged = merged[merged.user_handle.isin(user_by_bias[bias])][['user_handle', 'rank_2016', 'rank_2020', 'bias']]
plot_biases = bias_to_plot_biases[bias]
new_merged = format_data(top_merged, plot_biases)
# merged_data collects the formatted frame of every grouping for the post-processing cells
merged_data = {bias: new_merged}
size = dict(width=1600, height=1200)
margins = dict(top=0, bottom=0, left=360, right=160)
sdd, flows = plot_intermediate_sankey(new_merged, bias)
# The file name is derived from `bias`, like in the cells for the other groupings.
# NOTE(review): the widget is the cell's last expression, presumably so the notebook renders it - confirm.
fw.weave(sdd, flows, palette=palette).to_widget(**size, margins=margins).auto_save_svg('output/TopUsers_{}_intermediate.svg'.format(bias))

In [None]:
# plot rights: rebuild the merged data, format it for this grouping and weave the intermediate diagram
bias = 'rights'
merged, user_by_bias = load_data()
top_merged = merged.loc[
    merged.user_handle.isin(user_by_bias[bias]),
    ['user_handle', 'rank_2016', 'rank_2020', 'bias'],
]
plot_biases = bias_to_plot_biases[bias]
new_merged = format_data(top_merged, plot_biases)
merged_data[bias] = new_merged
size = {'width': 1600, 'height': 1200}
margins = {'top': 0, 'bottom': 0, 'left': 360, 'right': 160}
sdd, flows = plot_intermediate_sankey(new_merged, bias)
# the widget stays the last expression of the cell
(
    fw.weave(sdd, flows, palette=palette)
    .to_widget(**size, margins=margins)
    .auto_save_svg('output/TopUsers_{}_intermediate.svg'.format(bias))
)

In [None]:
# plot fake_extreme_right: rebuild the merged data, format it for this grouping and weave the intermediate diagram
bias = 'fake_extreme_right'
merged, user_by_bias = load_data()
top_merged = merged.loc[
    merged.user_handle.isin(user_by_bias[bias]),
    ['user_handle', 'rank_2016', 'rank_2020', 'bias'],
]
plot_biases = bias_to_plot_biases[bias]
new_merged = format_data(top_merged, plot_biases)
merged_data[bias] = new_merged
size = {'width': 1600, 'height': 1200}
margins = {'top': 0, 'bottom': 0, 'left': 360, 'right': 160}
sdd, flows = plot_intermediate_sankey(new_merged, bias)
# the widget stays the last expression of the cell
(
    fw.weave(sdd, flows, palette=palette)
    .to_widget(**size, margins=margins)
    .auto_save_svg('output/TopUsers_{}_intermediate.svg'.format(bias))
)

We next post process each of the intermediary plots.

In [None]:
from IPython.display import SVG, display
# Post-process the 'lefts' subfigure (this also writes output/TopUsers_lefts.svg)
# and show the returned svg markup as the cell output.
lefts = post_process_intermediary_plot('lefts', merged_data['lefts'])
SVG(lefts)

In [None]:
# Post-process the 'rights' subfigure (this also writes output/TopUsers_rights.svg)
# and show the returned svg markup as the cell output.
rights = post_process_intermediary_plot('rights', merged_data['rights'])
SVG(rights)

In [None]:
# Post-process the 'fake_extreme_right' subfigure (this also writes output/TopUsers_fake_extreme_right.svg)
# and show the returned svg markup as the cell output.
fake_er = post_process_intermediary_plot('fake_extreme_right', merged_data['fake_extreme_right'])
SVG(fake_er)

In [None]:
# Combine the three post-processed subfigures into a single svg document.
import svg_stack as ss
doc = ss.Document()
# first layout: the 'lefts' and 'rights' subfigures (presumably placed side by side by the HBox layout)
layout1 = ss.HBoxLayout()
layout1.addSVG('output/TopUsers_lefts.svg',alignment=ss.AlignTop|ss.AlignHCenter)
layout1.addSVG('output/TopUsers_rights.svg',alignment=ss.AlignCenter)

# second layout: the first layout followed by the 'fake_extreme_right' subfigure
# (presumably stacked vertically by the VBox layout)
layout2 = ss.VBoxLayout()
layout2.addLayout(layout1)
layout2.addSVG('output/TopUsers_fake_extreme_right.svg',alignment=ss.AlignCenter)

doc.setLayout(layout2)
# the legends are added to this file in a later step
doc.save('output/figure_4_no_legend.svg')


Our last step is to add the legends to the svg file

In [None]:
# Markup of the icon legend: a grey box listing the four media-type icons
# (circle, square, star, triangle) with their meaning.
icon_legend_xml = '<g class="icon_legend" transform="translate(300, 1200)">\
    <rect height="180" style="fill:rgb(236,236,236)" width="450" x="0" y="0"></rect>\
    <text font-size="30px" text-anchor="start" x="0" y="30">Icon Legend</text>\
    <circle cx="0" cy="10" r="8" stroke="black" stroke-width="2" style="fill:rgb(102,102,102)" transform="translate(18,46)"/>\
    <text font-size="24px" text-anchor="start" x="50" y="62">Linked to media organization</text>\
    <rect x="0" y="0" width="16" height="16" stroke="black" stroke-width="2" style="fill:rgb(102,102,102)" transform="translate(10,78)"/>\
    <text font-size="24px" text-anchor="start" x="50" y="94">Linked to political organization</text>\
    <polygon points="-6.959 10.34,-2.099 9.74,0.001 5,2.161 9.74,6.961 10.34,3.481 14,4.321 19.1,0.001 16.7,-4.259 19.1,-3.419 14.06" stroke="black" stroke-width="2" style="fill:rgb(102,102,102)" transform="translate(18,107.5)"/>\
    <text font-size="24px" text-anchor="start" x="50" y="126">Independent</text>\
    <polygon points="16 13.85641,0 13.85641,8 0" stroke="black" stroke-width="2" style="fill:rgb(102,102,102)" transform="translate(10,142)"></polygon>\
    <text font-size="24px" text-anchor="start" x="50" y="158">Other</text>\
    </g>'

# Markup of the colour legend: one swatch per bias. The seven {} placeholders are filled,
# in order, with the palette colours listed in the .format() call at the end.
# ('left extreme' has no swatch in this legend.)
color_legend_xml = '<g class="color_legend" transform="translate(2450, 1200)">\
    <rect height="280" style="fill:rgb(236,236,236)" width="300" x="0" y="0"></rect>\
    <text font-size="30px" text-anchor="start" x="0" y="30">Colour Legend</text>\
    <rect x="0" y="0" width="16" height="16" stroke="black" stroke-width="2" fill="{}" transform="translate(10,46)"/>\
    <text font-size="24px" text-anchor="start" x="50" y="62">Left</text>\
    <rect x="0" y="0" width="16" height="16" stroke="black" stroke-width="2" fill="{}" transform="translate(10,78)"/>\
    <text font-size="24px" text-anchor="start" x="50" y="94">Left leaning</text>\
    <rect x="0" y="0" width="16" height="16" stroke="black" stroke-width="2" fill="{}" transform="translate(10,110)"/>\
    <text font-size="24px" text-anchor="start" x="50" y="126">Center</text>\
    <rect x="0" y="0" width="16" height="16" stroke="black" stroke-width="2" fill="{}" transform="translate(10,142)"/>\
    <text font-size="24px" text-anchor="start" x="50" y="158">Right leaning</text>\
    <rect x="0" y="0" width="16" height="16" stroke="black" stroke-width="2" fill="{}" transform="translate(10,174)"/>\
    <text font-size="24px" text-anchor="start" x="50" y="190">Right</text>\
    <rect x="0" y="0" width="16" height="16" stroke="black" stroke-width="2" fill="{}" transform="translate(10,206)"/>\
    <text font-size="24px" text-anchor="start" x="50" y="222">Extreme bias right</text>\
    <rect x="0" y="0" width="16" height="16" stroke="black" stroke-width="2" fill="{}" transform="translate(10,238)"/>\
    <text font-size="24px" text-anchor="start" x="50" y="254">Fake news</text>\
    </g>'.format(*[palette[x] for x in ['left', 'left leaning', 'center', 'right leaning', 'right', 'right extreme', 'fake']])

# Load the stacked figure produced in the previous step.
xml = ET.parse('output/figure_4_no_legend.svg')
xmlstr = ET.tostring(xml.getroot(), pretty_print=True, encoding='unicode')
mysvg = BeautifulSoup(xmlstr, 'xml')

# The legend fragments are parsed with an explicitly named parser ('lxml', the one used for the
# other fragments in this notebook) so the result does not depend on which parsers are installed.
color_legend = BeautifulSoup(color_legend_xml, 'lxml').g.extract()
mysvg.svg.insert(0, color_legend)

icon_legend = BeautifulSoup(icon_legend_xml, 'lxml').g.extract()
mysvg.svg.insert(0, icon_legend)

# Write with a fixed encoding so that non-ASCII characters in the svg do not depend on the platform default.
with open('output/figure_4.svg', "w", encoding='utf-8') as file:
    #file.write('<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">')
    file.write(mysvg.prettify())

# show the final figure as the cell output
SVG(mysvg.prettify())