# Make Spiderplots

## Libraries

In [17]:
import pandas as pd
import numpy as np

import seaborn as sns
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt
from matplotlib.patches import Circle, RegularPolygon
from matplotlib.path import Path
from matplotlib.projections.polar import PolarAxes
from matplotlib.projections import register_projection
from matplotlib.spines import Spine
from matplotlib.transforms import Affine2D
from matplotlib import cm

## Functions and definitions

In [18]:
# Highest compound index for which Reaxys reactions were analysed
number_of_reactions = 13

# Column names as exported from Reaxys that are used in the analysis
# (the order defines the order of the spiderplot axes)
important_columns = [
    'Temperature (Reaction Details) [C]',
    'Time (Reaction Details) [h]',
    'Solvent (Reaction Details)',
    'Reagent/Catalyst',
    'Yield (numerical)',
]

# Display names for the columns above (same order), used as axis labels
proper_name = [
    'Temperature[C]',
    'Time[h]',
    'Solvent [hazardousness]',
    'Catalyst [hazardousness]',
    'Yield',
]


def radar_factory(num_vars, frame='circle'):
    """Create a radar chart with `num_vars` axes.

    This function creates a RadarAxes projection and registers it under the
    projection name ``'radar'``. Calling it again registers a new class under
    the same name.

    Parameters
    ----------
    num_vars : int
        Number of variables for radar chart.
    frame : {'circle', 'polygon'}
        Shape of frame surrounding axes. Any other value raises a
        ``ValueError`` when an axes of this projection is created.

    Returns
    -------
    theta : numpy.ndarray
        The `num_vars` evenly-spaced axis angles (radians, starting at 0).

    """
    # calculate evenly-spaced axis angles
    theta = np.linspace(0, 2*np.pi, num_vars, endpoint=False)

    class RadarAxes(PolarAxes):

        name = 'radar'

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # rotate plot such that the first axis is at the top
            self.set_theta_zero_location('N')

        def fill(self, *args, closed=True, **kwargs):
            """Override fill so that line is closed by default"""
            return super().fill(closed=closed, *args, **kwargs)

        def plot(self, *args, **kwargs):
            """Override plot so that line is closed by default.

            Returns the list of lines created by the parent ``plot`` so that
            callers can keep using the usual ``lines = ax.plot(...)`` idiom.
            """
            lines = super().plot(*args, **kwargs)
            for line in lines:
                self._close_line(line)
            return lines

        def _close_line(self, line):
            """Append the first data point to `line` if it is not closed yet."""
            x, y = line.get_data()
            # an empty line has no first point to repeat
            if len(x) == 0:
                return
            # FIXME: markers at x[0], y[0] get doubled-up
            if x[0] != x[-1]:
                x = np.concatenate((x, [x[0]]))
                y = np.concatenate((y, [y[0]]))
                line.set_data(x, y)

        def set_varlabels(self, labels):
            """Label the `num_vars` spokes with `labels` (one per axis angle)."""
            self.set_thetagrids(np.degrees(theta), labels)

        def _gen_axes_patch(self):
            # The Axes patch must be centered at (0.5, 0.5) and of radius 0.5
            # in axes coordinates.
            if frame == 'circle':
                return Circle((0.5, 0.5), 0.5)
            elif frame == 'polygon':
                return RegularPolygon((0.5, 0.5), num_vars,
                                      radius=.5, edgecolor="k")
            else:
                raise ValueError("unknown value for 'frame': %s" % frame)

        def draw(self, renderer):
            """ Draw. If frame is polygon, make gridlines polygon-shaped """
            if frame == 'polygon':
                gridlines = self.yaxis.get_gridlines()
                for gl in gridlines:
                    # one interpolation step per spoke turns each circular
                    # gridline into a polygon with `num_vars` corners
                    gl.get_path()._interpolation_steps = num_vars
            super().draw(renderer)


        def _gen_axes_spines(self):
            if frame == 'circle':
                return super()._gen_axes_spines()
            elif frame == 'polygon':
                # spine_type must be 'left'/'right'/'top'/'bottom'/'circle'.
                spine = Spine(axes=self,
                              spine_type='circle',
                              path=Path.unit_regular_polygon(num_vars))
                # unit_regular_polygon gives a polygon of radius 1 centered at
                # (0, 0) but we want a polygon of radius 0.5 centered at (0.5,
                # 0.5) in axes coordinates.
                spine.set_transform(Affine2D().scale(.5).translate(.5, .5)
                                    + self.transAxes)


                return {'polar': spine}
            else:
                raise ValueError("unknown value for 'frame': %s" % frame)

    register_projection(RadarAxes)
    return theta



In [19]:
def normalize(x, minimum, maximum):
    """Min-max scale `x` into the range spanned by `minimum` and `maximum`.

    All three arguments may be numbers or numeric strings; they are converted
    with ``float``. Values outside the range are not clipped, so the result
    can be below 0 or above 1.

    Returns 0.0 when `minimum` and `maximum` are equal (degenerate range); in
    that case `x` is not inspected at all.
    """
    upper = float(maximum)
    lower = float(minimum)

    # degenerate range: avoid dividing by zero
    if upper == lower:
        return 0.0

    return (float(x) - lower) / (upper - lower)

## Main part

In [20]:
# Read Solvent list
# Tab-separated file: column 0 = solvent name, column 1 = integer hazard score.
####

solvent_hazzard = {}
# `with` guarantees the file handle is closed, also if a row fails to parse
with open('../data/processed/List_of_Solvents.tsv') as fp:
    # the first line is skipped (presumably a header row)
    fp.readline()
    for line in fp:
        # ignore whitespace-only lines instead of failing on the missing column
        if not line.strip():
            continue
        tmp = line.strip().split('\t')
        solvent_hazzard[tmp[0]] = int(tmp[1])
print (solvent_hazzard)

{'methanol': 1, 'acetic acid': 2, 'chlorobenzene': 3, '1,4-dioxane': 5, 'N,N-dimethyl-formamide': 5, 'water': 1, 'nitromethane': 6, 'glycerol': 1, 'toluene': 3, 'acetonitrile': 3, 'ethyl acetate': 2, 'dichloromethane': 4, 'dimethyl sulfoxide': 3, 'ethanol': 1, 'chloroform': 6, '1,2-dichloro-ethane': 6, 'neat (no solvent)': 1}


In [21]:
# Read HCode severity list
# Comma-separated file: column 0 = H-code, column 1 = severity as a float.
####
hcode_severity = {}
# `with` guarantees the file handle is closed, also if a row fails to parse
with open('../data/processed/HCode_Severity.csv') as fp:
    # the first line is skipped (presumably a header row)
    fp.readline()
    for line in fp:
        # ignore whitespace-only lines instead of failing on the missing column
        if not line.strip():
            continue
        tmp = line.strip().split(',')
        hcode_severity[tmp[0]] = float(tmp[1])
print (hcode_severity)

{'H200': 1.0, 'H201': 1.0, 'H202': 0.8333333333333334, 'H203': 0.6666666666666666, 'H204': 0.5, 'H205': 0.3333333333333333, 'H206': 1.0, 'H207': 0.5, 'H208': 0.25, 'H220': 1.0, 'H221': 0.75, 'H222': 1.0, 'H223': 0.75, 'H224': 1.0, 'H225': 0.75, 'H226': 0.5, 'H227': 0.25, 'H228': 0.75, 'H229': 0.5, 'H230': 0.5, 'H231': 0.5, 'H232': 1.0, 'H240': 1.0, 'H241': 0.8, 'H242': 0.4, 'H250': 1.0, 'H251': 1.0, 'H252': 0.75, 'H260': 1.0, 'H261': 0.5, 'H270': 1.0, 'H271': 1.0, 'H272': 0.5, 'H280': 0.5, 'H281': 0.5, 'H282': 1.0, 'H283': 0.75, 'H284': 0.5, 'H290': 1.0, 'H300': 1.0, 'H301': 0.5, 'H302': 0.25, 'H303': 0.1, 'H304': 1.0, 'H305': 0.75, 'H310': 1.0, 'H311': 0.5, 'H312': 0.25, 'H313': 0.1, 'H314': 1.0, 'H315': 0.75, 'H316': 0.5, 'H317': 1.0, 'H318': 1.0, 'H319': 0.75, 'H320': 0.5, 'H330': 1.0, 'H331': 0.5, 'H332': 0.25, 'H333': 0.1, 'H334': 1.0, 'H335': 0.5, 'H336': 0.5, 'H340': 1.0, 'H341': 0.75, 'H350': 1.0, 'H350i': 1.0, 'H351': 0.75, 'H360': 1.0, 'H360F': 1.0, 'H360D': 1.0, 'H360FD': 1.

In [22]:
# Read Catalyst list
# Tab-separated file. Column 1 is used as the catalyst name; columns 5, 6 and 7
# hold ';'-separated code lists stored under the keys 'GSH', 'H' and 'P'.
# A catalyst that occurs on several rows gets the codes of all rows appended.
###
catalyst_hazzard_codes = {}
# (dictionary key, column index) of every code list that is collected
code_columns = (('GSH', 5), ('H', 6), ('P', 7))
# `with` guarantees the file handle is closed, also if a row fails to parse
with open('../data/processed/Catalysts_Overview.tsv') as fp:
    # the first line is skipped (presumably a header row)
    fp.readline()
    for line in fp:
        # ignore whitespace-only lines instead of failing on the missing columns
        if not line.strip():
            continue
        tmp = line.strip().split('\t')
        #print (tmp)
        # parse all code lists first so that a malformed row adds nothing
        row_codes = {code_type: [x for x in tmp[column].split(';') if x != '']
                     for code_type, column in code_columns}
        # first occurrence creates the entry, later occurrences extend it
        entry = catalyst_hazzard_codes.setdefault(tmp[1], {'GSH': [], 'H': [], 'P': []})
        for code_type, codes in row_codes.items():
            entry[code_type].extend(codes)
                         


In [23]:
# Hazard score of every catalyst
# Current definition: score = SUM of the severities of all its H-codes
# (a catalyst without any H-code scores 0.0)
##
hazzard_values = []
catalyst_hazzard = {}
for catalyst_name, codes in catalyst_hazzard_codes.items():

    # add up the severity of each H-code, starting from the default 0.0
    score = sum((hcode_severity[h_code] for h_code in codes['H']), 0.0)
    catalyst_hazzard[catalyst_name] = score

    # plain list of all scores (its min/max are needed for normalization)
    hazzard_values.append(score)

In [24]:
# Show the hazard score computed for every catalyst (rendered as notebook cell output)
catalyst_hazzard

{'zinc trifluoromethanesulfonate': 1.0,
 'Polystyrene-Supported AlCl3': 1.0,
 '1-butylimidazolium tetrafluoroborate': 2.75,
 'poly(ethylene glycol)-600': 0.0,
 'samarium(III) trifluoromethanesulfonate': 2.0,
 'polyaniline/SiO2': 0.0,
 'iron(II,III) oxide': 0.0,
 '1-methyl-3-(4-sulfobutyl)-1H-imidazol-3-ium hydrogensulfate': 2.0,
 'titanium(IV) oxide': 0.0,
 'nanoparticle-supported cobalt catalyst; air': 2.0,
 'zirconium(IV) chloride': 2.0,
 'aluminum oxide': 0.5,
 'Thiamine hydrochloride': 0.0,
 'silica supported 12-tungstophosphoric acid nanoparticles': 1.0,
 'hydrogenchloride': 2.5,
 'toluene-4-sulfonic acid': 2.0,
 'gallium(III) triflate': 2.0,
 'potassium fluoride on basic alumina': 0.5,
 'zirconium oxide salicylaldehyde-(3-aminopropyl)trimethoxysilane imine complex modified SBA-15': 2.0,
 'sulfonated rice husk ash': 2.0,
 '1-butyl-3-methylimidazolium Tetrafluoroborate': 2.75,
 'Graphite': 0.0,
 'ZnO-loaded mesoporous silica (KIT-6) (aged at 130 C and containing 10 wt% ZnO)': 1.0,


In [25]:
# Load the previously created reaction file (one row per reaction found, with its parameters)
###
reactions = pd.read_csv('../data/processed/All_Reactions.csv')
# Preview the first rows (rendered as notebook cell output)
reactions.head()

Unnamed: 0.1,Unnamed: 0,Temperature (Reaction Details) [C],Time (Reaction Details) [h],Solvent (Reaction Details),Catalyst,Reagent,Yield (numerical),Other Conditions,Number of Reaction Steps,Reaction: Links to Reaxys,References,Compound_ID,Type,Reagent/Catalyst
0,0,20,0.0833333,acetonitrile,,gallium(III) triflate,100.0,,1.0,https://www.reaxys.com/reaxys/secured/hopinto....,"Article; Cai, Jing-Jing; Zou, Jian-Ping; Pan, ...",6,Other,gallium(III) triflate
1,1,110,0.166667,,,PEG-400,100.0,Neat (no solvent),1.0,https://www.reaxys.com/reaxys/secured/hopinto....,"Article; Zhang, Xia Zhong; Wang, Jin Xian; Sun...",6,Other,PEG-400
2,2,20,0.0833333,methanol,,zirconium(IV) chloride,100.0,,1.0,https://www.reaxys.com/reaxys/secured/hopinto....,"Article; Aghapoor, Kioumars; Darabi, Hossein R...",6,Other,zirconium(IV) chloride
3,3,20,0.0666667,methanol,,silica-supported stannous chloride,100.0,,1.0,https://www.reaxys.com/reaxys/secured/hopinto....,"Article; Darabi, Hossein Reza; Aghapoor, Kioum...",6,Other,silica-supported stannous chloride
4,4,25,0.0833333,methanol,,vitamin B1,100.0,Sonication,1.0,https://www.reaxys.com/reaxys/secured/hopinto....,"Article; Aghapoor, Kioumars; Mohsenzadeh, Fars...",6,Other,vitamin B1


In [26]:
# The parameters live on very different scales (e.g. temperature 0-250C versus
# solvent score 1-6), so each one is min/max normalized (1 = max, 0 = min).
# This cell collects the min and max of every parameter.
##

# Solvent uses the fixed 1..6 score range, catalyst the range of computed scores
max_min = {
    'Solvent (Reaction Details)': {'max': 6, 'min': 1},
    'Reagent/Catalyst': {'max': max(hazzard_values), 'min': min(hazzard_values)},
}

# The remaining parameters take their range from the reaction table itself
preset_columns = ('Solvent (Reaction Details)', 'Reagent/Catalyst')
for column in important_columns:
    if column in preset_columns:
        continue

    # missing entries are stored as the string 'None'; turn them into NaN so
    # that they are ignored by max/min
    numeric = reactions[column].replace('None', np.nan).astype(float)

    max_min[column] = {'max': numeric.max(skipna=True),
                       'min': numeric.min(skipna=True)}

print (max_min)

{'Solvent (Reaction Details)': {'max': 6, 'min': 1}, 'Reagent/Catalyst': {'max': 8.25, 'min': 0.0}, 'Temperature (Reaction Details) [C]': {'max': 230.0, 'min': 20.0}, 'Time (Reaction Details) [h]': {'max': 24.0, 'min': 0.00416667}, 'Yield (numerical)': {'max': 100.0, 'min': 25.0}}


### Get values

In [28]:
# Accumulators that are reused by the later cells (mean spiderplot, heatmap):
#   all_data   - one list of normalized feature values per reaction that was kept
#   all_labels - parallel list of tags, 'Real_<id>' or 'Literature_<id>'
all_data = []
all_labels = []

# One spiderplot per compound. Compound IDs 1 .. number_of_reactions+1 are tried;
# the loop ends early at the first ID without any usable reaction (see `break` below).
for drug_index in range(1,number_of_reactions+2):
    print (drug_index)

    #get according reactions (HTS=Fabian)
    # Type == 'Other'  -> reactions found in the literature
    # Type == 'Fabian' -> the HTS ("real") conditions
    # NOTE: all_together is only referenced by the disabled code further down
    all_together = reactions.loc[(reactions['Compound_ID'] == drug_index)]
    current_reaction_literature = reactions.loc[(reactions['Compound_ID'] == drug_index) & (reactions['Type'] == 'Other')]
    current_reaction_fabian = reactions.loc[(reactions['Compound_ID'] == drug_index)  & (reactions['Type'] == 'Fabian')]

    
    # Remove bad reactions
    ### --> seem to have no big impact (so just keep data then)
    ### --> code artefact, could be used for a different project (here no impact)
    # (the block below is a bare string literal, i.e. it is never executed)
    '''
    num_all_reactions = len(current_reaction_literature)
    #In my opinion literature entries that have no yield/time reported are not valid ==> double check with Miriam/Fabian
    current_reaction_literature = current_reaction_literature.loc[(current_reaction_literature['Yield (numerical)'] != 'None')
                                                                  & (current_reaction_literature['Time (Reaction Details) [h]'] != 'None')
                                                                  & (current_reaction_literature['Temperature (Reaction Details) [C]'] != 'None')]
    
    all_together = all_together.loc[(all_together['Yield (numerical)'] != 'None') 
                                    & (all_together['Time (Reaction Details) [h]'] != 'None')
                                    & (all_together['Temperature (Reaction Details) [C]'] != 'None')]
    num_valid_reactions =len(current_reaction_literature)
    print ('Number of ractions removed: %d' %(num_all_reactions-num_valid_reactions))
    '''


    # results_mean_std: per feature, mean/std of the valid normalized literature values
    # results_all:      per feature, normalized values of all reactions of this compound
    #                   (HTS rows first, then literature rows; NaN where the entry is 'None')
    results_mean_std = {}
    results_all = {}


    ## REAL (= Fabians conditions/ =HTS)
    ####
    #go through all columns
    for f in important_columns:
        print (f)
        
        #list of normalized values for this specific feature (parameter)
        results_all[f] = []
        
        #plain numeric features (temperature, time, yield); solvent and catalyst are handled below
        if f != 'Solvent (Reaction Details)' and f!='Reagent/Catalyst':

            # NOTE: valid_values is filled here but not read before it is reset
            # in the literature section
            valid_values = []
            #normalize all values between min and max
            for val in current_reaction_fabian[f].values:
                if val != 'None':
                    norm_val = normalize(val,max_min[f]['min'],max_min[f]['max'])

                    valid_values.append(norm_val)
                    results_all[f].append(norm_val)
                else:
                    results_all[f].append(np.nan)


        elif f == 'Solvent (Reaction Details)':
            solvents = current_reaction_fabian[f].values
            # NOTE: valid_results is filled here but not read before it is reset
            # in the literature section
            valid_results = []
            for solvent in solvents:

                if solvent != 'None':
                    # several solvents are separated by ';' -> the most hazardous one counts
                    split_s = solvent.split(';')
                    values = []
                    for s in split_s:
                        s = s.strip()
                        # raises KeyError if the solvent is not in the solvent list
                        norm_val = normalize(solvent_hazzard[s], max_min[f]['min'], max_min[f]['max'])
                        values.append(norm_val)
                    valid_results.append(max(values))
                    results_all[f].append(max(values))

                else:
                    results_all[f].append(np.nan)

        
        elif f == 'Reagent/Catalyst':
            catalysts = current_reaction_fabian[f].values
            valid_results = []
            for catalyst in catalysts:
                if catalyst != 'None':
                    # raises KeyError if the catalyst is not in the catalyst overview
                    norm_val = normalize(catalyst_hazzard[catalyst], max_min[f]['min'], max_min[f]['max'])
                    
                    valid_results.append(norm_val)
                    results_all[f].append(norm_val)
                else:
                    results_all[f].append(np.nan)

    
        else:
            # cannot be reached: the three branches above cover every value of f
            print ('Unkown column')


    
    # Literature found reactions
    # Same normalization as above; the values are appended to results_all after
    # the HTS values, and mean/std of the valid values are stored per feature.
    ####
    for f in important_columns:
        #print (f)
        #results_all[f] = []

        if f != 'Solvent (Reaction Details)' and f!='Reagent/Catalyst':

            valid_values = []
            for val in current_reaction_literature[f].values:
                if val != 'None':
                    norm_val = normalize(val,max_min[f]['min'],max_min[f]['max'])

                    valid_values.append(norm_val)
                    results_all[f].append(norm_val)
                else:
                    results_all[f].append(np.nan)

            # np.mean/np.std of an empty list give NaN and emit a RuntimeWarning
            results_mean_std[f] = {'mean':np.mean(valid_values),
                                   'std':np.std(valid_values)}

        elif f == 'Solvent (Reaction Details)':
            solvents = current_reaction_literature[f].values
            valid_results = []
            for solvent in solvents:
                #print (s)

                if solvent != 'None':
                    # several solvents are separated by ';' -> the most hazardous one counts
                    split_s = solvent.split(';')
                    values = []
                    for s in split_s:
                        s = s.strip()
                        norm_val = normalize(solvent_hazzard[s], max_min[f]['min'], max_min[f]['max'])
                        values.append(norm_val)
                    valid_results.append(max(values))
                    results_all[f].append(max(values))

                else:
                    results_all[f].append(np.nan)
            results_mean_std[f] = {'mean':np.mean(valid_results),'std':np.std(valid_results)}
        
        elif f == 'Reagent/Catalyst':
            catalysts = current_reaction_literature[f].values
            valid_results = []
            for catalyst in catalysts:
                if catalyst != 'None' :

                    norm_val = normalize(catalyst_hazzard[catalyst], max_min[f]['min'], max_min[f]['max'])
                    
                    valid_results.append(norm_val)
                    results_all[f].append(norm_val)
                else:
                    results_all[f].append(np.nan)
            results_mean_std[f] = {'mean':np.mean(valid_results),'std':np.std(valid_results)}
                    
    
        else:
            # cannot be reached: the three branches above cover every value of f
            print ('Unkown column')

            
    #Number of reactions (plus fabians), taken from the first feature's list
    number_elements = (len(list(results_all.values())[0]))
    
    #this contains the actual lists of datapoints (2D list, each element is one reaction)
    individual_data = []
    for i in range(0,number_elements):
        tmp = []
        use_data_point = True
        for f in important_columns:

            val = results_all[f][i]

            #a NaN entry is replaced by the literature mean, but the reaction is
            #also flagged and therefore dropped below, so only complete reactions are kept
            if str(val) != 'nan':
                tmp.append(val)
            else:
                tmp.append(results_mean_std[f]['mean'])
                use_data_point= False
        if use_data_point:
            individual_data.append(tmp)


    #if empty then no reactions found, i.e., there is no compound 12
    # NOTE(review): `break` ends the whole loop, so compounds with a higher ID
    # are not processed either - confirm that this is intended (vs. `continue`)
    if len(individual_data) == 0:
        break
    
    #calculate the mean (for all literature points)
    # individual_data[0] is taken to be the HTS reaction and is left out of the mean
    # NOTE(review): this only holds if there is exactly one HTS row and it was not
    # dropped above because of a missing value - verify against the data
    mean_values_literature = []
    for f in range(0,len(individual_data[0])):
        tmp = []
        for point in individual_data[1:]:
            tmp.append(point[f])
        mean_values_literature.append(np.mean(tmp))

        
    #create final list of datapoints (last entry HTS, second last is Mean line)
    data_to_plot = []
    data_to_plot.extend(individual_data)
    
    # keep the reactions and their tags for the summary cells (first = HTS, rest = literature)
    all_data.extend(individual_data)
    all_labels.append('Real_'+str(drug_index))
    all_labels.extend(['Literature_'+str(drug_index)]*len(individual_data[1:]))
    
    data_to_plot.extend([mean_values_literature])
    real_entry = data_to_plot[0] #fabians entry is always the first
    data_to_plot.pop(0) #remove fabian from the beginning
    data_to_plot.extend([real_entry]) #add fabian reaction to the end (so it is on top of the other lines)


    

    # From here plotting
    # n in the title = number of literature reactions (all series minus mean and HTS)
    ###
    data = [proper_name,('Compound_'+str(drug_index)+' [n = %s]' %str(len(data_to_plot)-2) ,data_to_plot)]


    N = len(data[0])
    # registers the 'radar' projection (again on every iteration) and returns the spoke angles
    theta = radar_factory(N, frame='polygon')

    spoke_labels = data.pop(0)
    title, case_data = data[0]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(projection='radar'))
    fig.subplots_adjust(top=0.85, bottom=0.05)

    ax.set_rgrids([0.33, 0.66, 1.0])
    ax.set_title(title,  position=(0.5, 1.1), ha='center')

    last_index = len(case_data) - 1

    # last series = HTS (red), second last = literature mean (black line, grey fill),
    # all others = individual literature reactions (dashed, default colors)
    # (`line` is not used afterwards)
    for number,d in enumerate(case_data):
        if number == last_index:
            line = ax.plot(theta, d, lw = 2, c = 'red')
            ax.fill(theta, d,  alpha=0.25, c = 'red')

        elif number == last_index - 1:
            line = ax.plot(theta, d, lw = 3, c = 'black')
            ax.fill(theta, d,  alpha=0.25, c = 'grey')
        else:
            line = ax.plot(theta, d,ls='--')
            ax.fill(theta, d,  alpha=0.15)
    ax.set_varlabels(spoke_labels)
    #plt.legend(['Test','Test2'],loc=1)
    #plt.show()
    # one PDF per compound, named after the plot title
    plt.savefig('../results/2_Spiderplot_Auguts2020/'+title+'.pdf')
    plt.close()



1
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalyst
Yield (numerical)
2
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalyst
Yield (numerical)
3
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalyst
Yield (numerical)
4
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalyst
Yield (numerical)
5
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalyst
Yield (numerical)


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)
  mask = r < 0


6
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalyst
Yield (numerical)
7
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalyst
Yield (numerical)
8
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalyst
Yield (numerical)
9
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalyst
Yield (numerical)
10
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalyst
Yield (numerical)
11
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalyst
Yield (numerical)
12
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalyst
Yield (numerical)
13
Temperature (Reaction Details) [C]
Time (Reaction Details) [h]
Solvent (Reaction Details)
Reagent/Catalys

### Make Mean Spider plot
Same as before, but with just one overall mean spiderplot (summarizing all 13 reactions).

In [29]:
# Split the collected reactions by their tag: 'Real_*' = HTS, everything else = literature
real_data = [row for row, tag in zip(all_data, all_labels) if 'Real' in tag]
literature_data = [row for row, tag in zip(all_data, all_labels) if 'Real' not in tag]


# Feature-wise mean over all literature reactions ...
mean_values_literature = [
    np.mean([row[feature_index] for row in literature_data])
    for feature_index in range(len(literature_data[0]))
]

# ... and over all HTS reactions
mean_values_real = [
    np.mean([row[feature_index] for row in real_data])
    for feature_index in range(len(real_data[0]))
]


# Series to draw: every literature reaction, then the literature mean,
# and the HTS mean last so that it is drawn on top
data_to_plot = literature_data + [mean_values_literature, mean_values_real]



# From here plotting
###
spoke_labels = proper_name
# n = number of literature reactions (all series minus the two mean lines)
title = 'Mean_Compounds [n = %s]' %str(len(data_to_plot)-2)
case_data = data_to_plot

theta = radar_factory(len(spoke_labels), frame='polygon')

fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(projection='radar'))
fig.subplots_adjust(top=0.85, bottom=0.05)

ax.set_rgrids([0.33, 0.66, 1.0])
ax.set_title(title,  position=(0.5, 1.1), ha='center')

last_index = len(case_data) - 1

# (line style, fill style) of the two highlighted series, keyed by position;
# every other series is a dashed line with a faint fill in the default colors
highlighted = {
    last_index: (dict(lw=2, c='red'), dict(alpha=0.25, c='red')),
    last_index - 1: (dict(lw=3, c='black'), dict(alpha=0.25, c='grey')),
}
regular = (dict(ls='--'), dict(alpha=0.075))

for position, series in enumerate(case_data):
    line_style, fill_style = highlighted.get(position, regular)
    ax.plot(theta, series, **line_style)
    ax.fill(theta, series, **fill_style)
ax.set_varlabels(spoke_labels)

# Save the mean spiderplot
##
plt.savefig('../results/2_Spiderplot_Auguts2020/'+title+'.pdf')
plt.close()

### Heatmap 
Make a heatmap showing the overall reaction conditions (some parameters are rescaled to a manually chosen range).

In [31]:
# Manual display range per feature (in normalized units): values are rescaled
# so that 'Min' maps to 0 and 'Max' maps to 1, and clipped outside that range
manual_borders = {
    'Temperature (Reaction Details) [C]': {'Max':0.75, 'Min':0.0},
    'Time (Reaction Details) [h]': {'Max':0.5, 'Min':0.0},
    'Yield (numerical)': {'Max':1.0, 'Min':0.5},
    'Solvent (Reaction Details)': {'Max':1.0,'Min':0.0},
    'Reagent/Catalyst': {'Max':0.4,'Min':0.0},
}

# Order of the features inside every entry of all_data
feature_order = [
    'Temperature (Reaction Details) [C]',
    'Time (Reaction Details) [h]',
    'Solvent (Reaction Details)',
    'Reagent/Catalyst',
    'Yield (numerical)',
]

# Row colors: HTS reactions orange, literature reactions grey
colors = ['#F8B301' if 'Real' in label else 'grey' for label in all_labels]


def _rescale_clipped(value, borders):
    """Rescale `value` to the Min/Max range in `borders`, clipped to 0..1."""
    scaled = (value - borders['Min'])/(borders['Max']-borders['Min'])
    if scaled > 1:
        return 1
    if scaled < 0:
        return 0
    return scaled


data_inverted = []
for reaction in all_data:
    # every feature except the last one (yield) is inverted (1 - value);
    # presumably so that a larger number is always the more favourable one
    row = [1 - _rescale_clipped(value, manual_borders[feature])
           for value, feature in zip(reaction[:-1], feature_order)]

    # the yield (last entry) is rescaled but kept in its original direction
    row.append(_rescale_clipped(reaction[-1], manual_borders['Yield (numerical)']))

    data_inverted.append(row)

# Plot the actual clustermap
##
sns.clustermap(data_inverted,xticklabels=important_columns, row_colors=[colors], cmap='RdYlGn')
plt.savefig('../results/2_Spiderplot_Auguts2020/Reactions_Overview.pdf')
plt.close()