In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn3, venn3_circles
import warnings
import os
from tqdm import tqdm
from scipy.stats import chisquare
warnings.filterwarnings('ignore')

In [None]:
# Load the 1000 most frequent unstructured medication entries. According to the
# notebook authors these were evaluated and corrected manually by an
# interdisciplinary team. The file is semicolon-separated.
df = pd.read_csv(
    '../../data_in/FINAL_TOP1000.csv',
    sep=";",
    skipinitialspace=True,
)
# Swap every line feed inside MEDICATION for a carriage return.
# NOTE(review): this does not strip leading/trailing whitespace; presumably the
# swap aligns the text with the raw medication orders used later — confirm.
df = df.assign(MEDICATION=df['MEDICATION'].str.replace('\n','\r'))


In [None]:
# Determine the correct ATC code for every TOP1000 row.
# Each pair is (column that must be truthy, column that supplies the code);
# the first pair whose flag is truthy wins, in this order.
_ATC_SOURCE_PRIORITY = (
    ('ATC_CODE', 'ATC_CODE'),
    ('eval1', 'STEP1'),
    ('eval2', 'STEP2'),
    ('eval31', 'STEP31'),
    ('eval32', 'STEP32'),
    ('eval33', 'STEP33'),
    ('correct', 'correct'),
)


def _resolve_correct_atc(row):
    """Return the ATC code for one row, or "" when no source applies.

    `row` is expected to have missing values already replaced by 0 so that
    empty cells are falsy. Returning "" (the value the column used to be
    initialised with) instead of reusing a loop variable guarantees that a
    row without any valid source can never inherit the previous row's code.
    """
    for flag_col, code_col in _ATC_SOURCE_PRIORITY:
        if row[flag_col]:
            return row[code_col]
    return ""


# Positional assignment: one resolved value per row, in row order.
df['ATC_CORRECT'] = [
    _resolve_correct_atc(row) for _, row in df.fillna(0).iterrows()
]

In [None]:
# How often do the algorithms agree (TRUExy == 1), and in how many of those
# rows is the paired evaluation column marked correct (== 1)?
for agree_col, eval_col in (
    ("TRUE12", "eval1"),
    ("TRUE23", "eval2"),
    ("TRUE13", "eval1"),
    ("TRUE123", "eval1"),
):
    agrees = df[agree_col] == 1
    print(f"{agree_col} total: ", len(df.loc[agrees]))
    print(f"{agree_col} + correct: ", len(df.loc[agrees & (df[eval_col] == 1)]))

# Algorithm 3 (eval31): prescription totals ("size") and distinct entries.
step3_correct = df['eval31'] == 1
step3_wrong = df['eval31'] == 0
print("TOP1000- total correct STEP3: ", df.loc[step3_correct, "size"].sum())
print("TOP1000- total WRONG STEP3: ", df.loc[step3_wrong, "size"].sum())
print("TOP1000-  correct STEP3: ", len(df.loc[step3_correct]))
print("TOP1000-  WRONG STEP3: ", len(df.loc[step3_wrong]))

# Investigation of wrong results for algorithm 3: drop rows whose correct
# value is "nomed"/"unspec" and rows with level "3" or "4".
# NOTE(review): `level` is compared against strings; if the column is numeric
# these two conditions filter nothing — confirm the dtype.
# .copy() makes wrongDF an independent frame so the column assignments below
# do not operate on a slice of df.
wrongDF = df.loc[
    step3_wrong
    & (df['ATC_CORRECT'] != "nomed")
    & (df['ATC_CORRECT'] != "unspec")
    & (df['level'] != "3")
    & (df['level'] != "4")
].copy()

groupedATC = wrongDF.groupby("ATC_CORRECT")["eval31"].value_counts()
grouped_ATC = groupedATC.rename('count').reset_index()

# Prefixes of the correct code: first 1, 3, 4 and 5 characters.
for level_col, n_chars in (('ATC_L1', 1), ('ATC_L2', 3), ('ATC_L3', 4), ('ATC_L4', 5)):
    wrongDF[level_col] = wrongDF['ATC_CORRECT'].str[:n_chars]

wrongDF[["MEDICATION","size","LEV1","ATC_CORRECT","ATC_L1","ATC_L2","ATC_L3","ATC_L4"]].describe(include='all')

In [None]:
# Load data set 1 (all medication orders); every missing cell becomes ''.
df_med_orders = pd.read_csv(
    '../../data_in/FINAL_medication_orders.csv',
    skipinitialspace=True,
    low_memory=False,
    lineterminator='\n',
)
df_med_orders = df_med_orders.fillna(str())

# START_DATE has to be datetime for the year filter.
df_med_orders["START_DATE"] = pd.to_datetime(df_med_orders["START_DATE"])

# Keep everything that is NOT before 2016 and NOT after 2020. The mask is
# deliberately phrased as a negation so rows are selected exactly as before.
start_year = df_med_orders["START_DATE"].dt.year
df_med_orders = df_med_orders[~((start_year < 2016) | (start_year > 2020))]

# Drop rows whose MEDICATION is null.
# NOTE(review): missing values were already replaced by '' above, so this
# presumably removes nothing — confirm whether empty strings should be dropped.
df_med_orders = df_med_orders[df_med_orders['MEDICATION'].notnull()]

# MEDICATION and CAT_MATCH are handled as plain strings from here on.
df_med_orders = df_med_orders.astype({'MEDICATION': str, 'CAT_MATCH': str})

# "Fuzzy Match" is counted as "No Match".
df_med_orders['CAT_MATCH'] = df_med_orders['CAT_MATCH'].replace({"Fuzzy Match": "No Match"})

# Trim surrounding whitespace from MEDICATION.
# NOTE(review): the trailing .replace is Series.replace (whole-value match), not
# .str.replace, so it only affects cells that are exactly "\n" — confirm intent.
medication_trimmed = df_med_orders['MEDICATION'].str.strip()
df_med_orders['MEDICATION'] = medication_trimmed.replace("\n", '')

# Non-null counts per CAT_MATCH value (cell output).
df_med_orders.groupby('CAT_MATCH').count()


In [None]:
# Structured orders (CAT_MATCH == 'Match') already carry their ATC code:
# copy ATC_CODE into ATC_CORRECT for exactly those rows. A single masked,
# index-aligned assignment replaces the former loop that rescanned the whole
# frame once per distinct code; rows that are not 'Match' are left untouched.
is_structured = df_med_orders['CAT_MATCH'] == 'Match'
df_med_orders.loc[is_structured, 'ATC_CORRECT'] = df_med_orders.loc[is_structured, 'ATC_CODE']

In [None]:
# Rule-based "no medication" marker: a MEDICATION text that, after trimming
# surrounding whitespace, starts with one of these prefixes gets
# ATC_CORRECT = 'nomed'.
medication_text = df_med_orders['MEDICATION'].str.strip()
for nomed_prefix in ('BE ', 'BE:', '1 BE', 'BB ', '!'):
    df_med_orders.loc[medication_text.str.startswith(nomed_prefix), 'ATC_CORRECT'] = 'nomed'

In [None]:
def _has_code(step_col):
    """Mask of TOP1000 rows whose `step_col` holds a non-missing, non-empty value.

    Comparing with '' alone is not enough: a missing value (NaN) compares
    unequal to '' and would be counted as an identified code.
    """
    return df[step_col].notna() & (df[step_col] != '')


def _print_counts(description, mask):
    """Print distinct-entry count and summed `size` for the rows selected by `mask`."""
    print("TOP 1000 - Number of of different txt entries " + description + ": ", len(df.loc[mask]))
    print("TOP 1000 - Total number of drug prescriptions " + description + ": ", df.loc[mask, "size"].sum())


_SEPARATOR = "****************************************************************************************************************"

# Rows where the algorithms agree (TRUExy == 1) and the listed step has a code.
print(_SEPARATOR)
for agree_col, step_col, algos in (
    ('TRUE12', 'STEP1', "1 + 2"),
    ('TRUE13', 'STEP1', "1 + 3"),
    ('TRUE23', 'STEP2', "2 + 3"),
    ('TRUE123', 'STEP2', "1 + 2 + 3"),
):
    _print_counts("algorithm " + algos + " results match", (df[agree_col] == 1) & _has_code(step_col))

# Rows for which each algorithm produced a code at all.
print(_SEPARATOR)
for step_col, algo in (('STEP1', "1"), ('STEP2', "2"), ('STEP31', "3")):
    _print_counts("algorithm " + algo + " identified an ATC code", _has_code(step_col))

In [None]:
# Internal validation: check that applying the TOP1000 results to the original
# drug prescriptions worked, by joining on the MEDICATION text.
# NOTE(review): `unstructured`, `structured` and `no_med` are not defined in any
# cell visible here — make sure the cell creating them runs before this one.
unstruct_merge_left = pd.merge(unstructured, df, how='inner', on='MEDICATION')
# Rows present only in the TOP1000 table / only in the unstructured orders.
unstruct_merge_RO = pd.merge(unstructured, df, how='outer', on='MEDICATION', indicator=True).query('_merge=="right_only"')
unstruct_merge_LO = pd.merge(unstructured, df, how='left', on='MEDICATION', indicator=True).query('_merge=="left_only"')
# Unstructured orders enriched with the evaluated ATC_CORRECT value.
unstruct_merge_L = pd.merge(unstructured, df[['MEDICATION','ATC_CORRECT']], how='left', on='MEDICATION')

print("Expected size as calculated at the beginnung: ",df['size'].sum())
print("All medication orders: ", len(df_med_orders))
print("All no medications removed by rules: ", len(no_med))
print("All sctructured: ", len(structured))
print("All unsctructured: ", len(unstructured))
print("right outer join: ", unstruct_merge_RO['size'].sum())
print("left outer join: ", unstruct_merge_LO['size'].sum())
print("left join: ", len(unstruct_merge_L))

print("Inner Join: ", len(unstruct_merge_left))
unstruct_merge_RO.to_csv('../../data_in/df_merge_RO.csv', index=False)

# Keep only the ATC_CORRECT coming from the TOP1000 table (merge suffix _y).
# `columns=` is used because passing the axis positionally to drop() is not
# accepted by current pandas releases.
unstruct_merge_L = (
    unstruct_merge_L
    .drop(columns='ATC_CORRECT_x')
    .rename(columns={"ATC_CORRECT_y": "ATC_CORRECT"})
)
unstruct_merge_L.describe(include ='all')

In [None]:
# Stack the evaluated unstructured orders (carrying ATC_CORRECT from the
# TOP1000 table) on top of the structured orders. Row labels are kept as they
# are (ignore_index is off), so index values may repeat in the result.
FINAL_med_orders = pd.concat(
    objs=[unstruct_merge_L, structured],
    axis=0,
    ignore_index=False,
)

In [None]:
# Re-apply the rule-based "no medication" marker to the combined frame: a
# MEDICATION text that, after trimming surrounding whitespace, starts with one
# of these prefixes gets ATC_CORRECT = 'nomed'.
final_medication_text = FINAL_med_orders['MEDICATION'].str.strip()
for nomed_prefix in ('BE ', 'BE:', '1 BE', 'BB ', '!'):
    FINAL_med_orders.loc[final_medication_text.str.startswith(nomed_prefix), 'ATC_CORRECT'] = 'nomed'

In [None]:
# Nullable integer dtype so the evaluation flags can be compared with 0/1
# while still allowing missing values.
df['eval1'] = df['eval1'].astype('Int64')
df['eval2'] = df['eval2'].astype('Int64')

# Per algorithm: number of distinct entries and summed prescriptions ("size")
# evaluated as correct (flag == 1) and as wrong (flag == 0). Every label names
# the algorithm whose evaluation column is actually being counted.
for algo_label, eval_col in (("1", 'eval1'), ("2", 'eval2'), ("3 option 1", 'eval31')):
    for verdict, flag in (("correct", 1), ("WRONG", 0)):
        selected = df[eval_col] == flag
        print(f"{verdict} number of results by algorithm {algo_label}: ", len(df.loc[selected]))
        print(
            f"TOP 1000 - total drug prescription with {verdict} ATC code by algorithm {algo_label}: ",
            df["size"].loc[selected].sum(),
        )

In [None]:
# Preparation for the visualisations: one column per ATC level, taken as the
# first 1, 3, 4 and 5 characters of ATC_CORRECT.
for level_col, n_chars in (('ATC_L1', 1), ('ATC_L2', 3), ('ATC_L3', 4), ('ATC_L4', 5)):
    FINAL_med_orders[level_col] = FINAL_med_orders['ATC_CORRECT'].str[:n_chars]

# Rows without any evaluation result get the explicit label 'no_eval'.
FINAL_med_orders['ATC_CORRECT'] = FINAL_med_orders['ATC_CORRECT'].fillna('no_eval')

# The three non-code labels are kept in full at level 1 instead of being cut
# down to their first character.
for special_label in ("nomed", "unspec", "no_eval"):
    FINAL_med_orders.loc[FINAL_med_orders['ATC_CORRECT'] == special_label, "ATC_L1"] = special_label

# Numeric structuredness flag: 0 for "No Match", 1 for "Match".
for match_label, match_flag in (("No Match", 0), ("Match", 1)):
    FINAL_med_orders.loc[FINAL_med_orders['CAT_MATCH'] == match_label, "CAT_MATCH_INT"] = match_flag

In [None]:
# Number of orders per CAT_MATCH_INT value within each ATC level 1 group.
# groupedATC stays a Series; grouped_ATC is its flat table with a 'count' column.
groupedATC = FINAL_med_orders.groupby("ATC_L1")["CAT_MATCH_INT"].value_counts()
grouped_ATC = groupedATC.reset_index(name='count')
print(grouped_ATC)

# Same counts, broken down by (level 1, level 2) pair, as a flat table.
groupedATC_L2 = (
    FINAL_med_orders
    .groupby(["ATC_L1", "ATC_L2"])["CAT_MATCH_INT"]
    .value_counts()
    .reset_index(name='count')
)
print(groupedATC_L2)

In [None]:
# Input for the scatter plot (Figure 5 of the publication): counts per full
# ATC_CORRECT value, split by CAT_MATCH_INT.
groupedATC_L5 = (
    FINAL_med_orders
    .groupby("ATC_CORRECT")["CAT_MATCH_INT"]
    .value_counts()
    .reset_index(name='count')
)

# One row per code, one column per CAT_MATCH_INT value; absent combinations
# are filled with 0.
spreadATC5 = groupedATC_L5.pivot_table(
    index=['ATC_CORRECT'],
    columns='CAT_MATCH_INT',
    values='count',
    fill_value=0,
).reset_index()
# Positional rename: assumes the pivot produced exactly the two value columns
# in the order 0 (unstructured), 1 (structured).
spreadATC5.columns = ['ATC_CORRECT', 'unstructured', 'structured']

spreadATC5['total'] = spreadATC5['unstructured'] + spreadATC5['structured']
spreadATC5['percent structured'] = spreadATC5['structured'].div(spreadATC5['total']).mul(100).round(2)
spreadATC5['percent unstructured'] = spreadATC5['unstructured'].div(spreadATC5['total']).mul(100).round(2)
spreadATC5 = spreadATC5.sort_values(by='total')

# Export: data of multimedia appendix 3.
spreadATC5.to_csv('../../data_results/scatter_input.csv', index=False)

In [None]:
# Data for the stacked bar plot (ATC level 1 only): one row per level 1 group
# with structured / unstructured counts and their percentages.
spreadATC = grouped_ATC.pivot_table(
    index=['ATC_L1'],
    columns='CAT_MATCH_INT',
    values='count',
    fill_value=0,
).reset_index()
# Positional rename: assumes value columns in the order 0, 1.
spreadATC.columns = ['ATC_L1', 'unstructured', 'structured']
spreadATC['total'] = spreadATC['unstructured'] + spreadATC['structured']
spreadATC['Perc_Struct'] = spreadATC['structured'].div(spreadATC['total']).mul(100).round(2)
spreadATC['Perc_UnStruct'] = spreadATC['unstructured'].div(spreadATC['total']).mul(100).round(2)
spreadATC = spreadATC.sort_values(by='Perc_Struct')

# Share of all orders that belongs to codes with more than 45000 orders,
# followed by those codes themselves.
is_frequent_code = spreadATC5["total"] > 45000
print(spreadATC5.loc[is_frequent_code, "total"].sum() / spreadATC5["total"].sum())
print(spreadATC5.loc[is_frequent_code])

In [None]:
# For the plot (Figure 5) drop the rows labelled nomed, unspec or no_eval.
spreadATC5 = spreadATC5[~spreadATC5['ATC_CORRECT'].isin(['nomed', 'unspec', 'no_eval'])]

In [None]:
# Structured / unstructured counts and percentages per (ATC level 1, ATC
# level 2) pair.
spreadATC_L2 = groupedATC_L2.pivot_table(
    index=['ATC_L1', 'ATC_L2'],
    columns='CAT_MATCH_INT',
    values='count',
    fill_value=0,
).reset_index()
# Positional rename: assumes value columns in the order 0, 1.
spreadATC_L2.columns = ['ATC_L1', 'ATC_L2', 'unstructured', 'structured']
spreadATC_L2['total'] = spreadATC_L2['unstructured'] + spreadATC_L2['structured']
spreadATC_L2['Perc_Struct'] = spreadATC_L2['structured'].div(spreadATC_L2['total']).mul(100).round(2)
spreadATC_L2['Perc_UnStruct'] = spreadATC_L2['unstructured'].div(spreadATC_L2['total']).mul(100).round(2)
spreadATC_L2 = spreadATC_L2.sort_values(by='total')

In [None]:
# Figure 4: stacked horizontal bars, structured versus unstructured share per
# ATC level 1 group. Rows labelled nomed, unspec or no_eval are left out.
spreadATC = spreadATC[~spreadATC['ATC_L1'].isin(['nomed', 'unspec', 'no_eval'])]

# rcParams only affect figures created afterwards, so they are set BEFORE the
# figure is created; otherwise the first run of this cell would still use the
# previous defaults and the output would depend on how often the cell was run.
plt.rcParams["figure.figsize"] = [12,10]
plt.rcParams["figure.autolayout"] = True
fig, ax = plt.subplots()

ax.barh(spreadATC['ATC_L1'], spreadATC['Perc_UnStruct'], align='center', height=0.5, color='#da5543', label='unstructured')
# The structured share is stacked to the right of the unstructured share.
ax.barh(spreadATC['ATC_L1'], spreadATC['Perc_Struct'], align='center', height=0.5, left=spreadATC['Perc_UnStruct'], color='#6786f6', label='structured')
ax.set_yticks(spreadATC['ATC_L1'])
ax.set_ylabel('ATC level 1 groups')
ax.set_xlabel('percentage')
# NOTE(review): the 85.18% in the title is hard-coded — verify it against the data.
ax.set_title('Structuredness of drug order data by ATC Level 1 for 85.18% of initial data set ds1')
# Value labels on the first bar container (the unstructured bars).
ax.bar_label(ax.containers[0], label_type='edge', color='black', fontsize=10, padding=3)
ax.legend()
plt.tight_layout()
plt.savefig('../../data_results/finalAssessmentATCL1.png', dpi=400,bbox_inches='tight')

In [None]:

# Rebuild the level 1 / level 2 table of structured and unstructured counts
# with percentages (same computation as the earlier spreadATC_L2 cell).
spreadATC_L2 = groupedATC_L2.pivot_table(
    index=['ATC_L1', 'ATC_L2'],
    columns=['CAT_MATCH_INT'],
    values='count',
    fill_value=0,
).reset_index()
# Positional rename: assumes value columns in the order 0, 1.
spreadATC_L2.columns = ['ATC_L1', 'ATC_L2', 'unstructured', 'structured']
spreadATC_L2['total'] = spreadATC_L2['unstructured'] + spreadATC_L2['structured']
spreadATC_L2['Perc_Struct'] = spreadATC_L2['structured'].div(spreadATC_L2['total']).mul(100).round(2)
spreadATC_L2['Perc_UnStruct'] = spreadATC_L2['unstructured'].div(spreadATC_L2['total']).mul(100).round(2)
spreadATC_L2 = spreadATC_L2.sort_values(by='total')


In [None]:
# Keep only real ATC codes: rows labelled nomed, unspec or no_eval are removed.
spreadATC5 = spreadATC5[~spreadATC5['ATC_CORRECT'].isin(['nomed', 'unspec', 'no_eval'])]

In [None]:
# Figure 5a: one point per ATC code, total number of orders (x) against the
# percentage of structured orders (y).
spreadATC5['ATC level 1'] = spreadATC5['ATC_CORRECT'].str[:1]

# Overall structured fraction across all plotted codes.
meanVal = spreadATC5["structured"].sum() / spreadATC5["total"].sum()
print(meanVal)

# Custom colour list, installed as the seaborn default palette below.
colorsInes = [
    "#1984c5", "#22a7f0", "#63bff0", "#a7d5ed", "#e2e2e2",
    "#e1a692", "#de6e56", "#e14b31", "#c23728",
]

fig, ax = plt.subplots(figsize=(25,10))
# These defaults apply to figures created after this point.
plt.rcParams.update({"figure.figsize": [10,10], "figure.autolayout": True})
# NOTE(review): set_palette is called for its global effect; its return value
# is presumably None, so customPalette should not be relied on — confirm.
customPalette = sns.set_palette(sns.color_palette(colorsInes))
# The scatter itself is coloured by its own palette argument.
scatter = sns.scatterplot(
    data=spreadATC5,
    x="total",
    y="percent structured",
    hue="percent structured",
    palette="coolwarm_r",
)

ax.set_ylim(0, 100)
plt.legend(bbox_to_anchor=(1.9,1), loc="upper left")
plt.savefig('../../data_results/scatter.png', dpi=400,bbox_inches='tight')

In [None]:
# Figure 5b: one scatter panel per ATC level 1 group (total orders against
# percentage structured), arranged in a 7 x 2 grid with shared axes.
# The letters are listed in panel order: left to right, then top to bottom.
atc_l1_panels = ["S", "H", "R", "C", "J", "V", "B", "N", "D", "L", "A", "G", "M", "P"]

fig, ax = plt.subplots(7, 2, figsize=(5,12), sharex=True, sharey=True)

# ax.flat walks the grid row by row, matching the order of atc_l1_panels.
# Every panel uses the same column names, so a single call site replaces the
# fourteen copies (one of which named a non-existent hue column).
for panel_ax, atc_letter in zip(ax.flat, atc_l1_panels):
    scatter = sns.scatterplot(
        data=spreadATC5.loc[spreadATC5['ATC level 1'] == atc_letter],
        x="total",
        y="percent structured",
        hue="percent structured",
        palette="coolwarm_r",
        ax=panel_ax,
        legend=False,
    )
    panel_ax.set_title("ATC L1 - " + atc_letter).set_fontsize(12)

plt.savefig('../../data_results/scatterMulti.png', dpi=400,bbox_inches='tight')