In [None]:
## Import data from CSV file
# Practice-level data, range December 2018 to November 2019
# Other category (as per BNF 040304) contains venlafaxine, mirtazapine, and duloxetine

import pandas as pd
import numpy as np
import os
# Directory holding the CSV export. It defaults to the original author's local folder,
# so behaviour is unchanged unless the ANTIDEPRESSANTS_DATA_DIR environment variable is set;
# set that variable to run the notebook on another machine without editing this cell.
DATA_DIR = os.environ.get(
    "ANTIDEPRESSANTS_DATA_DIR",
    "/Users/paulbogowicz/Documents/Research/Oxford University/OpenPrescribing - antidepressants/Data",
)

# The working directory is changed (as before) so that relative file names resolve inside DATA_DIR.
os.chdir(DATA_DIR)

# Read the CSV and standardise the column names used throughout the notebook.
df1 = pd.read_csv("antidepressants_last_12_months.csv").rename(
    columns={"practice":"Practice","month":"Month","total_items":"Total","total_cost":"Cost","MAOi":"MAOI"}
)
print("No of practices: ", df1["Practice"].nunique())
df1.head()

### Practice-level calculations

In [2]:
# Calculate class totals
# "Others" is the "Other" column minus venlafaxine and duloxetine (counted under SNRI below),
# plus maprotiline, mianserin and trazodone (removed from the "Tricyclics" column below).

df1["Others"] = df1["Other"] - df1["Venlafaxine"] - df1["Duloxetine"] + df1["Maprotiline"] + df1["Mianserin"] + df1["Trazodone"]
df1["SSRI"] = df1[["Citalopram","Escitalopram","Fluoxetine","Fluvoxamine","Paroxetine","Sertraline"]].sum(axis=1)
df1["SNRI"] = df1[["Duloxetine","Venlafaxine"]].sum(axis=1)
df1["Tricyclic"] = df1["Tricyclics"] - df1["Maprotiline"] - df1["Mianserin"] - df1["Trazodone"]

# Sanity check: per-row residual between Total and the sum of the five classes (expected to be 0).
df1["Test"] = df1["Total"] - df1["Others"] - df1["SSRI"] - df1["SNRI"] - df1["Tricyclic"] - df1["MAOI"]
print(df1["Test"].sum())

# The net sum above can be 0 even if individual rows are inconsistent (positive and negative
# residuals cancel), so also count rows whose residual is not exactly 0. NaN residuals count as
# mismatches because NaN != 0 is True. Nothing extra is printed when every row is consistent.
n_mismatched = int((df1["Test"] != 0).sum())
if n_mismatched:
    print("WARNING: class totals do not add up to Total in", n_mismatched, "row(s)")

0


In [3]:
# Calculate percentages for each class
# Maps each new percentage column to the class-total column it is derived from.
# Note that the "Other %" column is derived from the "Others" column.

class_pct_sources = {
    "SSRI %": "SSRI",
    "SNRI %": "SNRI",
    "MAOI %": "MAOI",
    "Tricyclic %": "Tricyclic",
    "Other %": "Others",
}
for pct_col, source_col in class_pct_sources.items():
    df1[pct_col] = 100*df1[source_col]/df1["Total"]

# Calculate percentages for top 10 most prescribed (2018) and others of interest
# Each drug column "<Drug>" yields a "<Drug> %" column: its share of Total, in percent.

drugs_of_interest = [
    "Amitriptyline",
    "Citalopram",
    "Duloxetine",
    "Escitalopram",
    "Fluoxetine",
    "Mirtazapine",
    "Paroxetine",
    "Sertraline",
    "Trazodone",
    "Venlafaxine",
    "Dosulepin",
    "Trimipramine",
    "Isocarboxazid",
    "Iproniazid",
    "Tranylcypromine",
    "Moclobemide",
    "Phenelzine",
]
for drug in drugs_of_interest:
    df1[drug + " %"] = 100*df1[drug]/df1["Total"]

In [4]:
# Calculate summary stats for different classes of antidepressants, at practice level

import scipy.stats as stats

# Percentage columns summarised in this cell (one per antidepressant class)
class_cols = ["SSRI %", "SNRI %", "Tricyclic %", "MAOI %","Other %"]

# Calculate summary statistics for the selected columns only, and transpose so each class is a row.
# percentiles is passed as a list (the list-like form documented for DataFrame.describe).

smry = df1[class_cols].describe(percentiles=[0.1,0.25,0.5,0.75,0.9]).transpose()

# Calculate IQR, interdecile range (IDR) and kurtosis

smry["IQR"] = smry["75%"] - smry["25%"]
smry["IDR"] = smry["90%"] - smry["10%"]

# Kurtosis uses scipy.stats.kurtosis with its default arguments. The Series is built keyed by
# column label rather than filled through integer positions, so it does not depend on pandas'
# deprecated positional fallback for integer keys on a string index, and it gets a float dtype.
k = pd.Series({col: stats.kurtosis(df1[col]) for col in class_cols}, name="Kurtosis")
kdf = k.to_frame()
smry["Kurtosis"] = k  # aligned to smry's rows by label

# Select columns and print

smry2 = smry.loc[:, ["mean", "std","10%","25%","50%","75%","90%","IQR","IDR","Kurtosis"]]
smry2 = smry2.rename(columns={'mean': 'Mean', "10%":"Lowest decile",'25%': 'Lower Quartile', '50%': 'Median', '75%': 'Upper Quartile',"90%":"Highest decile","std":"Std Dev"})
smry2


Unnamed: 0,Mean,Std Dev,Lowest decile,Lower Quartile,Median,Upper Quartile,Highest decile,IQR,IDR,Kurtosis
SSRI %,54.459472,6.632071,46.393925,50.484663,54.660194,58.531195,62.157709,8.046533,15.763784,1.622537
SNRI %,9.244835,3.266697,5.532049,7.119322,8.978145,11.055069,13.262293,3.935747,7.730243,7.739061
Tricyclic %,21.540219,5.44418,15.348039,18.167634,21.231423,24.502506,27.908365,6.334872,12.560326,3.077487
MAOI %,0.05674,0.1634,0.0,0.0,0.0,0.047534,0.172384,0.047534,0.172384,91.422811
Other %,14.698734,5.340459,8.738215,11.11823,14.236186,17.576328,20.953369,6.458098,12.215154,7.868434


In [5]:
# Calculate summary stats for top 10 most prescribed antidepressants (2018), at practice level

# Percentage columns summarised in this cell
top10_cols = ["Sertraline %", "Citalopram %", "Amitriptyline %", "Mirtazapine %","Fluoxetine %","Venlafaxine %","Duloxetine %","Paroxetine %","Trazodone %","Escitalopram %"]

# Calculate summary statistics for the selected columns only, and transpose so each drug is a row.
# percentiles is passed as a list (the list-like form documented for DataFrame.describe).

smry3 = df1[top10_cols].describe(percentiles=[0.1,0.25,0.5,0.75,0.9]).transpose()

# Calculate IQR, interdecile range (IDR) and kurtosis

smry3["IQR"] = smry3["75%"] - smry3["25%"]
smry3["IDR"] = smry3["90%"] - smry3["10%"]

# Kurtosis uses scipy.stats.kurtosis with its default arguments. The Series is built keyed by
# column label rather than filled through integer positions, so it does not depend on pandas'
# deprecated positional fallback for integer keys on a string index, and it gets a float dtype.
k2 = pd.Series({col: stats.kurtosis(df1[col]) for col in top10_cols}, name="Kurtosis")
kdf2 = k2.to_frame()
smry3["Kurtosis"] = k2  # aligned to smry3's rows by label

# Select columns and print

smry4 = smry3.loc[:, ["mean", "std","10%","25%","50%","75%","90%","IQR","IDR","Kurtosis"]]
smry4 = smry4.rename(columns={'mean': 'Mean', "10%":"Lowest decile",'25%': 'Lower Quartile', '50%': 'Median', '75%': 'Upper Quartile',"90%":"Highest decile","std":"Std Dev"})
smry4

Unnamed: 0,Mean,Std Dev,Lowest decile,Lower Quartile,Median,Upper Quartile,Highest decile,IQR,IDR,Kurtosis
Sertraline %,21.636045,5.785686,14.577695,17.998734,21.438172,25.092722,28.622076,7.093988,14.04438,2.573198
Citalopram %,20.006316,6.131975,12.551306,15.846965,19.727891,23.809839,27.762333,7.962874,15.211028,1.152793
Amitriptyline %,18.904406,5.225218,13.005788,15.630137,18.595041,21.757698,25.090626,6.127561,12.084838,2.721116
Mirtazapine %,12.92087,4.893847,7.498888,9.634294,12.432984,15.460339,18.638685,5.826045,11.139797,9.20979
Fluoxetine %,9.093048,3.328393,5.357556,6.920968,8.689899,10.865282,13.236811,3.944313,7.879254,3.485943
Venlafaxine %,6.032764,2.587775,3.102614,4.312106,5.768194,7.469895,9.289809,3.157789,6.187195,18.46775
Duloxetine %,3.21207,2.045403,1.140025,1.835498,2.821869,4.14652,5.749002,2.311022,4.608977,28.263012
Paroxetine %,1.937938,1.278329,0.71522,1.164747,1.697128,2.427052,3.339917,1.262305,2.624697,27.196038
Trazodone %,1.396283,1.605092,0.041738,0.35606,0.922084,1.866841,3.294729,1.510781,3.252991,35.128164
Escitalopram %,1.751579,2.112115,0.235206,0.561273,1.111586,2.145273,3.906394,1.584001,3.671189,27.032269


In [6]:
# Calculate summary stats for drugs that shouldn't be prescribed, at practice level

# Percentage columns summarised in this cell
discouraged_cols = ["Paroxetine %","Dosulepin %","Trimipramine %"]

# Calculate summary statistics for the selected columns only, and transpose so each drug is a row.
# percentiles is passed as a list (the list-like form documented for DataFrame.describe).

smry5 = df1[discouraged_cols].describe(percentiles=[0.1,0.25,0.5,0.75,0.9]).transpose()

# Calculate IQR, interdecile range (IDR) and kurtosis

smry5["IQR"] = smry5["75%"] - smry5["25%"]
smry5["IDR"] = smry5["90%"] - smry5["10%"]

# Kurtosis uses scipy.stats.kurtosis with its default arguments. The Series is built keyed by
# column label rather than filled through integer positions, so it does not depend on pandas'
# deprecated positional fallback for integer keys on a string index, and it gets a float dtype.
k3 = pd.Series({col: stats.kurtosis(df1[col]) for col in discouraged_cols}, name="Kurtosis")
kdf3 = k3.to_frame()
smry5["Kurtosis"] = k3  # aligned to smry5's rows by label

# Select columns and print

smry6 = smry5.loc[:, ["mean", "std","10%","25%","50%","75%","90%","IQR","IDR","Kurtosis"]]
smry6 = smry6.rename(columns={'mean': 'Mean', "10%":"Lowest decile",'25%': 'Lower Quartile', '50%': 'Median', '75%': 'Upper Quartile',"90%":"Highest decile","std":"Std Dev"})
smry6

Unnamed: 0,Mean,Std Dev,Lowest decile,Lower Quartile,Median,Upper Quartile,Highest decile,IQR,IDR,Kurtosis
Paroxetine %,1.937938,1.278329,0.71522,1.164747,1.697128,2.427052,3.339917,1.262305,2.624697,27.196038
Dosulepin %,0.877614,0.878839,0.041067,0.301243,0.679049,1.193951,1.860969,0.892708,1.819902,46.811351
Trimipramine %,0.051874,0.176268,0.0,0.0,0.0,0.0,0.154267,0.0,0.154267,113.775665


In [7]:
# Calculate summary stats for MAOIs, at practice level

# Percentage columns summarised in this cell
maoi_cols = ["Isocarboxazid %","Iproniazid %","Tranylcypromine %","Moclobemide %","Phenelzine %"]

# Calculate summary statistics for the selected columns only, and transpose so each drug is a row.
# percentiles is passed as a list (the list-like form documented for DataFrame.describe).

smry7 = df1[maoi_cols].describe(percentiles=[0.1,0.25,0.5,0.75,0.9]).transpose()

# Calculate IQR, interdecile range (IDR) and kurtosis

smry7["IQR"] = smry7["75%"] - smry7["25%"]
smry7["IDR"] = smry7["90%"] - smry7["10%"]

# Kurtosis uses scipy.stats.kurtosis with its default arguments. The Series is built keyed by
# column label rather than filled through integer positions, so it does not depend on pandas'
# deprecated positional fallback for integer keys on a string index, and it gets a float dtype.
k4 = pd.Series({col: stats.kurtosis(df1[col]) for col in maoi_cols}, name="Kurtosis")
kdf4 = k4.to_frame()
smry7["Kurtosis"] = k4  # aligned to smry7's rows by label

# Select columns and print

smry8 = smry7.loc[:, ["mean", "std","10%","25%","50%","75%","90%","IQR","IDR","Kurtosis"]]
smry8 = smry8.rename(columns={'mean': 'Mean', "10%":"Lowest decile",'25%': 'Lower Quartile', '50%': 'Median', '75%': 'Upper Quartile',"90%":"Highest decile","std":"Std Dev"})
smry8

Unnamed: 0,Mean,Std Dev,Lowest decile,Lower Quartile,Median,Upper Quartile,Highest decile,IQR,IDR,Kurtosis
Isocarboxazid %,0.002491,0.033734,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1039.77179
Iproniazid %,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-3.0
Tranylcypromine %,0.009949,0.076005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,564.442868
Moclobemide %,0.025514,0.111751,0.0,0.0,0.0,0.0,0.066827,0.0,0.066827,247.526102
Phenelzine %,0.018785,0.078297,0.0,0.0,0.0,0.0,0.052876,0.0,0.052876,167.716068
