# Are statin prescribing ratios different for dispensing practices?

#### 1. Obtain GP Prescribing data
#### 2. Cut GP Prescribing data for things of interest (statins)
#### 3. Obtain list of dispensing practices
#### 3a. Add codes to list of dispensing practices (only names and addresses are provided)
#### 4. Combine statin cut and dispensing practice list
#### 5. Compare dispensing vs non-dispensing statin prescribing ratios

In [57]:
import requests
from StringIO import StringIO
from numpy import nan
import pandas as pd
import xlrd 
import csv

In [106]:
def fetch_prescribing_data():
    """Download the January 2015 GP practice prescribing data.

    Returns:
        pandas.DataFrame: the CSV parsed as-is (column names are not
        stripped here; clean_prescribing_data() does that).

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    from io import BytesIO  # local import; r.content is bytes, so parse it from a bytes buffer

    url = 'http://datagov.ic.nhs.uk/presentation/2015_01_January/T201501PDPI+BNFT.CSV' #gp prescribing data (Jan 2015)
    r = requests.get(url)
    r.raise_for_status() #fail loudly instead of parsing an error page as CSV
    return pd.read_csv(BytesIO(r.content))


def clean_prescribing_data(df=None, output_path='StatinsJan2015.csv'):
    """Cut prescribing data down to statin rows and save the cut as CSV.

    Args:
        df: raw prescribing DataFrame. When omitted, the data is downloaded
            with fetch_prescribing_data(). The frame passed in is not
            modified.
        output_path: file the statin cut is written to.

    Returns:
        pandas.DataFrame: rows whose 'BNF NAME' contains 'statin', minus
        Nystatin, Sandostatin and Ecostatin, with surrounding whitespace
        stripped from the column names.

    Raises:
        KeyError: if no column is called 'BNF NAME' once whitespace is
            stripped.
    """
    if df is None:
        df = fetch_prescribing_data()

    columns = [x.strip() for x in df.columns] #gets rid of variable whitespace
    if 'BNF NAME' not in columns:
        raise KeyError('BNF NAME')
    # Look the column up by position so the caller's frame never has to be renamed.
    names = df.iloc[:, columns.index('BNF NAME')]

    # na=False: a row with a missing name counts as "no match" rather than
    # putting NaN into the boolean mask.
    # NOTE(review): matching on the substring 'statin' presumably misses
    # products listed under a brand name - confirm against the BNF codes.
    is_statin = names.str.contains('statin', na=False)
    is_unwanted = names.str.contains('Nystatin|Sandostatin|Ecostatin', na=False) #not cholesterol-lowering statins
    statins = df[is_statin & ~is_unwanted] #boolean indexing yields a new frame
    statins.columns = columns

    statins.to_csv(output_path) #save the result (the row index is written as an unnamed first column)
    return statins


In [111]:
#One-off download and clean steps; left disabled here so the CSV saved by clean_prescribing_data() is reused:
#fetch_prescribing_data()
#clean_prescribing_data()

df = pd.read_csv('StatinsJan2015.csv') #load cleaned prescribing data cut (statin rows only)



In [112]:
df.head() #preview the first rows of the statin cut

Unnamed: 0.1,Unnamed: 0,SHA,PCT,PRACTICE,BNF CODE,BNF NAME,ITEMS,NIC,ACT COST,QUANTITY,PERIOD,Unnamed: 11
0,99,Q44,RXA,Y00327,0212000Y0AAABAB,Simvastatin_Tab 20mg,1,0.14,0.24,4,201501,
1,100,Q44,RXA,Y00327,0212000Y0AAADAD,Simvastatin_Tab 40mg,1,0.08,0.19,2,201501,
2,586,Q44,RY7,N85638,0212000B0AAACAC,Atorvastatin_Tab 40mg,2,0.58,0.76,10,201501,
3,587,Q44,RY7,N85638,0212000Y0AAADAD,Simvastatin_Tab 40mg,2,0.6,0.78,14,201501,
4,1056,Q44,RY7,N85645,0212000Y0AAABAB,Simvastatin_Tab 20mg,1,0.11,0.21,3,201501,


In [99]:
df1 = pd.read_excel('data/Disp Pracs Name and Address 2015-01-31.xls') #list of dispensing practices (name and address only, no practice codes)

In [184]:
len(df1) #number of rows in the dispensing practice list

1562

In [130]:
#upper-case the combined name/address text; presumably so names compare equal to the names in epraccur.csv - confirm that file is upper-case
df1['Dispensing Practices Address Details'] = df1['Dispensing Practices Address Details'].str.upper()

In [149]:
#split each entry on commas; note df1 is rebound from the DataFrame to a Series of lists, so the cells above cannot be re-run after this one
df1 = df1['Dispensing Practices Address Details'].str.split(',')

In [175]:
dpnames = df1.str[0] #dispensing practice names (text before the first comma of each entry)

In [116]:
#NOTE(review): this path uses 'datas/' while the dispensing practice list is read from 'data/' - confirm which directory is intended
#NOTE(review): the columns of this frame are only ever accessed by position; if the file has no header row, the first practice is presumably being consumed as column names - confirm, and pass header=None if so
df2 = pd.read_csv('datas/epraccur.csv') #from http://systems.hscic.gov.uk/data/ods/datadownloads/data-files/epraccur.zip

In [183]:
#count the df2 rows whose second column (matched here as the practice name) is one of the dispensing practice names
#.iloc[:, 1] is the supported positional accessor; the icol() used before is deprecated
len(df2[df2.iloc[:, 1].isin(dpnames)]) #1182 != 1562 so we're missing some

1182

In [190]:
#keep the first column (used below as the practice code) of every df2 row whose second column matches a dispensing practice name
#.iloc replaces the deprecated icol() positional accessor
dpcodes = df2[df2.iloc[:, 1].isin(dpnames)].iloc[:, 0] #dispensing practice codes

In [204]:
#distinct practice codes in the statin cut that are also in the dispensing practice code list
len(df.loc[df['PRACTICE'].isin(dpcodes), 'PRACTICE'].unique()) #dispensing practices we can identify in prescribing data

1168

In [240]:
#split the statin cut in two by whether the row's practice code is a known dispensing practice
is_dispensing = df['PRACTICE'].isin(dpcodes) #membership mask, built once and used for both halves
dpp = df[is_dispensing] #dispensing practice prescribing
ndpp = df[~is_dispensing] #not dispensing practice prescribing (includes any dispensing practices whose names could not be matched to a code)


In [280]:
#rosuvastatin vs other statins, dispensing practices
is_rosuva_dpp = dpp['BNF NAME'].str.contains('Rosuvastatin') #case-sensitive name match, computed once for both sums
rp_dpp = dpp[is_rosuva_dpp].ITEMS.sum() #rosuvastatin prescriptions
nrp_dpp = dpp[~is_rosuva_dpp].ITEMS.sum() #not rosuvastatin prescriptions
assert(dpp.ITEMS.sum() == rp_dpp + nrp_dpp) #sanity check
#rosuvastatin items per 100 non-rosuvastatin items: a ratio against the other statins,
#not a share of all statin items (that would divide by rp_dpp + nrp_dpp instead)
percent_r_dpp = float(rp_dpp) / float(nrp_dpp) * 100
print(percent_r_dpp) #call form prints the same value under Python 2 and also runs under Python 3

4.11664201488


In [281]:
#rosuvastatin vs other statins, non-dispensing practices
is_rosuva_ndpp = ndpp['BNF NAME'].str.contains('Rosuvastatin') #case-sensitive name match, computed once for both sums
rp_ndpp = ndpp[is_rosuva_ndpp].ITEMS.sum() #rosuvastatin prescriptions
nrp_ndpp = ndpp[~is_rosuva_ndpp].ITEMS.sum() #not rosuvastatin prescriptions
assert(ndpp.ITEMS.sum() == rp_ndpp + nrp_ndpp) #sanity check
#rosuvastatin items per 100 non-rosuvastatin items: a ratio against the other statins,
#not a share of all statin items (that would divide by rp_ndpp + nrp_ndpp instead)
percent_r_ndpp = float(rp_ndpp) / float(nrp_ndpp) * 100
print(percent_r_ndpp) #call form prints the same value under Python 2 and also runs under Python 3

2.63328926643
