In [4]:
#step 1 import required packages
import pandas as pd # load pandas package for importing csv files in correct format
# import as defines the identifier you want to use for the function e.g in this case 
# pd will call the pandas package
import matplotlib # load matplotlib package for plotting graphs
import os



In [6]:
# Step 2: read the Open Prescribing measures extract; a subset is filtered out of it for testing all scripts.
data = pd.read_csv('measures.csv', delimiter=',')  # fields in the file are separated by commas


In [7]:
# Step 3: confirm the import worked by displaying the top three rows.
data.head(n=3)

Unnamed: 0,measure,org_type,org_id,org_name,date,numerator,denominator,calc_value,percentile
0,ktt9_antibiotics,practice,P84650,THE ALEXANDRA PRACTICE,2014-10-01,199.0,3106.653507,0.064056,9.181078
1,ktt9_antibiotics,practice,P84673,ANCOATS URBAN VILLAGE MEDICAL PRACTICE,2014-10-01,463.0,4252.799604,0.108869,75.8647
2,ktt9_antibiotics,practice,P84071,WILBRAHAM SURGERY,2014-10-01,219.0,2275.856965,0.096227,57.540692


In [8]:
# read_csv has already produced a DataFrame; wrap it under the name the later cells use.
dataframe = pd.DataFrame(data=data)
# Display the first three rows to check that the frame looks as expected.
dataframe.head(n=3)

Unnamed: 0,measure,org_type,org_id,org_name,date,numerator,denominator,calc_value,percentile
0,ktt9_antibiotics,practice,P84650,THE ALEXANDRA PRACTICE,2014-10-01,199.0,3106.653507,0.064056,9.181078
1,ktt9_antibiotics,practice,P84673,ANCOATS URBAN VILLAGE MEDICAL PRACTICE,2014-10-01,463.0,4252.799604,0.108869,75.8647
2,ktt9_antibiotics,practice,P84071,WILBRAHAM SURGERY,2014-10-01,219.0,2275.856965,0.096227,57.540692


In [24]:
def import_OP_data(filename):
    """Import a csv file downloaded from the Open Prescribing website into a dataframe.

    A preview of the first three rows is printed so the user can confirm that
    the import looks correct, and the loaded dataframe is returned so it can
    be used for analysis in Python.

    Parameters
    ----------
    filename : str or path-like
        Location of the comma-separated file to load.

    Returns
    -------
    pandas.DataFrame
        The full contents of the file.
    """
    # read_csv already returns a DataFrame, so no further conversion is needed
    dataframe = pd.read_csv(filename, sep=',')
    # Print the prompt and the preview on separate lines so the table header stays aligned
    print('These are the first 3 rows of the dataset. Does it look correct?',
          dataframe.head(3), sep='\n')
    # Return the data itself (not the result of print, which is always None)
    return dataframe

In [25]:
# Try the import helper on the measures file; it prints a three-row preview.
import_OP_data(filename='measures.csv')

These are the first 3 rows of the dataset. Does it look correct?             measure  org_type  org_id                                org_name  \
0  ktt9_antibiotics  practice  P84650                  THE ALEXANDRA PRACTICE   
1  ktt9_antibiotics  practice  P84673  ANCOATS URBAN VILLAGE MEDICAL PRACTICE   
2  ktt9_antibiotics  practice  P84071                       WILBRAHAM SURGERY   

         date  numerator  denominator  calc_value  percentile  
0  2014-10-01      199.0  3106.653507    0.064056    9.181078  
1  2014-10-01      463.0  4252.799604    0.108869   75.864700  
2  2014-10-01      219.0  2275.856965    0.096227   57.540692  


In [38]:
def remove_OP_artifacts(dataframe):
    """Remove rows with no prescriptions from an Open Prescribing dataframe.

    The rows are sorted by the 'numerator' column (number of prescriptions),
    the rows with exactly 0 prescriptions are counted, and only rows with a
    numerator greater than 0 are kept. A summary is printed for the user and
    the filtered dataframe is returned. The input dataframe is not modified.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data containing a numeric 'numerator' column.

    Returns
    -------
    pandas.DataFrame
        The rows with numerator > 0, sorted by numerator in ascending order.
        Rows whose numerator is missing (NaN) or negative also fail the > 0
        test and are dropped, although only exact zeros are counted in the
        printed summary.
    """
    # Sort on the number of prescriptions so the smallest values come first
    sorted_frame = dataframe.sort_values("numerator")
    # Count the rows that have exactly 0 prescriptions
    zero_count = int((sorted_frame['numerator'] == 0.0).sum())
    # Keep only the rows with a prescription value greater than 0.0
    filtered = sorted_frame[sorted_frame['numerator'] > 0.0]
    # Antibiotics per 1000. Values = 0.0 = <1 prescription per 1000 people?
    # Check the openprescribing.net description.

    if filtered.empty:
        # Nothing survives the filter, so there is no minimum to report
        print('This dataset contains', zero_count,
              'entries with 0 prescriptions.',
              'No entries with more than 0 prescriptions remain in the dataset.')
        return filtered

    # Smallest remaining value; by construction it is greater than 0.0
    smallest = filtered['numerator'].min()
    if zero_count > 0:
        # Tell the user how many zero-prescription rows were detected and removed
        print('This dataset contains', zero_count,
              'entries with 0 prescriptions. These entries have been removed from the dataset.',
              'The smallest number of prescriptions is:', smallest, 'per 1000 patients')
    else:
        # Notify the user that no rows had zero prescriptions; report the true minimum
        print('This dataset contains', zero_count,
              'entries with 0 prescriptions. No entries have been removed from the dataset. The smallest number of prescriptions is:',
              smallest)
    return filtered

In [39]:
# Run the zero-prescription check on the measures dataframe built in the earlier cell.
remove_OP_artifacts(dataframe=dataframe)


This dataset contains 345 entries with 0 prescriptions. These entries have been removed from the dataset. The smallest number of prescriptions is: 1.0 per 1000 patients
