In [1]:
import warnings
from copy import deepcopy
import pandas as pd
import numpy as np
import re

In [2]:
warnings.filterwarnings('ignore')

In [3]:
# let long text cells render in full (up to 1000 characters).
pd.set_option('display.max_colwidth', 1000)
# render every float with exactly three decimal places.
pd.set_option('display.float_format', '{:.3f}'.format)

In [4]:
# load the scraped GoodRx csv into the working dataframe.
grx_df = pd.read_csv(filepath_or_buffer='../Data Sources/goodrx_scraped.csv')

In [5]:
# order the rows alphabetically by drug name.
# .copy() makes the result an independent frame (equivalent to a deep copy).
grx_df = grx_df.sort_values('drug_name').copy()

In [7]:
# preview the first rows of the dataframe (DataFrame.head shows five by default).
grx_df.head()

Index(['drug_name', 'drug_notice', 'drug_url', 'generic_name',
       'grx_affordability_rank', 'grx_avg_cash_price',
       'grx_lowest_price', 'grx_no_prices_found', 'grx_not_avail',
       'grx_over_counter', 'grx_pharmacy', 'grx_popularity_rank',
       'grx_purchase', 'url_id'],
      dtype='object')

In [7]:
# list the column labels currently present in the dataframe.
grx_df.columns

Index(['drug_name', 'drug_notice', 'drug_url', 'generic_name',
       'grx_affordability_rank', 'grx_avg_cash_price',
       'grx_lowest_price', 'grx_no_prices_found', 'grx_not_avail',
       'grx_over_counter', 'grx_pharmacy', 'grx_popularity_rank',
       'grx_purchase', 'url_id'],
      dtype='object')

In [6]:
# give every distinct drug_url an integer code so each drug page has a
# unique identifier.
grx_df['url_id'] = (
    grx_df['drug_url']
    .astype('category')
    .cat.codes
)
# order by that id and then by listed price so that, among duplicates of a
# drug, the row meant to be kept comes first.
# NOTE(review): a later cell strips '$' and ',' from grx_lowest_price, which
# suggests the column is still text at this point; if so this ordering is
# lexicographic ('$100' sorts before '$20'), not numeric -- confirm. Changing
# it can alter which duplicate survives, and later cells address rows by
# hard-coded index labels, so verify those labels before fixing.
grx_df = grx_df.sort_values(['url_id', 'grx_lowest_price'], ascending=True).copy()

In [8]:
# rows that agree on every field listed here count as the same listing
# (grx_lowest_price is deliberately not part of the key); only the first
# such row, in the frame's current order, is kept.
duplicate_list = [
    'drug_name',
    'drug_notice',
    'drug_url',
    'generic_name',
    'grx_affordability_rank',
    'grx_avg_cash_price',
    'grx_competition_amount',
    'grx_drug_warning',
    'grx_fair_price',
    'grx_no_prices_found',
    'grx_not_avail',
    'grx_over_counter',
    'grx_pharmacy',
    'grx_popularity_rank',
    'grx_purchase',
    'url_id',
]

grx_df = grx_df.drop_duplicates(subset=duplicate_list, keep='first')

In [9]:
# keep only rows that are not flagged as over-the-counter medication
# (rows with a missing flag are kept).
grx_df = grx_df.loc[grx_df['grx_over_counter'].ne('Over the counter medication')].copy()

In [10]:
# keep only rows whose notice does not say the drug is doctor-provided and
# generally unavailable at a pharmacy (rows with no notice are kept).
grx_df = grx_df.loc[
    grx_df['drug_notice'].ne(
        'This prescription is provided by a doctor and is generally not available at a pharmacy'
    )
].copy()

In [11]:
# flag a row as confirmed-unavailable when it carries a "not available"
# marker and has no fair price.
grx_df['grx_not_avail_conf'] = (
    grx_df['grx_not_avail'].notnull() & grx_df['grx_fair_price'].isnull()
)
# keep only the rows that were not flagged.
grx_df = grx_df.loc[~grx_df['grx_not_avail_conf']].copy()

In [12]:
# keep only rows that have a lowest price and carry no "no prices found" marker.
grx_df = grx_df.loc[
    grx_df['grx_lowest_price'].notnull() & grx_df['grx_no_prices_found'].isnull()
].copy()

In [13]:
# the filtering above is done, so discard the columns that only served it.
grx_df = grx_df.drop(
    [
        'url_id',
        'drug_url',
        'grx_over_counter',
        'drug_notice',
        'grx_not_avail',
        'grx_not_avail_conf',
        'grx_no_prices_found',
        'grx_avg_cash_price',
        'grx_fair_price',
        'grx_drug_warning',
    ],
    axis=1,
)

In [14]:
# drugs without a generic name are kept and labelled with a placeholder.
# (the placeholder's spelling is left untouched because it is stored data.)
grx_df['generic_name'] = grx_df['generic_name'].fillna('Non-Existant')

In [15]:
# drop listings whose pharmacy is one of the two pet-supply pharmacies.
grx_df = grx_df.loc[
    ~grx_df['grx_pharmacy'].isin(['KVSupply', 'Heartland Vet Supply'])
].copy()

In [16]:
# turn the price text into a float:
#   1. cast to str so the string methods apply to every value,
#   2. strip '$' from both ends and remove thousands separators
#      (',' has no regex meaning, so the result does not depend on the
#      installed pandas' default for `regex`),
#   3. price drugs listed as 'Free' at 0,
#   4. cast to float for numerical analysis.
grx_df['grx_lowest_price'] = (
    grx_df['grx_lowest_price']
    .astype(str)
    .str.strip('$')
    .str.replace(',', '')
    .replace('Free', '0')
    .astype(float)
)

In [17]:
# most expensive drug first, then renumber the rows 0..n-1 without keeping
# the old index as a column.
# NOTE: later cells address rows by these new integer labels.
grx_df = (
    grx_df
    .sort_values('grx_lowest_price', ascending=False)
    .reset_index(drop=True)
)

In [18]:
# hand-made (row label, column, value) corrections that make specific rows
# joinable on drug_name/generic_name later.
# NOTE(review): the row labels are hard-coded and only valid for the exact
# row order produced by the preceding sort -- confirm after any data refresh.
name_fixes = (
    (1343, 'drug_name', 'Diazepam'),
    (2270, 'drug_name', 'Differin'),
    (2333, 'drug_name', 'Voltaren'),
    (3147, 'drug_name', 'SF'),
    (1237, 'generic_name', 'Epinephrine'),
)
for fix_row, fix_col, fix_value in name_fixes:
    grx_df.loc[fix_row, fix_col] = fix_value

In [19]:
# fixed formatting in affordability_rank, popularity_rank and competition_amount.
# Remove the ordinal suffix from the rank text. A regex anchored at the end
# of the string is used because str.strip('th') strips the *characters*
# 't'/'h' rather than the suffix: it leaves '2nd'/'3rd' unchanged and turns
# '1st' into '1s'.
for rank_col in ('grx_affordability_rank', 'grx_popularity_rank'):
    grx_df[rank_col] = grx_df[rank_col].str.replace(r'(?:st|nd|rd|th)$', '', regex=True)

# Keep the third comma-separated field of competition_amount. Missing values
# are padded to ',,' so they split into three empty fields; `.str[2]` yields
# NaN instead of raising when a value has fewer than three fields.
competition_field = grx_df['grx_competition_amount'].fillna(',,').str.split(',').str[2]
grx_df['grx_competition_amount'] = competition_field
# empty or missing fields are stored as None.
grx_df.loc[
    competition_field.isnull() | (competition_field == ''),
    'grx_competition_amount',
] = None

In [20]:
# these three fields were null for almost half of the dataset, so they are
# removed from the frame.
grx_df = grx_df.drop(
    ['grx_affordability_rank', 'grx_popularity_rank', 'grx_competition_amount'],
    axis=1,
)

In [21]:
# hand-written grx_purchase replacements, keyed by row label, that put the
# words in an order the later quantity regex can parse.
# NOTE(review): the row labels are hard-coded and depend on the current row
# order of the frame -- confirm after any data refresh.
purchase_fixes = {
    1: '2 vials (5ml) of H.P.Acthar 80 units/ml',
    93: "1 carton (2 bottles) of Braftovi 75mg",
    109: '2 vials of Perjeta 420mg/14ml',
    110: '2 vials of Perjeta 420mg/14ml',
    158: '1 syringe of Neulasta 6mg/0.6ml',
    159: '1 syringe of Neulasta 6mg/0.6ml',
    167: '90 tablets of Noxafil 100mg',
    564: '60 patches of Flector 1.3%',
    742: '1 inhaler of Nicotrol 10mg',
    836: '1 dose of Rabavert 2.5iu/ml',
    837: '1 dose of Rabavert 2.5iu/ml',
    1124: '1 device of Primabella ',
    1534: '30 patches of lidocaine 5%',
    1623: '10 syringes of enoxaparin 100mg/ml',
    1689: '1 spacer of Flexichamber ',
    1777: '100 test strips of Truetrack ',
    1940: ' 1 spacer of Easivent ',
    2056: '12 packets of imiquimod 5%',
    2057: '12 packets of imiquimod 5%',
    2838: '1 spacer of Prochamber ',
    639: '1 Vial',
    640: '1 Vial',
    767: '1 Vial',
    3293: '1 Vial',
}
for purchase_row, purchase_text in purchase_fixes.items():
    grx_df.loc[purchase_row, 'grx_purchase'] = purchase_text

In [22]:
# literal substring rewrites, applied to every grx_purchase value in this
# exact order, to make the later quantity regex easier.
# NOTE(review): 'five 3ml' is rewritten before 'five 3ml pens', so the last
# rule can no longer match anything; and '1 pen' also matches inside
# '1 pens' / '11 pens'. Kept as-is to preserve the existing output.
purchase_rewrites = (
    ('17g/dose', '17g'),
    ('five 3ml', '5'),
    ('1 pen', '1 pens'),
    ('two 3ml pens', '2 pens'),
    ('five 3ml pens', '5 pens'),
)
for old_text, new_text in purchase_rewrites:
    grx_df['grx_purchase'] = [
        purchase.replace(old_text, new_text) for purchase in grx_df['grx_purchase']
    ]

In [23]:
# used regex to make a new column 'grx_purchase_qty', of the number of units in a grx_purchase.

# Unit keywords in priority order: the first keyword found in the purchase
# text decides which "<count> <keyword>" pattern is used, so more specific
# phrases ('sublingual tablet') must stay ahead of the generic one ('tablet').
_UNIT_KEYWORDS = (
    'capsule',
    'sublingual tablet',
    'effervescent tablet',
    'chewable tablet',
    'buccal tablet',
    'orally disintegrating tablet',
    'tablet',
    'caplet',
    'single-use vial',
    'vial',
    'ampule',
    'nuspins',
    'implant',
    'packet',
    'blisters',
    'cartridge',
    'suppositories',
    'once-weekly patches',
    'twice-weekly patches',
    'patches',
    'metered sprays',
    'nasal spray',
    'eye dropper',
    'ear dropper',
    'dropper',
    'surclicks',
    'autoinjector',
    'auto-injector',
    'syringe',
    'gel pump',
    'can',
    'sensoready pens',
    'solostar pens',
    'flexpens',
    'junior kwikpens',
    'kwikpens',
    'flextouch pens',
    'prefilled 1.5ml pens',
    'pen needles',
    'pens',
    'carton',
    'kit',
    'diskus inhaler',
    'hfa inhaler',
    'respiclick inhaler',
    'respimat inhaler',
    'inhaler',
    'days',
    'dose pack',
    'dose',
    'bottle',
    'tube',
    'injections',
    'jar',
    'applicators',
    'orally disintegrating tabs',
    'test strips',
    'buccal films',
    'films',
    'insert',
    'flexhaler',
    'applicator',
    'cream pump',
    'package',
    'day flash',
    'ring',
    'flexpro',
    'iud',
    'device',
    'lozenges',
    'enemas',
    'lancets',
)

# These keywords are skipped when the text also mentions 'suspension'.
_SKIP_IF_SUSPENSION = frozenset(['pen needles', 'pens'])

# One compiled "<digits> <whitespace> <keyword>" pattern per keyword; the
# keyword is escaped so characters such as '.' are matched literally.
_UNIT_PATTERNS = [
    (keyword, re.compile(r'\d+(?=\s+' + re.escape(keyword) + ')'))
    for keyword in _UNIT_KEYWORDS
]


def _extract_purchase_qty(purchase):
    """Return the unit count found in a grx_purchase description.

    Parameters
    ----------
    purchase : str
        Purchase text such as '30 tablets of X 10mg'; matching is
        case-insensitive.

    Returns
    -------
    str
        The digits that directly precede the first applicable unit keyword,
        or 'Error' when no keyword yields a count (or the value is not text).

    Notes
    -----
    A keyword that occurs only as a substring without a leading count (for
    example 'can' inside another word) is skipped and the next keyword is
    tried, instead of raising AttributeError on a failed regex match.
    """
    if not isinstance(purchase, str):
        return 'Error'
    text = purchase.lower()
    for keyword, pattern in _UNIT_PATTERNS:
        if keyword not in text:
            continue
        if keyword in _SKIP_IF_SUSPENSION and 'suspension' in text:
            continue
        found = pattern.search(text)
        if found is not None:
            return found.group()
    return 'Error'


# Build the whole column in one pass and assign it positionally, so the
# result does not depend on the frame having a 0..n-1 integer index.
grx_df['grx_purchase_qty'] = [
    _extract_purchase_qty(purchase) for purchase in grx_df['grx_purchase']
]

In [24]:
# quantities entered by hand, keyed by row label, for rows where the regex
# extraction was more difficult.
# NOTE(review): the row labels are hard-coded and depend on the current row
# order of the frame -- confirm after any data refresh.
qty_overrides = {
    568: '60',
    630: '28',
    887: '360',
    1050: '30',
    2501: '120',
    3267: '10',
    3268: '10',
    3269: '10',
    3270: '10',
}
for qty_row, qty_text in qty_overrides.items():
    grx_df.loc[qty_row, 'grx_purchase_qty'] = qty_text

In [25]:
# every row still marked 'Error' is treated as a single unit.
# ('Error' has no regex meaning, so the replacement is literal either way.)
grx_df['grx_purchase_qty'] = grx_df['grx_purchase_qty'].str.replace('Error', '1')

In [26]:
# created a new column grx_qty_coeff to arbitrarily multiply grx_purchase_qty
# by an estimated number of doses in a unit:
#   dropper / tube / spray -> 100
#   inhaler / cartridge    -> 200
#   everything else        -> 1
# The assignments run from the default to the most specific, so a text that
# mentions several unit types ends up with the inhaler/cartridge value.
# (Previously a trailing `else` attached only to the cartridge test reset
# every non-cartridge row to 1, discarding the dropper/tube/spray/inhaler
# coefficients.)
purchase_lower = grx_df['grx_purchase'].str.lower()
is_hundred_dose = purchase_lower.str.contains('dropper|tube|spray', na=False)
is_two_hundred_dose = purchase_lower.str.contains('inhaler|cartridge', na=False)

grx_df['grx_qty_coeff'] = 1
grx_df.loc[is_hundred_dose, 'grx_qty_coeff'] = 100
grx_df.loc[is_two_hundred_dose, 'grx_qty_coeff'] = 200

In [27]:
# both quantity columns must be floats before they are multiplied.
for qty_col in ('grx_purchase_qty', 'grx_qty_coeff'):
    grx_df[qty_col] = grx_df[qty_col].astype(float)

In [28]:
# grx_purchase_qty_adj: number of doses in a purchase (units x doses per unit).
grx_df['grx_purchase_qty_adj'] = grx_df['grx_purchase_qty'].mul(grx_df['grx_qty_coeff'])
# grx_lowest_price_adj: lowest price divided by that dose count.
grx_df['grx_lowest_price_adj'] = grx_df['grx_lowest_price'].div(grx_df['grx_purchase_qty_adj'])

In [29]:
# make sure both derived columns are stored as floats for analysis.
for adj_col in ('grx_purchase_qty_adj', 'grx_lowest_price_adj'):
    grx_df[adj_col] = grx_df[adj_col].astype(float)

In [30]:
# the intermediate quantity columns only existed to derive the per-dose
# price, so remove them.
grx_df = grx_df.drop(
    ['grx_purchase_qty', 'grx_purchase_qty_adj', 'grx_qty_coeff'],
    axis=1,
)

In [31]:
# preview the first rows of the cleaned frame (DataFrame.head shows five by default).
grx_df.head()

Unnamed: 0,drug_name,generic_name,grx_lowest_price,grx_pharmacy,grx_purchase,grx_lowest_price_adj
0,Ruconest,Non-Existant,144372.0,Costco,1 vial (25ml) of Ruconest 2100u,144372.0
1,Corticotropin,H.P. Acthar,77248.0,Costco,2 vials (5ml) of H.P.Acthar 80 units/ml,38624.0
2,Xuriden,Non-Existant,45531.0,Costco,30 packets of Xuriden 2g,1517.7
3,Chenodal,Non-Existant,42281.0,Costco,90 tablets of Chenodal 250mg,469.789
4,Tretten,Non-Existant,37859.0,Costco,1 kit of Tretten 2500iu,37859.0


In [32]:
# persist the cleaned frame as a pickle file.
grx_df.to_pickle(path='../Pickles/grx_df.pkl')

In [33]:
# summarise the frame: index range, column dtypes and non-null counts.
grx_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3442 entries, 0 to 3441
Data columns (total 6 columns):
drug_name               3442 non-null object
generic_name            3442 non-null object
grx_lowest_price        3442 non-null float64
grx_pharmacy            3442 non-null object
grx_purchase            3442 non-null object
grx_lowest_price_adj    3442 non-null float64
dtypes: float64(2), object(4)
memory usage: 161.4+ KB
