In [1]:
import pandas as pd
import numpy as np
import censusgeocode as cg
import time
from datetime import datetime
import os

In [4]:
keep_col = ['NPI','Entity Type Code','Provider Organization Name (Legal Business Name)',
            'Provider First Line Business Practice Location Address',
            'Provider Business Practice Location Address City Name',
            'Provider Business Practice Location Address State Name',
            'Provider Business Practice Location Address Postal Code',
            'Provider Business Practice Location Address Telephone Number',
            'NPI Deactivation Reason Code']
# 'Provider Last Name (Legal Name)', 'Provider First Name', 'Provider Middle Name',
# 'Provider Name Prefix Text', 'Provider Name Suffix Text',
# 'NPI Deactivation Date','NPI Reactivation Date',

### Arizona

In [6]:
taxon_codes = ['Healthcare Provider Taxonomy Code_' + str(i+1) for i in range(15)]
keep_col += taxon_codes
community_pharm = ['222Z00000X', '224P00000X','335E00000X']
npi_csv = 'npidata_pfile_20050523-20230212.csv' #Newer files will prob change the name

# This defines the rows I want
def sub_rows(data):
    ec = data['Entity Type Code'] == "2"
    st = data['Provider Business Practice Location Address State Name'] == 'AZ'
    ta = data[taxon_codes].isin(community_pharm).any(axis=1)
    ac = data['NPI Deactivation Reason Code'].isna()
    all_together = ec & st & ta & ac 
    sub = data[all_together]
    return sub

def csv_chunks(file,chunk_size,keep_cols,row_sub):
    # First lets get the header and figure out the column indices
    header_fields = list(pd.read_csv(npi_csv, nrows=1))
    header_locs = [header_fields.index(i) for i in keep_cols]
    # Now reading in a chunk of data
    skip = 1
    it_n = 0
    sub_n = 0
    ret_chunk = chunk_size
    fin_li_dat = []
    while ret_chunk == chunk_size:
        file_chunk = pd.read_csv(file, usecols=header_locs, skiprows=skip, 
                     nrows=chunk_size, names=header_fields, dtype='str')
        sub_dat = row_sub(file_chunk)
        fin_li_dat.append( sub_dat.copy() )
        skip += chunk_size
        it_n += 1
        sub_n += sub_dat.shape[0]
        print(f'Grabbed iter {it_n} total sub n so far {sub_n}')
        ret_chunk = file_chunk.shape[0]
    fin_dat = pd.concat(fin_li_dat, axis=0)
    return fin_dat

# Takes about 3 minutes
print( datetime.now() )
pharm_tx = csv_chunks(npi_csv, chunk_size=1000000, keep_cols=keep_col, row_sub=sub_rows)
print( datetime.now() )

ph_tx_az = pharm_tx.drop(columns=taxon_codes).reset_index(drop=True)
ph_tx_az['Provider Business Practice Location Address Postal Code'] = ph_tx_az['Provider Business Practice Location Address Postal Code'].str[0:5]

ph_tx_az = ph_tx_az[['Provider Organization Name (Legal Business Name)',
            'Provider First Line Business Practice Location Address',
            'Provider Business Practice Location Address City Name',
            'Provider Business Practice Location Address State Name',
            'Provider Business Practice Location Address Postal Code',
            'Provider Business Practice Location Address Telephone Number']]
ph_tx_az

2023-03-27 09:19:53.560302
Grabbed iter 1 total sub n so far 7
Grabbed iter 2 total sub n so far 34
Grabbed iter 3 total sub n so far 51
Grabbed iter 4 total sub n so far 76
Grabbed iter 5 total sub n so far 100
Grabbed iter 6 total sub n so far 140
Grabbed iter 7 total sub n so far 176
Grabbed iter 8 total sub n so far 200
2023-03-27 09:21:53.495129


Unnamed: 0,Provider Organization Name (Legal Business Name),Provider First Line Business Practice Location Address,Provider Business Practice Location Address City Name,Provider Business Practice Location Address State Name,Provider Business Practice Location Address Postal Code,Provider Business Practice Location Address Telephone Number
0,PEDORTHIC SERVICES PLLC,3264 E SPEEDWAY BLVD,TUCSON,AZ,85716,5203273166
1,"BEAS ENTERPRISES, INC",13980 W BELL RD STE 7,SURPRISE,AZ,85374,6235841618
2,TOWNSON'S PROSTHETICS,3855 N. ORACLE RD.,TUCSON,AZ,85705,5208881334
3,"GBL ENTERPRISES, LLC",5350 N 16TH ST,PHOENIX,AZ,85016,6022349568
4,"CUSTOM OCULAR PROSTHETICS, INC.",9465 E HARRISON PL,TUCSON,AZ,85710,5207227471
...,...,...,...,...,...,...
195,"NEVADA ORTHOTICS & PROSTHETICS, INC",1863 AIRFIED AVENUE,KINGMAN,AZ,86401,9287183987
196,PRO AMERICAN MEDICAL SUPPLIES,2001 W ROSE GARDEN LN,PHOENIX,AZ,85027,6024308647
197,"CRANIAL TECHNOLOGIES, INC.",3591 S MERCY RD STE 201,GILBERT,AZ,85297,8444475894
198,ARTISAN PROSTHETICS LLC,4501 N 32ND ST,PHOENIX,AZ,85018,6026677827


In [16]:
ph_tx_az[ph_tx_az['Provider Organization Name (Legal Business Name)'].str.contains("NORDSTROM")]

Unnamed: 0,Provider Organization Name (Legal Business Name),Provider First Line Business Practice Location Address,Provider Business Practice Location Address City Name,Provider Business Practice Location Address State Name,Provider Business Practice Location Address Postal Code,Provider Business Practice Location Address Telephone Number
49,NORDSTROM INC & SUBSIDIARIES,7055 E CAMELBACK RD,SCOTTSDALE,AZ,85251,6029464111
50,NORDSTROM INC & SUBSIDIARIES,3199 W CHANDLER BLVD,CHANDLER,AZ,85226,4808552500


In [22]:
l = ph_tx_az[ph_tx_az['Provider Organization Name (Legal Business Name)'].str.contains("PROSTHETICS") & ph_tx_az['Provider Organization Name (Legal Business Name)'].str.contains("ORTHOTICS")]
len(l)

59

### Nevada

In [17]:
taxon_codes = ['Healthcare Provider Taxonomy Code_' + str(i+1) for i in range(15)]
keep_col += taxon_codes
community_pharm = ['222Z00000X', '224P00000X','335E00000X']
npi_csv = 'npidata_pfile_20050523-20230212.csv' #Newer files will prob change the name

# This defines the rows I want
def sub_rows(data):
    ec = data['Entity Type Code'] == "2"
    st = data['Provider Business Practice Location Address State Name'] == 'NV'
    ta = data[taxon_codes].isin(community_pharm).any(axis=1)
    ac = data['NPI Deactivation Reason Code'].isna()
    all_together = ec & st & ta & ac 
    sub = data[all_together]
    return sub

def csv_chunks(file,chunk_size,keep_cols,row_sub):
    # First lets get the header and figure out the column indices
    header_fields = list(pd.read_csv(npi_csv, nrows=1))
    header_locs = [header_fields.index(i) for i in keep_cols]
    # Now reading in a chunk of data
    skip = 1
    it_n = 0
    sub_n = 0
    ret_chunk = chunk_size
    fin_li_dat = []
    while ret_chunk == chunk_size:
        file_chunk = pd.read_csv(file, usecols=header_locs, skiprows=skip, 
                     nrows=chunk_size, names=header_fields, dtype='str')
        sub_dat = row_sub(file_chunk)
        fin_li_dat.append( sub_dat.copy() )
        skip += chunk_size
        it_n += 1
        sub_n += sub_dat.shape[0]
        print(f'Grabbed iter {it_n} total sub n so far {sub_n}')
        ret_chunk = file_chunk.shape[0]
    fin_dat = pd.concat(fin_li_dat, axis=0)
    return fin_dat

# Takes about 3 minutes
print( datetime.now() )
pharm_tx = csv_chunks(npi_csv, chunk_size=1000000, keep_cols=keep_col, row_sub=sub_rows)
print( datetime.now() )

ph_tx_nv = pharm_tx.drop(columns=taxon_codes).reset_index(drop=True)
ph_tx_nv['Provider Business Practice Location Address Postal Code'] = ph_tx_nv['Provider Business Practice Location Address Postal Code'].str[0:5]

ph_tx_nv = ph_tx_nv[['Provider Organization Name (Legal Business Name)',
            'Provider First Line Business Practice Location Address',
            'Provider Business Practice Location Address City Name',
            'Provider Business Practice Location Address State Name',
            'Provider Business Practice Location Address Postal Code',
            'Provider Business Practice Location Address Telephone Number']]
ph_tx_nv

2023-03-27 09:32:42.088424
Grabbed iter 1 total sub n so far 4
Grabbed iter 2 total sub n so far 18
Grabbed iter 3 total sub n so far 28
Grabbed iter 4 total sub n so far 36
Grabbed iter 5 total sub n so far 46
Grabbed iter 6 total sub n so far 62
Grabbed iter 7 total sub n so far 78
Grabbed iter 8 total sub n so far 102
2023-03-27 09:34:44.631377


Unnamed: 0,Provider Organization Name (Legal Business Name),Provider First Line Business Practice Location Address,Provider Business Practice Location Address City Name,Provider Business Practice Location Address State Name,Provider Business Practice Location Address Postal Code,Provider Business Practice Location Address Telephone Number
0,"EMPICARE, INC.",880 SEVEN HILLS DR,HENDERSON,NV,89052,7029902290
1,"M&M ORTHOTIC & PROSTHETIC CENTERS, LLC",3061 S MARYLAND PKWY,LAS VEGAS,NV,89109,7029921200
2,THERAPEUTIC MASSAGING INSOLES,3280 WYNN RD,LAS VEGAS,NV,89102,7029662414
3,"HANGER PROSTHETICS & ORTHOTICS WEST, INC.",6430 MEDICAL CENTER ST,LAS VEGAS,NV,89148,7022621353
4,"ORTHOPRO OF CARSON CITY, INC.",415 W SOPHIA ST,CARSON CITY,NV,89703,7758410660
...,...,...,...,...,...,...
97,JAMES J LYNCH MD LTD,780 VISTA BLVD STE 100,SPARKS,NV,89434,7753488800
98,JAMES J LYNCH M D LTD,1470 MEDICAL PKWY STE 220,CARSON CITY,NV,89703,7753488800
99,JAMES J LYNCH MD LTD,9990 DOUBLE R BLVD STE 200,RENO,NV,89521,7753488800
100,HURD INDUSTRIES LLC,5067 MADRE MESA DR APT 2015,LAS VEGAS,NV,89108,9032805025


In [18]:
ph_tx_nv[ph_tx_nv['Provider Organization Name (Legal Business Name)'].str.contains("NORDSTROM")]

Unnamed: 0,Provider Organization Name (Legal Business Name),Provider First Line Business Practice Location Address,Provider Business Practice Location Address City Name,Provider Business Practice Location Address State Name,Provider Business Practice Location Address Postal Code,Provider Business Practice Location Address Telephone Number
27,NORDSTROM INC & SUBSIDIARIES,3200 LAS VEGAS BLVD S STE 710,LAS VEGAS,NV,89109,7028622525


In [21]:
l = ph_tx_nv[ph_tx_nv['Provider Organization Name (Legal Business Name)'].str.contains("PROSTHETICS") & ph_tx_nv['Provider Organization Name (Legal Business Name)'].str.contains("ORTHOTICS")]
len(l)

21

### Texas

In [23]:
taxon_codes = ['Healthcare Provider Taxonomy Code_' + str(i+1) for i in range(15)]
keep_col += taxon_codes
community_pharm = ['222Z00000X', '224P00000X','335E00000X']
npi_csv = 'npidata_pfile_20050523-20230212.csv' #Newer files will prob change the name

# This defines the rows I want
def sub_rows(data):
    ec = data['Entity Type Code'] == "2"
    st = data['Provider Business Practice Location Address State Name'] == 'TX'
    ta = data[taxon_codes].isin(community_pharm).any(axis=1)
    ac = data['NPI Deactivation Reason Code'].isna()
    all_together = ec & st & ta & ac 
    sub = data[all_together]
    return sub

def csv_chunks(file,chunk_size,keep_cols,row_sub):
    # First lets get the header and figure out the column indices
    header_fields = list(pd.read_csv(npi_csv, nrows=1))
    header_locs = [header_fields.index(i) for i in keep_cols]
    # Now reading in a chunk of data
    skip = 1
    it_n = 0
    sub_n = 0
    ret_chunk = chunk_size
    fin_li_dat = []
    while ret_chunk == chunk_size:
        file_chunk = pd.read_csv(file, usecols=header_locs, skiprows=skip, 
                     nrows=chunk_size, names=header_fields, dtype='str')
        sub_dat = row_sub(file_chunk)
        fin_li_dat.append( sub_dat.copy() )
        skip += chunk_size
        it_n += 1
        sub_n += sub_dat.shape[0]
        print(f'Grabbed iter {it_n} total sub n so far {sub_n}')
        ret_chunk = file_chunk.shape[0]
    fin_dat = pd.concat(fin_li_dat, axis=0)
    return fin_dat

# Takes about 3 minutes
print( datetime.now() )
pharm_tx = csv_chunks(npi_csv, chunk_size=1000000, keep_cols=keep_col, row_sub=sub_rows)
print( datetime.now() )

ph_tx_tx = pharm_tx.drop(columns=taxon_codes).reset_index(drop=True)
ph_tx_tx['Provider Business Practice Location Address Postal Code'] = ph_tx_tx['Provider Business Practice Location Address Postal Code'].str[0:5]

ph_tx_tx = ph_tx_tx[['Provider Organization Name (Legal Business Name)',
            'Provider First Line Business Practice Location Address',
            'Provider Business Practice Location Address City Name',
            'Provider Business Practice Location Address State Name',
            'Provider Business Practice Location Address Postal Code',
            'Provider Business Practice Location Address Telephone Number']]
ph_tx_tx

2023-03-27 09:36:42.021007
Grabbed iter 1 total sub n so far 55
Grabbed iter 2 total sub n so far 188
Grabbed iter 3 total sub n so far 282
Grabbed iter 4 total sub n so far 360
Grabbed iter 5 total sub n so far 441
Grabbed iter 6 total sub n so far 570
Grabbed iter 7 total sub n so far 665
Grabbed iter 8 total sub n so far 746
2023-03-27 09:38:45.412652


Unnamed: 0,Provider Organization Name (Legal Business Name),Provider First Line Business Practice Location Address,Provider Business Practice Location Address City Name,Provider Business Practice Location Address State Name,Provider Business Practice Location Address Postal Code,Provider Business Practice Location Address Telephone Number
0,ASELAGE ORTHOTIC SERVICES,1001 LOUISIANA AVE STE 304,CORPUS CHRISTI,TX,78404,3618542355
1,"NIC DME, INC",3120 CENTER POINT DR,EDINBURG,TX,78539,9569949424
2,SPORTS THERAPY AND REHABILITATION SERVICES,1555 MERRIMAC CIRCLE,FORT WORTH,TX,76107,8178704949
3,"PARIS O & P, INC.",2619 NE LOOP 286 STE A,PARIS,TX,75460,9037858922
4,"HEDGECOCK ARTIFICIAL LIMB CO., INC",1917 S BECKHAM AVE,TYLER,TX,75701,9035312228
...,...,...,...,...,...,...
741,"OPA 1, LTD",5420 WEST LOOP S STE 1200,BELLAIRE,TX,77401,7136608801
742,NEMT MALEXIS LLC,8300 BISSONNET ST STE 460E,HOUSTON,TX,77074,7133915035
743,MAJOR MEDICAL SUPPLY INC,1700 W 2ND ST,ODESSA,TX,79763,4323323621
744,INMOTION PROSTHETICS HOUSTON LLC,7324 SOUTHWEST FWY STE 2-885,HOUSTON,TX,77074,8325812324


In [27]:
l = ph_tx_tx[ph_tx_tx['Provider Organization Name (Legal Business Name)'].str.contains("PROSTHETICS") & ph_tx_tx['Provider Organization Name (Legal Business Name)'].str.contains("ORTHOTICS")]
len(l)

124

### Washington

In [25]:
taxon_codes = ['Healthcare Provider Taxonomy Code_' + str(i+1) for i in range(15)]
keep_col += taxon_codes
community_pharm = ['222Z00000X', '224P00000X','335E00000X']
npi_csv = 'npidata_pfile_20050523-20230212.csv' #Newer files will prob change the name

# This defines the rows I want
def sub_rows(data):
    ec = data['Entity Type Code'] == "2"
    st = data['Provider Business Practice Location Address State Name'] == 'WA'
    ta = data[taxon_codes].isin(community_pharm).any(axis=1)
    ac = data['NPI Deactivation Reason Code'].isna()
    all_together = ec & st & ta & ac 
    sub = data[all_together]
    return sub

def csv_chunks(file,chunk_size,keep_cols,row_sub):
    # First lets get the header and figure out the column indices
    header_fields = list(pd.read_csv(npi_csv, nrows=1))
    header_locs = [header_fields.index(i) for i in keep_cols]
    # Now reading in a chunk of data
    skip = 1
    it_n = 0
    sub_n = 0
    ret_chunk = chunk_size
    fin_li_dat = []
    while ret_chunk == chunk_size:
        file_chunk = pd.read_csv(file, usecols=header_locs, skiprows=skip, 
                     nrows=chunk_size, names=header_fields, dtype='str')
        sub_dat = row_sub(file_chunk)
        fin_li_dat.append( sub_dat.copy() )
        skip += chunk_size
        it_n += 1
        sub_n += sub_dat.shape[0]
        print(f'Grabbed iter {it_n} total sub n so far {sub_n}')
        ret_chunk = file_chunk.shape[0]
    fin_dat = pd.concat(fin_li_dat, axis=0)
    return fin_dat

# Takes about 3 minutes
print( datetime.now() )
pharm_tx = csv_chunks(npi_csv, chunk_size=1000000, keep_cols=keep_col, row_sub=sub_rows)
print( datetime.now() )

ph_tx_wa = pharm_tx.drop(columns=taxon_codes).reset_index(drop=True)
ph_tx_wa['Provider Business Practice Location Address Postal Code'] = ph_tx_wa['Provider Business Practice Location Address Postal Code'].str[0:5]

ph_tx_wa = ph_tx_wa[['Provider Organization Name (Legal Business Name)',
            'Provider First Line Business Practice Location Address',
            'Provider Business Practice Location Address City Name',
            'Provider Business Practice Location Address State Name',
            'Provider Business Practice Location Address Postal Code',
            'Provider Business Practice Location Address Telephone Number']]
ph_tx_wa

2023-03-27 09:46:51.924525
Grabbed iter 1 total sub n so far 15
Grabbed iter 2 total sub n so far 54
Grabbed iter 3 total sub n so far 92
Grabbed iter 4 total sub n so far 107
Grabbed iter 5 total sub n so far 126
Grabbed iter 6 total sub n so far 153
Grabbed iter 7 total sub n so far 191
Grabbed iter 8 total sub n so far 205
2023-03-27 09:48:54.115012


Unnamed: 0,Provider Organization Name (Legal Business Name),Provider First Line Business Practice Location Address,Provider Business Practice Location Address City Name,Provider Business Practice Location Address State Name,Provider Business Practice Location Address Postal Code,Provider Business Practice Location Address Telephone Number
0,"CENTER FOR PROSTHETICS ORTHOTICS, INC.",411 12TH AVE,SEATTLE,WA,98122,2063284276
1,"CENTER FOR PROSTHETICS ORTHOTICS, INC.",411 12TH AVE,SEATTLE,WA,98122,2063284276
2,"MASTER'S ORTHOTICS AND PROSTHETICS, LLC",4409 NW ANDERSON HILL RD,SILVERDALE,WA,98383,3606986020
3,"CASCADE PROSTHETICS AND ORTHOTICS, INC.",17670 DUNBAR RD,MOUNT VERNON,WA,98273,3604284003
4,BOND ENTERPRISES INC.,4700 POINT FOSDICK DR NW,GIG HARBOR,WA,98335,2538589941
...,...,...,...,...,...,...
200,"INLAND NORTHWEST ORTHOTICS AND PROSTHETICS, INC",717 6TH ST,CLARKSTON,WA,99403,5097588874
201,DISCREET SUPPLIES LLC,5215 W CLEARWATER AVE STE 106,KENNEWICK,WA,99336,5094913266
202,JANALAIR LLC,7942 NOBLE VIEW LN NW,OLYMPIA,WA,98502,3607912207
203,JANALAIR LLC,7942 NOBLE VIEW LN NW,OLYMPIA,WA,98502,3607912207


In [28]:
l = ph_tx_wa[ph_tx_wa['Provider Organization Name (Legal Business Name)'].str.contains("PROSTHETICS") & ph_tx_wa['Provider Organization Name (Legal Business Name)'].str.contains("ORTHOTICS")]
len(l)

58

### Oregon

In [26]:
taxon_codes = ['Healthcare Provider Taxonomy Code_' + str(i+1) for i in range(15)]
keep_col += taxon_codes
community_pharm = ['222Z00000X', '224P00000X','335E00000X']
npi_csv = 'npidata_pfile_20050523-20230212.csv' #Newer files will prob change the name

# This defines the rows I want
def sub_rows(data):
    ec = data['Entity Type Code'] == "2"
    st = data['Provider Business Practice Location Address State Name'] == 'OR'
    ta = data[taxon_codes].isin(community_pharm).any(axis=1)
    ac = data['NPI Deactivation Reason Code'].isna()
    all_together = ec & st & ta & ac 
    sub = data[all_together]
    return sub

def csv_chunks(file,chunk_size,keep_cols,row_sub):
    # First lets get the header and figure out the column indices
    header_fields = list(pd.read_csv(npi_csv, nrows=1))
    header_locs = [header_fields.index(i) for i in keep_cols]
    # Now reading in a chunk of data
    skip = 1
    it_n = 0
    sub_n = 0
    ret_chunk = chunk_size
    fin_li_dat = []
    while ret_chunk == chunk_size:
        file_chunk = pd.read_csv(file, usecols=header_locs, skiprows=skip, 
                     nrows=chunk_size, names=header_fields, dtype='str')
        sub_dat = row_sub(file_chunk)
        fin_li_dat.append( sub_dat.copy() )
        skip += chunk_size
        it_n += 1
        sub_n += sub_dat.shape[0]
        print(f'Grabbed iter {it_n} total sub n so far {sub_n}')
        ret_chunk = file_chunk.shape[0]
    fin_dat = pd.concat(fin_li_dat, axis=0)
    return fin_dat

# Takes about 3 minutes
print( datetime.now() )
pharm_tx = csv_chunks(npi_csv, chunk_size=1000000, keep_cols=keep_col, row_sub=sub_rows)
print( datetime.now() )

ph_tx_or = pharm_tx.drop(columns=taxon_codes).reset_index(drop=True)
ph_tx_or['Provider Business Practice Location Address Postal Code'] = ph_tx_or['Provider Business Practice Location Address Postal Code'].str[0:5]

ph_tx_or = ph_tx_or[['Provider Organization Name (Legal Business Name)',
            'Provider First Line Business Practice Location Address',
            'Provider Business Practice Location Address City Name',
            'Provider Business Practice Location Address State Name',
            'Provider Business Practice Location Address Postal Code',
            'Provider Business Practice Location Address Telephone Number']]
ph_tx_or

2023-03-27 09:48:54.126232
Grabbed iter 1 total sub n so far 3
Grabbed iter 2 total sub n so far 22
Grabbed iter 3 total sub n so far 47
Grabbed iter 4 total sub n so far 62
Grabbed iter 5 total sub n so far 78
Grabbed iter 6 total sub n so far 90
Grabbed iter 7 total sub n so far 113
Grabbed iter 8 total sub n so far 124
2023-03-27 09:50:57.808975


Unnamed: 0,Provider Organization Name (Legal Business Name),Provider First Line Business Practice Location Address,Provider Business Practice Location Address City Name,Provider Business Practice Location Address State Name,Provider Business Practice Location Address Postal Code,Provider Business Practice Location Address Telephone Number
0,FOOT FITNESS,258 SW 5TH ST STE 2,REDMOND,OR,97756,5419232552
1,PARADOX WALKFIT INC.,15532 SW PACIFIC HWY,TIGARD,OR,97224,5036246400
2,HANGER PROSTHETICS & ORTHOTICS WEST INC,341 SE 3RD AVE,HILLSBORO,OR,97123,5036408477
3,HANGER PROSTHETICS & ORTHOTICS INC,2275 NE DOCTORS DR,BEND,OR,97701,5413890633
4,"SYNERGY MEDICAL SYSTEMS, INC.",1245 RHODODENDRON DR,FLORENCE,OR,97439,5419974134
...,...,...,...,...,...,...
119,"SPECTRUM PROSTHETICS AND ORTHOTICS, LLC",300 UNION AVE STE C,GRANTS PASS,OR,97527,5419559678
120,SPECTRUM ORTHOTICS & PROSTHETICS INC,3541 E BARNETT RD,MEDFORD,OR,97504,5417342435
121,KRFS LLC,2231 N ELDORADO AVE,KLAMATH FALLS,OR,97601,5418845348
122,SPECTRUM PROSTHETICS LLC,1963 THOMPSON RD,COOS BAY,OR,97420,5412691773


In [29]:
l = ph_tx_or[ph_tx_or['Provider Organization Name (Legal Business Name)'].str.contains("PROSTHETICS") & ph_tx_or['Provider Organization Name (Legal Business Name)'].str.contains("ORTHOTICS")]
len(l)

43

### Illinois

In [24]:
taxon_codes = ['Healthcare Provider Taxonomy Code_' + str(i+1) for i in range(15)]
keep_col += taxon_codes
community_pharm = ['222Z00000X', '224P00000X','335E00000X']
npi_csv = 'npidata_pfile_20050523-20230212.csv' #Newer files will prob change the name

# This defines the rows I want
def sub_rows(data):
    ec = data['Entity Type Code'] == "2"
    st = data['Provider Business Practice Location Address State Name'] == 'IL'
    ta = data[taxon_codes].isin(community_pharm).any(axis=1)
    ac = data['NPI Deactivation Reason Code'].isna()
    all_together = ec & st & ta & ac 
    sub = data[all_together]
    return sub

def csv_chunks(file,chunk_size,keep_cols,row_sub):
    # First lets get the header and figure out the column indices
    header_fields = list(pd.read_csv(npi_csv, nrows=1))
    header_locs = [header_fields.index(i) for i in keep_cols]
    # Now reading in a chunk of data
    skip = 1
    it_n = 0
    sub_n = 0
    ret_chunk = chunk_size
    fin_li_dat = []
    while ret_chunk == chunk_size:
        file_chunk = pd.read_csv(file, usecols=header_locs, skiprows=skip, 
                     nrows=chunk_size, names=header_fields, dtype='str')
        sub_dat = row_sub(file_chunk)
        fin_li_dat.append( sub_dat.copy() )
        skip += chunk_size
        it_n += 1
        sub_n += sub_dat.shape[0]
        print(f'Grabbed iter {it_n} total sub n so far {sub_n}')
        ret_chunk = file_chunk.shape[0]
    fin_dat = pd.concat(fin_li_dat, axis=0)
    return fin_dat

# Takes about 3 minutes
print( datetime.now() )
pharm_tx = csv_chunks(npi_csv, chunk_size=1000000, keep_cols=keep_col, row_sub=sub_rows)
print( datetime.now() )

ph_tx_il = pharm_tx.drop(columns=taxon_codes).reset_index(drop=True)
ph_tx_il['Provider Business Practice Location Address Postal Code'] = ph_tx_il['Provider Business Practice Location Address Postal Code'].str[0:5]

ph_tx_il = ph_tx_il[['Provider Organization Name (Legal Business Name)',
            'Provider First Line Business Practice Location Address',
            'Provider Business Practice Location Address City Name',
            'Provider Business Practice Location Address State Name',
            'Provider Business Practice Location Address Postal Code',
            'Provider Business Practice Location Address Telephone Number']]
ph_tx_il

2023-03-27 09:40:17.150540
Grabbed iter 1 total sub n so far 29
Grabbed iter 2 total sub n so far 82
Grabbed iter 3 total sub n so far 114
Grabbed iter 4 total sub n so far 137
Grabbed iter 5 total sub n so far 190
Grabbed iter 6 total sub n so far 263
Grabbed iter 7 total sub n so far 329
Grabbed iter 8 total sub n so far 352
2023-03-27 09:42:22.533302


Unnamed: 0,Provider Organization Name (Legal Business Name),Provider First Line Business Practice Location Address,Provider Business Practice Location Address City Name,Provider Business Practice Location Address State Name,Provider Business Practice Location Address Postal Code,Provider Business Practice Location Address Telephone Number
0,BEAUTY IMAGE CENTER,1280 BAMBERG CT,HANOVER PARK,IL,60103,6304839225
1,AGONY OF DE-FEET LTD.,1324 S MILWAUKEE AVE,LIBERTYVILLE,IL,60048,8476807534
2,ANTOINETTE'S WIGS & DAISY BOUTIQUE MASTECTOMY ...,4714 147TH ST,MIDLOTHIAN,IL,60445,7083855736
3,"NATIONAL PEDORTHIC SERVICES, INC.",4804 S STATE ROUTE 159,GLEN CARBON,IL,62034,4144381211
4,WEIBEL ENTERPRISES,1012 S ELMHURST RD,MOUNT PROSPECT,IL,60056,8477180587
...,...,...,...,...,...,...
347,GLENN REAMS OCULARIST INC,111 S MAIN ST STE 2F,WATERLOO,IL,62298,6189397577
348,PSI HEALTH CARE INC,1515 46TH AVE STE 1,MOLINE,IL,61265,5633593556
349,"SYNAPSE HEALTH, INC.",1603 ORRINGTON AVE STE LL004,EVANSTON,IL,60201,8477374455
350,SALUBRIOUS INTERNATIONAL LLC,359 E 79TH ST,CHICAGO,IL,60619,7734690386


In [30]:
l = ph_tx_il[ph_tx_az['Provider Organization Name (Legal Business Name)'].str.contains("PROSTHETICS") & ph_tx_il['Provider Organization Name (Legal Business Name)'].str.contains("ORTHOTICS")]
len(l)

19