# Exploring Available Pharmacy Data & Viz Options

## Notebook Configuration 

In [119]:
import pandas as pd
import plotly.express as px

# Widen pandas' display limits so wide pharmacy tables are easier to inspect.
DISPLAY_OPTIONS = {
    'display.max_columns': None,  # None removes the cap on displayed columns
    'display.max_rows': 100,      # Upper bound on displayed rows
    'display.width': 1000,        # Wider text output before lines wrap
}
for option_name, option_value in DISPLAY_OPTIONS.items():
    pd.set_option(option_name, option_value)

## Load Pharmacy Data

In [120]:
# Load the all-pharmacies table from a CSV path relative to the working
# directory, then echo the frame as this cell's output.
all_pharmacies = pd.read_csv(filepath_or_buffer='all_pharmacies.csv')
all_pharmacies

In [121]:
# Load the available-pharmacies table from a CSV path relative to the working
# directory, then echo the frame as this cell's output.
available_pharmacies = pd.read_csv(filepath_or_buffer='available_pharmacies.csv')
available_pharmacies

## Exploring the All Pharmacies Table

In [122]:
# Summarise the all-pharmacies table: column names, dtypes and non-null counts.
all_pharmacies.info()

### WHAT types of Sterile Compounding Licenses have been issued by the California State Board of Pharmacy? 

In [123]:
# Count how many rows carry each 'License Type' value (value_counts skips NaN)
license_type_counts = (
    all_pharmacies['License Type']
    .value_counts()
    .rename_axis('License Type')
    .reset_index(name='Count')
)

# Create the donut chart: one slice per license type, sized by its count
fig = px.pie(
    license_type_counts,
    names='License Type',
    values='Count',
    title="Distribution of License Types",
    hole=0.4,  # A non-zero hole turns the pie into a donut
)

# Slice and legend labels come straight from the 'License Type' column.
# px.pie builds a single trace, so renaming traces does not change the legend
# text; to show friendlier labels, map the column values before plotting.

# Customize the layout for title and legend
fig.update_layout(
    title={
        'text': "Distribution of License Types",
        'y': 0.95,  # Position the title just below the top of the chart
        'x': 0.5,  # Horizontally center the title
        'xanchor': 'center',
        'yanchor': 'top'  # Align title's top edge with y position
    },
    legend=dict(
        title="License Type",  # Add a legend title
        orientation="v",  # Stack the legend entries vertically
        yanchor="bottom",
        y=0.5,  # Legend's bottom edge sits at the figure's vertical midpoint
        xanchor="right",
        x=.25  # Legend's right edge sits a quarter of the way across the figure
    )
)

# Show the plot
fig.show()


In [124]:
# Step 1: Tally how many rows fall under each 'License Type'
license_type_counts = (
    all_pharmacies['License Type']
    .value_counts()
    .rename_axis('License Type')
    .reset_index(name='Count')
)

# Step 2: Draw one horizontal bar per license type, sized by its row count
fig = px.bar(
    license_type_counts,
    x='Count',
    y='License Type',
    orientation='h',
    title="Distribution of License Types",
)

# Step 3: Centre the title near the top edge and name both axes
centered_title = dict(
    text="Distribution of License Types",
    y=0.95,
    x=0.5,
    xanchor='center',
    yanchor='top',
)
fig.update_layout(
    title=centered_title,
    xaxis_title="Count",
    yaxis_title="License Type",
    # NOTE(review): tickmode='array' is set without tickvals; confirm it has
    # the intended effect of showing every 'License Type' label.
    yaxis=dict(tickmode='array'),
)

# Step 4: Render the figure
fig.show()

### WHEN are the current licenses expiring? 

In [125]:
# Step 1: Parse 'Expiration Date' in place; values that cannot be parsed
# become NaT because of errors='coerce'
all_pharmacies['Expiration Date'] = pd.to_datetime(all_pharmacies['Expiration Date'], errors='coerce')

# Step 2: Count licenses per calendar month of expiry. groupby drops NaT keys
# by default, so rows whose date failed to parse are not counted.
expiration_monthly_counts = (
    all_pharmacies
    .groupby(all_pharmacies['Expiration Date'].dt.to_period('M'))
    .size()
    .reset_index(name='Count')
)

# Convert Period to its 'YYYY-MM' string form for Plotly compatibility
expiration_monthly_counts['Expiration Date'] = expiration_monthly_counts['Expiration Date'].astype(str)

# Step 3: Plot the time series as a line chart with markers and count labels
fig = px.line(
    expiration_monthly_counts,
    x='Expiration Date',
    y='Count',
    title="Pharmacies Expiring Over Time",
    labels={'Expiration Date': 'Expiration Date', 'Count': 'Number of Pharmacies'},
    markers=True,
    text='Count',  # Passing text here adds 'text' to the trace mode so the labels are drawn
)

# Position each count label above its marker
fig.update_traces(textposition='top center')

# Show the plot
fig.show()

### WHO is being issued Sterile Compounding Licenses by the California State Board of Pharmacy? 

In [126]:
import plotly.express as px

# Step 1: Count how many rows carry each 'Entity Type' value (value_counts skips NaN)
entity_type_counts = (
    all_pharmacies['Entity Type']
    .value_counts()
    .rename_axis('Entity Type')
    .reset_index(name='Count')
)

# Step 2: Create the donut chart: one slice per entity type, sized by its count
fig = px.pie(
    entity_type_counts,
    names='Entity Type',
    values='Count',
    title="Distribution of Entity Types",
    hole=0.4,  # A non-zero hole turns the pie into a donut
)

# Slice and legend labels come straight from the 'Entity Type' column.
# px.pie builds a single trace, so renaming traces does not change the legend
# text; to show friendlier labels, map the column values before plotting.

# Step 3: Customize the layout for title and legend
fig.update_layout(
    title={  # Title customization
        'text': "Distribution of Entity Types",
        'y': 0.95,  # Position the title just below the top of the chart
        'x': 0.5,  # Horizontally center the title
        'xanchor': 'center',
        'yanchor': 'top'  # Align title's top edge with y position
    },
    legend=dict(
        title="Entity Type",  # Add a legend title
        orientation="v",  # Stack the legend entries vertically
        yanchor="bottom",
        y=0.5,  # Legend's bottom edge sits at the figure's vertical midpoint
        xanchor="right",
        x=.25  # Legend's right edge sits a quarter of the way across the figure
    )
)

# Step 4: Show the plot
fig.show()


In [137]:
import plotly.express as px

# Step 1: Tally how many rows fall under each 'Entity Type'
entity_type_counts = (
    all_pharmacies['Entity Type']
    .value_counts()
    .rename_axis('Entity Type')
    .reset_index(name='Count')
)

# Step 2: Draw one horizontal bar per entity type, sized by its row count
chart_title = "What type of facilities hold sterile compounding licenses?"
fig = px.bar(
    entity_type_counts,
    x='Count',
    y='Entity Type',
    orientation='h',
    title=chart_title,
)

# Step 3: Centre the title near the top edge and name both axes
fig.update_layout(
    title=dict(
        text=chart_title,
        y=0.95,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
    xaxis_title="Count",
    yaxis_title="Entity Type",
)

# Step 4: Render the figure
fig.show()


In [128]:
# Preview the first 50 rows of the all-pharmacies table.
all_pharmacies.head(50)

### WHERE are the licensed Sterile Compounding Pharmacies located within the US? 

In [129]:
# Step 1: Plot every row of all_pharmacies as a point, using its LAT/LNG columns
fig = px.scatter_mapbox(
    all_pharmacies,
    lat="LAT",
    lon="LNG",
    hover_name="Pharmacy Name",  # Pharmacy name shown on hover
    zoom=3,  # Adjust zoom level for a national view
    center={"lat": 37.0902, "lon": -95.7129},  # Center on the US
    mapbox_style="open-street-map",  # Use a free map style
    title="Pharmacies in the US"
)

# Step 2: Update the figure layout for better aspect ratio
fig.update_layout(
    autosize=True,  # Automatically adjust to screen size
    margin={"r":0,"t":50,"l":0,"b":0},  # Keep a top margin so the title has room to render
    height=800  # Set a height to ensure the map is not squished
)

# Step 3: Show the plot
fig.show()


In [130]:
import plotly.express as px

# Step 1: Plot every row of all_pharmacies as a point and colour it by 'Entity Type'
# NOTE(review): nothing here checks that 'Entity Type' is present or non-null;
# confirm how rows with a missing entity type should be shown.
fig = px.scatter_mapbox(
    all_pharmacies,
    lat="LAT",
    lon="LNG",
    hover_name="Pharmacy Name",  # Pharmacy name shown on hover
    color="Entity Type",  # Color the dots by Entity Type
    zoom=3,  # Adjust zoom level for a national view
    center={"lat": 37.0902, "lon": -95.7129},  # Center on the US
    mapbox_style="open-street-map",  # Use a free map style
    title="Pharmacies in the US"
)

# Step 2: Update the figure layout for better aspect ratio
fig.update_layout(
    autosize=True,  # Automatically adjust to screen size
    margin={"r":0,"t":50,"l":0,"b":0},  # Keep a top margin so the title has room to render
    height=800  # Set a height to ensure the map is not squished
)

# Step 3: Show the plot
fig.show()


# Exploring the Available Pharmacies

An available pharmacy is any licensed Sterile Compounding Pharmacy that:

- Is not a government facility
- Is not a Hospital or Medical Center
- Is not an Infusion Center or Infusion Service
- Is not dedicated only to Veterinary Compounding


In [131]:
# Summarise the available-pharmacies table: column names, dtypes and non-null counts.
available_pharmacies.info()

### When will the available pharmacies' licenses expire? 

In [132]:
# Step 1: Parse 'Expiration Date' in place; values that cannot be parsed
# become NaT because of errors='coerce'
available_pharmacies['Expiration Date'] = pd.to_datetime(available_pharmacies['Expiration Date'], errors='coerce')

# Step 2: Count licenses per calendar month of expiry. groupby drops NaT keys
# by default, so rows whose date failed to parse are not counted.
expiration_monthly_counts = (
    available_pharmacies
    .groupby(available_pharmacies['Expiration Date'].dt.to_period('M'))
    .size()
    .reset_index(name='Count')
)

# Convert Period to its 'YYYY-MM' string form for Plotly compatibility
expiration_monthly_counts['Expiration Date'] = expiration_monthly_counts['Expiration Date'].astype(str)

# Step 3: Plot the time series as a line chart with markers and count labels
fig = px.line(
    expiration_monthly_counts,
    x='Expiration Date',
    y='Count',
    title="Pharmacies Expiring Over Time",
    labels={'Expiration Date': 'Expiration Date', 'Count': 'Number of Pharmacies'},
    markers=True,
    text='Count',  # Passing text here adds 'text' to the trace mode so the labels are drawn
)

# Position each count label above its marker
fig.update_traces(textposition='top center')

# Show the plot
fig.show()

### What specialties do the available pharmacies advertise? 

In [133]:
import pandas as pd
import plotly.express as px

# Step 1: Select the flag columns, identified by the 'is' name prefix
is_columns = [col for col in available_pharmacies.columns if col.startswith('is')]

# Step 2: Sum each flag column to get the number of pharmacies per flag
# (assumes the flag columns are boolean, so True counts as 1 — TODO confirm)
is_counts = available_pharmacies[is_columns].sum()

# Step 3: Create a two-column DataFrame from the counts for easier plotting.
# The column keeps the name 'Entity Type'; only the displayed label changes below.
is_counts_df = is_counts.reset_index()
is_counts_df.columns = ['Entity Type', 'Count']

# Step 4: Create the bar chart, labelling the x-axis to match the chart title
fig = px.bar(
    is_counts_df,
    x='Entity Type',
    y='Count',
    title='Count of Pharmacies by Specialty',
    labels={'Entity Type': 'Specialty', 'Count': 'Count of Pharmacies'},
    color='Count',  # Color bars by count
    color_continuous_scale='Viridis'  # Choose a color scale
)

# Step 5: Customize the layout for better readability
fig.update_layout(
    xaxis_title="Specialty",
    yaxis_title="Count of Pharmacies",
    xaxis_tickangle=-45,  # Rotate x-axis labels for better readability
    autosize=True,
    margin={"r":0,"t":50,"l":50,"b":50},  # Add margins for spacing
    height=600  # Set a height for the plot
)

# Step 6: Show the plot
fig.show()


### What specific conditions do the available pharmacies mention on their website? 

In [134]:
import pandas as pd
import plotly.express as px

# Step 1: Collect the columns whose names begin with 'services'
services_columns = [name for name in available_pharmacies.columns if name.startswith('services')]

# Step 2: Column-wise sum of those columns, one total per 'services' column
services_counts = available_pharmacies[services_columns].sum()

# Step 3: Reshape the totals into a two-column frame for plotting
services_counts_df = services_counts.rename_axis('Service Type').reset_index(name='Count')

# Step 4: One bar per 'services' column, with height and colour both driven by the total
fig = px.bar(
    services_counts_df,
    x='Service Type',
    y='Count',
    title='Count of Pharmacies by Conditions Treated',
    labels=dict([('Service Type', 'Service Type'), ('Count', 'Count of Pharmacies')]),
    color='Count',
    color_continuous_scale='Viridis',
)

# Step 5: Tilt the tick labels, fix the height and set the margins
fig.update_layout(
    xaxis_title="Service Type",
    yaxis_title="Count of Pharmacies",
    xaxis_tickangle=-45,
    autosize=True,
    margin=dict(r=0, t=50, l=50, b=50),
    height=600,
)

# Step 6: Render the figure
fig.show()


### How many specialties does each pharmacy have? 

In [135]:
import pandas as pd
import plotly.express as px

# Step 1: Collect the flag columns, identified by the 'is' name prefix
specialty_columns = [name for name in available_pharmacies.columns if name.startswith('is')]

# Step 2: Reshape to long format: one row per (pharmacy, flag column) pair
name_and_flags = available_pharmacies[['Pharmacy Name'] + specialty_columns]
melted_df = name_and_flags.melt(
    id_vars=['Pharmacy Name'],
    value_vars=specialty_columns,
    var_name='Specialty',
    value_name='Has Specialty',
)

# Step 3: Keep only the pairs whose flag equals True
melted_df = melted_df.loc[melted_df['Has Specialty'] == True]

# Step 4: Count the remaining rows per pharmacy name. Pharmacies with no True
# flag do not appear, and rows sharing a 'Pharmacy Name' are pooled together.
specialty_counts = (
    melted_df
    .groupby('Pharmacy Name')
    .size()
    .reset_index(name='Specialty Count')
)

# Step 5: One bar per pharmacy; height, colour and bar text all show the count
fig = px.bar(
    specialty_counts,
    x='Pharmacy Name',
    y='Specialty Count',
    title='Number of Specialties per Pharmacy',
    labels=dict([('Specialty Count', 'Number of Specialties'), ('Pharmacy Name', 'Pharmacy Name')]),
    color='Specialty Count',
    color_continuous_scale='Viridis',
    text='Specialty Count',
)

# Step 6: Tilt the tick labels and leave a deeper bottom margin for them
fig.update_layout(
    xaxis_title="Pharmacy Name",
    yaxis_title="Number of Specialties",
    xaxis_tickangle=-45,
    autosize=True,
    margin=dict(r=0, t=50, l=50, b=150),
    height=600,
)

# Step 7: Render the figure
fig.show()


### Where are available pharmacies located? 

In [136]:
import plotly.express as px

# Flag columns that exclude a pharmacy from this map when any of them is True
# NOTE(review): 'isSatellite' is not among the exclusions listed in the
# "available pharmacy" definition earlier in the notebook — confirm it belongs here.
columns_to_check = ['isGovernment', 'isSatellite', 'isInfusionCenter', 'isHospital', 'isVeterinaryOnly']

# Row mask: True where none of the exclusion flags is set
# NOTE(review): this re-filters all_pharmacies rather than plotting
# available_pharmacies — confirm that is intended.
has_exclusion_flag = all_pharmacies[columns_to_check].any(axis=1)
condition = ~has_exclusion_flag

# Keep only the rows that passed the mask
filtered_pharmacies = all_pharmacies.loc[condition]

# Step 2: Plot the remaining pharmacies as points on a US-centred map
fig = px.scatter_mapbox(
    filtered_pharmacies,
    lat="LAT",
    lon="LNG",
    hover_name="Pharmacy Name",
    zoom=3,
    center=dict(lat=37.0902, lon=-95.7129),
    mapbox_style="open-street-map",
    title="Filtered Pharmacies in the US",
)

# Step 3: Fix the height and leave margins around the map (top margin holds the title)
fig.update_layout(
    autosize=True,
    margin=dict(r=0, t=50, l=50, b=50),
    height=800,
)

# Step 4: Render the figure
fig.show()


### What percentage of licensed pharmacies are 503B Outsource Pharmacies and under FDA oversight? 

### When will licenses expire per specialty?