In [5]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Load the 'Program Opportunity' sheet of the Excel workbook into a DataFrame
# (the source is an .xlsx workbook, not a CSV).
df = pd.read_excel('ProviderData_Final.xlsx', sheet_name='Program Opportunity')

# Histogram of the 'Cost' column
fig1 = px.histogram(df, x='Cost', nbins=20, title='Histogram of Cost')

# Boxplot of Cost grouped by training type
fig2 = px.box(df, x='Training Type*', y='Cost', title='Boxplot of Cost by TrainingType')

# Scatter plot of Cost vs total credits, colored by industry.
# The column names are used verbatim, including the embedded newline and the
# trailing space in 'Industries* '.
fig3 = px.scatter(df, x='Total Credits/\nCurriculum Hours', y='Cost', color='Industries* ',
                  title='Scatter plot of Cost vs TotalCredits colored by Industries',
                  labels={'Total Credits/\nCurriculum Hours': 'Total Credits/\nCurriculum Hours', 'Cost': 'Cost'})

# Overlaid histograms of Cost, one series per industry (no dropdown is built
# here; the series are distinguished by color).
fig4 = px.histogram(df, x='Cost', color='Industries* ', barmode='overlay',
                    title='Histogram of Cost vs Frequency for Each Industry')

# Count the number of rows for each industry
industry_count = df['Industries* '].value_counts().reset_index()
industry_count.columns = ['Industries* ', 'Frequency']

# Bar chart of the per-industry counts
fig5 = px.bar(industry_count, x='Industries* ', y='Frequency', text='Frequency',
              title='Number of Providers in Each Industry')

# Rotate x-axis labels so long category names stay readable
fig5.update_layout(xaxis_tickangle=-45)

# Count the number of rows for each training type
training_type_count = df['Training Type*'].value_counts().reset_index()
training_type_count.columns = ['Training Type*', 'Frequency']

# Bar chart of the per-training-type counts
fig6 = px.bar(training_type_count, x='Training Type*', y='Frequency', text='Frequency',
              title='Number of Providers in Each TrainingType')

# Rotate x-axis labels so long category names stay readable
fig6.update_layout(xaxis_tickangle=-45)

# Combine all six plots into a single HTML file.
# - The file is written as UTF-8 explicitly (and declares it) so non-ASCII
#   characters in the data do not depend on the platform's default encoding.
# - plotly.js is referenced from the CDN once, by the first figure; later
#   figures reuse it instead of repeating the <script> tag.
figures = [fig1, fig2, fig3, fig4, fig5, fig6]
with open('visualizations.html', 'w', encoding='utf-8') as f:
    f.write('<html><head><meta charset="utf-8" /></head><body>\n')
    for position, figure in enumerate(figures):
        include_js = 'cdn' if position == 0 else False
        f.write(figure.to_html(full_html=False, include_plotlyjs=include_js))
    f.write('\n</body></html>\n')

print("The visualizations have been saved to visualizations.html")


Data Validation extension is not supported and will be removed



The visualizations have been saved to visualizations.html


In [11]:
import pandas as pd
import re

# List of cities and towns with their respective counties
places_counties = {
    # Maps a city/town name to the county label written to the 'County' column.
    # Key order matters: find_place() scans the keys in this order.
    #
    # "Clayton" and "Smyrna" were previously listed twice (first as
    # "New Castle", later as "Kent"). In a dict literal the last value wins
    # while the key keeps its first position, so the effective mapping was
    # always "Kent". The dead duplicates are removed here; the resulting dict
    # is identical to what the old literal produced at runtime.
    "Wilmington": "New Castle",
    "Newark": "New Castle",
    "Middletown": "New Castle",
    "New Castle": "New Castle",
    "Delaware City": "New Castle",
    "Arden": "New Castle",
    "Ardencroft": "New Castle",
    "Ardentown": "New Castle",
    "Bellefonte": "New Castle",
    "Clayton": "Kent",  # formerly duplicated; see note above
    "Elsmere": "New Castle",
    "Newport": "New Castle",
    "Odessa": "New Castle",
    "Smyrna": "Kent",  # formerly duplicated; see note above
    "Townsend": "New Castle",
    "Dover": "Kent",
    "Harrington": "Kent",
    "Bowers": "Kent",
    "Camden": "Kent",
    "Cheswold": "Kent",
    "Farmington": "Kent",
    "Felton": "Kent",
    "Frederica": "Kent",
    "Hartly": "Kent",
    "Houston": "Kent",
    "Kenton": "Kent",
    "Leipsic": "Kent",
    "Little Creek": "Kent",
    "Magnolia": "Kent",
    "Viola": "Kent",
    "Woodside": "Kent",
    "Wyoming": "Kent",
    "Milford": "Sussex",
    "Seaford": "Sussex",
    "Lewes": "Sussex",
    "Rehoboth Beach": "Sussex",
    "Bethany Beach": "Sussex",
    "Bethel": "Sussex",
    "Blades": "Sussex",
    "Bridgeville": "Sussex",
    "Dagsboro": "Sussex",
    "Delmar": "Sussex",
    "Dewey Beach": "Sussex",
    "Ellendale": "Sussex",
    "Fenwick Island": "Sussex",
    "Frankford": "Sussex",
    "Georgetown": "Sussex",
    "Greenwood": "Sussex",
    "Henlopen Acres": "Sussex",
    "Laurel": "Sussex",
    "Millsboro": "Sussex",
    "Millville": "Sussex",
    "Milton": "Sussex",
    "Ocean View": "Sussex",
    "Selbyville": "Sussex",
    "Slaughter Beach": "Sussex",
    "South Bethany": "Sussex",
    # NOTE(review): this value repeats the place name instead of naming a
    # county - confirm whether that is intended.
    "Schaumburg": "Schaumburg",
}

# List of ZIP codes to check
zipcodes = [
    # 197xx
    19701, 19702, 19703, 19706, 19707, 19708, 19709, 19710, 19711,
    19712, 19713, 19714, 19715, 19716, 19717, 19718, 19720, 19721,
    19725, 19726, 19730, 19731, 19732, 19733, 19734, 19735, 19736,
    # 198xx
    19801, 19802, 19803, 19804, 19805, 19806, 19807, 19808, 19809,
    19810, 19850, 19880, 19884, 19885, 19886, 19890, 19891, 19892,
    19893, 19894, 19895, 19896, 19897, 19898, 19899,
    # 199xx
    19901, 19902, 19903, 19904, 19905, 19906, 19930, 19931, 19933,
    19934, 19936, 19938, 19939, 19940, 19941, 19943, 19944, 19945,
    19946, 19947, 19950, 19951, 19952, 19953, 19954, 19955, 19956,
    19958, 19960, 19961, 19962, 19963, 19964, 19966, 19967, 19968,
    19969, 19970, 19971, 19973, 19975, 19977, 19979, 19980,
    # the one entry outside the 19xxx range
    60173,
]

# Load the 'Program Opportunity' worksheet of the provider workbook.
# The source is an Excel file, not a CSV.
df = pd.read_excel(
    io='ProviderData_Final.xlsx',
    sheet_name='Program Opportunity',
)

# Function to extract everything before the first comma in the address
def extract_before_first_comma(address):
    """Return the part of ``address`` that precedes its first comma.

    Parameters
    ----------
    address : object
        Usually an address string. Non-string values (e.g. NaN/None for
        missing cells) are passed through untouched.

    Returns
    -------
    object
        For a string: everything before the first ``,`` (the whole string
        when it contains no comma). Otherwise ``address`` itself.
    """
    if not isinstance(address, str):
        return address
    # str.partition avoids a regex for a literal separator and sidesteps the
    # positional ``maxsplit`` form of re.split, deprecated since Python 3.13.
    return address.partition(',')[0]

# Function to find city or town in address
def find_place(address, places=None):
    """Return the first known place name that occurs in ``address``.

    Parameters
    ----------
    address : object
        Address text to search. Anything that is not a ``str`` yields None.
    places : iterable of str, optional
        Place names to look for, in priority order. Defaults to the keys of
        the module-level ``places_counties`` mapping.

    Returns
    -------
    str or None
        The first place (in ``places`` order) found as a substring of
        ``address``, or None when nothing matches.

    Notes
    -----
    A name that is merely a fragment of a longer matching name is skipped, so
    an address containing "Ardentown" resolves to "Ardentown" rather than to
    the earlier, shorter key "Arden". Otherwise the original first-match-in-
    order behaviour is preserved.
    """
    if not isinstance(address, str):
        return None
    if places is None:
        places = places_counties

    matches = [place for place in places if place in address]
    for place in matches:
        # Ignore a hit that is a proper substring of another hit.
        shadowed = any(place != other and place in other for other in matches)
        if not shadowed:
            return place
    return None

# Function to find county based on city or town
def find_county(place):
    """Look up the county recorded for ``place`` in ``places_counties``.

    Returns the mapped value, or None when ``place`` is not a key of the
    mapping (including when ``place`` is None).
    """
    if place in places_counties:
        return places_counties[place]
    return None

# Function to find zipcode in address
def find_zipcode(address, candidates=None):
    """Return the first known ZIP code that appears in ``address``.

    Parameters
    ----------
    address : object
        Address text to search. Anything that is not a ``str`` yields None.
    candidates : iterable of int, optional
        ZIP codes to look for, in priority order. Defaults to the
        module-level ``zipcodes`` list.

    Returns
    -------
    int or None
        The first candidate (in ``candidates`` order) present in ``address``
        as a standalone five-digit number, or None when there is none.

    Notes
    -----
    Only five-digit runs that are not part of a longer run of digits count,
    so a ZIP is no longer "found" inside a phone number or a long house
    number. ZIP+4 forms such as ``19901-1234`` still match on the first five
    digits. Candidates that are not five digits long can never match.
    """
    if not isinstance(address, str):
        return None
    if candidates is None:
        candidates = zipcodes

    # Collect every isolated 5-digit token once, then test candidates in order.
    tokens = set(re.findall(r'(?<!\d)\d{5}(?!\d)', address))
    for zipcode in candidates:
        if str(zipcode) in tokens:
            return zipcode
    return None

# Function to find state based on address
def find_state(address):
    """Infer the state name from an address string.

    Parameters
    ----------
    address : object
        Address text to inspect. Anything that is not a ``str`` yields None.

    Returns
    -------
    str or None
        ``'Delaware'`` when the address contains the word "Delaware" or the
        abbreviation "DE"; otherwise ``'Illinois'`` when it contains the
        abbreviation "IL"; otherwise None. Delaware is checked first.

    Notes
    -----
    The two-letter abbreviations must not be adjacent to another letter, so
    they no longer match inside words (e.g. "DEERFIELD" or "WILMINGTON").
    Digits and punctuation may still touch them ("DE19901", "DE,").
    """
    if not isinstance(address, str):
        return None
    if 'Delaware' in address or re.search(r'(?<![A-Za-z])DE(?![A-Za-z])', address):
        return 'Delaware'
    if re.search(r'(?<![A-Za-z])IL(?![A-Za-z])', address):
        return 'Illinois'
    return None

# Function to separate 'Non-Credit' or 'Credit' from the 'Provider' column
def separate_credit_non_credit(provider):
    """Classify a provider label as 'Non-Credit', 'Credit' or 'Unknown'.

    'Non-Credit' is tested before 'Credit' because the former contains the
    latter. Values that are not plain ``str`` objects are 'Unknown'.
    """
    if type(provider) is str:
        for label in ('Non-Credit', 'Credit'):
            if label in provider:
                return label
    return 'Unknown'

# Function to remove 'Non-Credit' and 'Credit' from the 'Provider' column
def remove_credit_non_credit(provider):
    """Strip the '- Non-Credit' / '- Credit' markers from a provider label.

    Both markers are removed wherever they occur and surrounding whitespace
    is trimmed. Values that are not plain ``str`` objects are returned as-is.
    """
    if type(provider) is str:
        cleaned = provider
        for marker in ('- Non-Credit', '- Credit'):
            cleaned = cleaned.replace(marker, '')
        return cleaned.strip()
    return provider

# Derive the address-based columns from 'Address*'
df['Street_Name'] = df['Address*'].apply(extract_before_first_comma)
df['City/Town'] = df['Address*'].apply(find_place)
df['County'] = df['City/Town'].apply(find_county)
df['Zipcode'] = df['Address*'].apply(find_zipcode)
df['State'] = df['Address*'].apply(find_state)

# 'Credit Type' must be computed before 'Provider*' is overwritten, because
# the next line removes the very markers the classification relies on.
df['Credit Type'] = df['Provider*'].apply(separate_credit_non_credit)
df['Provider*'] = df['Provider*'].apply(remove_credit_non_credit)

# Save the updated DataFrame to a new CSV file. The path is kept in one place
# so the confirmation message always names the file that was actually written
# (the old message said 'updated_file.csv' while the file is 'Updated_file.csv').
output_path = 'Updated_file.csv'
df.to_csv(output_path, index=False)

print(f"The updated CSV file has been saved as '{output_path}'.")

The updated CSV file has been saved as 'updated_file.csv'.


  for idx, row in parser.parse():
