In [25]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Read the provider table, decoding the file as Latin-1
df = pd.read_csv(
    'ProviderData.csv',
    encoding='latin1',
)

# fig1: histogram of the 'Cost' column (nbins=20)
fig1 = px.histogram(
    df,
    x='Cost',
    nbins=20,
    title='Histogram of Cost',
)

# fig2: box plot of 'Cost', one box per 'TrainingType' value
fig2 = px.box(
    df,
    x='TrainingType',
    y='Cost',
    title='Boxplot of Cost by TrainingType',
)

# fig3: 'Cost' against 'TotalCredits', points coloured by 'Industries'
fig3 = px.scatter(
    df,
    x='TotalCredits',
    y='Cost',
    color='Industries',
    title='Scatter plot of Cost vs TotalCredits colored by Industries',
    labels={'TotalCredits': 'TotalCredits', 'Cost': 'Cost'},
)

# fig4: overlaid 'Cost' histograms, one colour per 'Industries' value
# (no dropdown control is built here; the figure relies on the colour grouping only)
fig4 = px.histogram(
    df,
    x='Cost',
    color='Industries',
    barmode='overlay',
    title='Histogram of Cost vs Frequency for Each Industry',
)

def _frequency_bar(frame, column, label, title):
    """Count the values of ``column`` and draw the counts as a bar chart.

    Parameters:
        frame: DataFrame holding ``column``.
        column: name of the column whose values are counted.
        label: name given to the category column of the count table (also the x axis).
        title: chart title.

    Returns:
        (counts, fig): the two-column count table ``[label, 'Frequency']`` and the
        bar figure built from it, with x tick labels rotated by -45 degrees.
    """
    counts = frame[column].value_counts().reset_index()
    # Columns are renamed by position, so the names reset_index() picks do not matter
    counts.columns = [label, 'Frequency']

    fig = px.bar(counts, x=label, y='Frequency', text='Frequency', title=title)
    # Rotate the x-axis tick labels
    fig.update_layout(xaxis_tickangle=-45)
    return counts, fig


# Number of rows per 'Industries' value
industry_count, fig5 = _frequency_bar(
    df, 'Industries', 'Industry', 'Number of Providers in Each Industry')

# Number of rows per 'TrainingType' value
training_type_count, fig6 = _frequency_bar(
    df, 'TrainingType', 'TrainingType', 'Number of Providers in Each TrainingType')

# Write all six figures, in order, into a single standalone HTML page
figures = [fig1, fig2, fig3, fig4, fig5, fig6]

# The encoding is set explicitly so the bytes on disk match the charset declared
# in <head>, whatever the platform's default text encoding is.
with open('visualizations.html', 'w', encoding='utf-8') as f:
    f.write('<html>\n<head><meta charset="utf-8" /></head>\n<body>\n')
    for position, figure in enumerate(figures):
        # Only the first fragment carries the plotly.js CDN <script> tag;
        # the later fragments reuse the library it loads.
        f.write(figure.to_html(full_html=False,
                               include_plotlyjs='cdn' if position == 0 else False))
    f.write('</body>\n</html>\n')

print("The visualizations have been saved to visualizations.html")
 

The visualizations have been saved to visualizations.html


In [None]:
import pandas as pd

# Read the raw provider table, decoding the file as Latin-1
df = pd.read_csv(
    'ProviderData.csv',
    encoding='latin1',
)

# Function to separate 'Non-Credit' or 'Credit' from the 'Provider' column
def separate_credit_non_credit(provider):
    """Classify a provider value as 'Non-Credit', 'Credit', or 'Unknown'.

    Parameters:
        provider: the cell value to inspect; normally a string.

    Returns:
        'Non-Credit' if the text contains 'Non-Credit', otherwise 'Credit' if it
        contains 'Credit', otherwise 'Unknown'. Non-string values (for example
        the float NaN pandas uses for an empty cell) also yield 'Unknown'.
    """
    # `in` on a float NaN or None raises TypeError, so non-text values are handled first
    if not isinstance(provider, str):
        return 'Unknown'
    # 'Non-Credit' contains 'Credit', so the longer label must be tested first
    if 'Non-Credit' in provider:
        return 'Non-Credit'
    if 'Credit' in provider:
        return 'Credit'
    return 'Unknown'

# Classify every 'Provider' value and store the label in a new 'Credit_Type' column
df['Credit_Type'] = df['Provider'].apply(separate_credit_non_credit)

# Write the updated table without the index column. The path is named once so
# the confirmation message below always reports the file actually written.
updated_csv_path = 'Updated_ProviderData1.csv'
df.to_csv(updated_csv_path, index=False)

print(f"The words 'Non-Credit' or 'Credit' have been separated from the 'Provider' column and saved to '{updated_csv_path}'.")

In [None]:
import pandas as pd
import re

# Load the CSV written by the previous cell. DataFrame.to_csv wrote it with its
# default UTF-8 encoding, so it is read back as UTF-8; decoding it as Latin-1
# would garble any non-ASCII characters.
df = pd.read_csv('Updated_ProviderData1.csv', encoding='utf-8')

# Function to extract everything before the first comma in the address
def extract_before_first_comma(Address):
    """Return the part of an address that precedes its first comma.

    Parameters:
        Address: the cell value to inspect; normally a string.

    Returns:
        The text before the first ',' (the whole string when it has no comma).
        Non-string values, such as the float NaN pandas uses for an empty cell,
        are returned unchanged so missing addresses stay missing.
    """
    # String methods are unavailable on NaN/None; pass such values through untouched
    if not isinstance(Address, str):
        return Address
    # A literal comma needs no regex; partition splits at the first occurrence only
    return Address.partition(',')[0]

# Run the extractor over every address and keep the results as 'Street_Name'.
# NOTE(review): the column key is 'Address ' with a trailing space; confirm it matches the CSV header.
street_names = df['Address '].apply(extract_before_first_comma)
df['Street_Name'] = street_names

# Persist the extended table, leaving the index column out of the file
df.to_csv(
    'Updated_ProviderData_with_Street2.csv',
    index=False,
)

print("The street names have been extracted from the 'Address' column and saved to 'Updated_ProviderData_with_Street2.csv'.")