In [31]:
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 14 17:47:36 2024

@author: allen
"""
#%% Functions

import pandas as pd
import statsmodels.api as sm
import plotly.express as px


def extract_titles(input_str):
    return input_str.split(' - ')
    
def filter_data_by_substring(data, column_name, substring):
    return data[data[column_name].str.contains(substring, na=False)]

def load_csv(file_path):
    try:
        # Load the CSV file
        data = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except pd.errors.EmptyDataError:
        print("No data: The file is empty")
    except pd.errors.ParserError:
        print("Parse error: Check the file format")
    except Exception as e:
        print(f"An error occurred: {e}")
    return data


def plot_regression_with_stats(data, x_column, y_column, name_column):
    
    data = data[[x_column, y_column,name_column]].dropna()
    data[x_column] = pd.to_numeric(data[x_column], errors='coerce')
    data[y_column] = pd.to_numeric(data[y_column], errors='coerce')
    data = data.dropna()

    # Fit the regression model
    X = data[[x_column]]
    y = data[y_column]
    
    model = sm.OLS(y, X).fit()

    # Get R-squared and p-value
    r_squared = model.rsquared
    p_value = model.pvalues[0]  # p-value for the x_column coefficient

    # Plot the data
    fig = px.scatter(data, x=x_column, y=y_column, hover_data=name_column)
    
    # Add the regression line
    regression_line = pd.DataFrame({x_column: data[x_column], y_column: model.predict(X)})
    fig.add_traces(
        px.line(
            regression_line, x=x_column, y=y_column
        ).data
    )

    # Update layout to make hover text more readable
    fig.update_traces(textposition='top center')

    
    # Add titles and labels
    fig.update_layout(
        title=extract_titles(x_column)[1],
        xaxis_title=extract_titles(x_column)[2],
        yaxis_title=extract_titles(y_column)[2],
        annotations=[dict(
            x=0.05,
            y=0.95,
            xref='paper',
            yref='paper',
            text=f'R-squared = {r_squared:.2f}<br>p-value = {p_value:.2e}',
            showarrow=False,
            font=dict(size=12)
        )]
    )
    # Show the plot
    fig.show()  
    

# Implant Facility Analysis

In [32]:
# Specify the file path
file_path = 'C:/Users/allen/Documents/Abbott/Implant Facility.csv'

# Call the function to load the CSV
data = load_csv(file_path)

# Filter by state
data = filter_data_by_substring(data, 'State (Practice)', 'OH')

# Get data
column_headers = data.columns
total_patients = data['Total Patients']
total_revenue = data['Total Revenue']

# Create regression
for x,y in zip(total_patients.columns,total_revenue.columns):
    plot_regression_with_stats(data,x,y,['Organization Legal Name'])

TypeError: unhashable type: 'list'

# Analysis Community Neurologist

In [None]:
# Specify the file path
file_path = 'C:/Users/allen/Documents/Abbott/Community Neurologist.csv'

# Call the function to load the CSV
data = load_csv(file_path)

# Filter by state
data = filter_data_by_substring(data, 'State (Practice)', 'OH')

# Get data
column_headers = data.columns
total_patients = get_column_data(data, 'Total Patients')
total_revenue = get_column_data(data, 'Revenue')

# Create regression
for x,y in zip(total_patients.columns,total_revenue.columns):
    plot_regression_with_stats(data,x,y,'Affiliated Practice')