In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<H2>The dataset contains hours worked by hospital employee classification and by hospital cost center groupings, as well as adjusted patient days, for all licensed, comparable hospitals in California. State mental hospitals and psychiatric health facilities are excluded.<p><p>This project presents a few data analyses of it.</H2>

In [None]:
# Load the semicolon-delimited hospital CSV into a DataFrame.
hosp = pd.read_csv(
    '/kaggle/input/cusersmarildownloadshospcsv/hosp.csv',
    delimiter=';',
)

In [None]:
# Show the loaded DataFrame as this cell's output.
hosp

<H4>Note: every row contains information for a team of a certain class, not for an individual.<p>
    This is expected, since the productive hours for a year (begin date to end date) are far too large for one individual.<p>
There is not enough information to say more; this data needs more details.</H4>

<H3>Evaluating missing data</H3>

In [None]:
# Boolean mask with the same shape as hosp: True where a value is missing.
missing_data = hosp.isna()
missing_data.head()

In [None]:
# For each column, print how many entries are missing (True) vs present (False).
for column in missing_data.columns:
    column_flags = missing_data[column]
    print(column)
    print(column_flags.value_counts())
    print("")

<H3>Only a few rows have missing data compared to the size of the sample, so we delete them</H3>


In [None]:
# Drop every row that has at least one missing value (pandas defaults, spelled out).
hosp = hosp.dropna(axis=0, how='any')

In [None]:
# Re-check for missing values on the CLEANED frame. The earlier `missing_data`
# mask was computed before dropna(), so reusing it would just repeat the old
# counts and would not confirm that the rows were actually removed.
missing_after_drop = hosp.isnull()
for column in missing_after_drop.columns.values.tolist():
    print(column)
    print(missing_after_drop[column].value_counts())
    print("")

<H3>Now we have no missing data.<p><p>
Next, we will convert the date columns to a datetime type</H3>

In [None]:
# Parse the date strings (month/day/year) into datetime64 columns.
# `hosp.loc[:, col] = ...` writes into the existing column in place on recent
# pandas, which leaves the column as `object` and breaks the later `.dt`
# accessors. `assign` builds a new frame whose columns take the parsed dtype,
# and it also avoids chained-assignment warnings on the frame from dropna().
hosp = hosp.assign(
    begin_date=pd.to_datetime(hosp['begin_date'], format='%m/%d/%Y'),
    end_date=pd.to_datetime(hosp['end_date'], format='%m/%d/%Y'),
)

In [None]:
# Show each column's dtype to check the result of the date conversion.
hosp.dtypes

<H3>Here we will investigate the differences in productive hours between facilities<p><p>
with a little visualization and some descriptive statistics</H3>

In [None]:
# Mean productive hours per facility; the result is indexed by facility_name.
facility_avg_prdctv_hrs = hosp['productive_hours'].groupby(hosp['facility_name']).mean()
facility_avg_prdctv_hrs

In [None]:
# Number of rows recorded for each facility, most frequent first.
facility_number_staff_sample = hosp.loc[:, 'facility_name'].value_counts(sort=True, ascending=False)
facility_number_staff_sample

<H3>Note: To keep the visualization clear, we plot a random sample of 100 facilities instead of all hospitals. Every time you run the code you will get<p>
a different sample, but it is still informative... try it</H3>

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import random


# Draw the (up to) 100-facility sample from the per-facility averages
# themselves, so every hospital name on the x-axis is paired with its own mean.
# Previously the names were sampled from the raw rows while y was the full,
# unsampled series, so names and values did not correspond (and the sampling
# raised when fewer than 100 rows were available).
# Deliberately unseeded: each run shows a different sample.
sampled_avg_prdctv_hrs = facility_avg_prdctv_hrs.sample(n=min(100, len(facility_avg_prdctv_hrs)))

fig = go.Figure(data=go.Scatter(x=sampled_avg_prdctv_hrs.index, y=sampled_avg_prdctv_hrs.values, mode='markers', marker=dict(color='red')))

fig.update_layout(title='Average productive hours in hospitals in California', xaxis_title='Hospital', yaxis_title='Avg productive hours')

# Facility names are long; hide the tick labels (names remain visible on hover).
fig.update_xaxes(showticklabels=False)

fig.show()

In [None]:
# Z-score the per-facility averages: subtract their mean, divide by their std.
facility_avg_prdctv_hrs_normalized = facility_avg_prdctv_hrs.sub(facility_avg_prdctv_hrs.mean()).div(facility_avg_prdctv_hrs.std())

In [None]:
# Sample (up to) 100 facilities from the normalized per-facility series so that
# each hospital name is plotted against its own normalized value. Sampling the
# names separately from the raw rows left x and y unrelated and of different
# lengths. Deliberately unseeded: each run shows a different sample.
sampled_normalized_hrs = facility_avg_prdctv_hrs_normalized.sample(n=min(100, len(facility_avg_prdctv_hrs_normalized)))

fig = go.Figure(data=go.Scatter(x=sampled_normalized_hrs.index, y=sampled_normalized_hrs.values, mode='markers', marker=dict(color='red')))

fig.update_layout(title='Average productive hours in hospitals in California (normalized)', xaxis_title='Hospital', yaxis_title='Avg productive hours')

# Facility names are long; hide the tick labels (names remain visible on hover).
fig.update_xaxes(showticklabels=False)

fig.show()

In [None]:
# Mean and standard deviation of the per-facility averages (each facility counts once).
print('Mean: ',facility_avg_prdctv_hrs.mean(),'Std: ',facility_avg_prdctv_hrs.std())

In [None]:
# Mean and standard deviation over all rows of hosp, so facilities with more rows weigh more.
print('Mean: ',hosp['productive_hours'].mean(),'Std: ',hosp['productive_hours'].std())

<H3>Now we will investigate the average number of days of work (end date minus begin date) for staff in each facility</H3>

In [None]:
# Length of each row's period: end_date minus begin_date, stored as days_work.
hosp.loc[:, 'days_work'] = hosp['end_date'].sub(hosp['begin_date'])
hosp['days_work']

In [None]:
# Mean days_work per facility; the result is indexed by facility_name.
facility_avg_days_work = hosp['days_work'].groupby(hosp['facility_name']).mean()
facility_avg_days_work

In [None]:
# Sample (up to) 100 facilities from the per-facility averages (in whole days)
# so that each hospital name is plotted against its own value. Sampling names
# from the raw rows while plotting the full series left x and y unrelated.
# Deliberately unseeded: each run shows a different sample.
avg_days_work_in_days = facility_avg_days_work.dt.days
sampled_avg_days_work = avg_days_work_in_days.sample(n=min(100, len(avg_days_work_in_days)))

fig = go.Figure(data=go.Scatter(x=sampled_avg_days_work.index, y=sampled_avg_days_work.values, mode='markers', marker=dict(color='red')))

fig.update_layout(title='Average days of work for staff in hospitals in California ', xaxis_title='Hospital', yaxis_title='Avg days work')

# Facility names are long; hide the tick labels (names remain visible on hover).
fig.update_xaxes(showticklabels=False)

fig.show()

The majority of hospital staff in the study worked for a year.

<H3>Investigating average days of work vs. average productive hours in facilities</H3>

In [None]:
# One marker per facility: average productive hours (x) against average work
# days (y). Both series come from a groupby on facility_name.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=facility_avg_prdctv_hrs,
        y=facility_avg_days_work.dt.days,
        mode='markers',
        marker={'color': 'red'},
    )
)
fig.update_layout(
    title='Average days of work vs productive hours in hospitals in California ',
    xaxis_title='productive hours',
    yaxis_title='Avg days work',
)
fig.show()

<H4>The differences between facilities in productive hours are obvious</H4>

<H3></H3>

<H2>Recall</H2>The "productive hours per adjusted patient day" metric is a key performance indicator used in healthcare settings, particularly in hospitals, to measure the productivity and efficiency of hospital staff in relation to the number of adjusted patient days. It provides insights into how effectively hospital resources are utilized to provide patient care and manage hospital operations.

<H4>We will modify the previous figure by adding this metric to it... let us try a bubble chart</H4>

In [None]:
# Mean "productive hours per adjusted patient day" per facility, indexed by facility_name.
avg_prd_hr_adj_per_patientday = hosp["productive_hours_per_adjusted_patient_day"].groupby(hosp['facility_name']).mean()
avg_prd_hr_adj_per_patientday

In [None]:
# Gather the three per-facility aggregates into one frame. Building it from the
# Series aligns them on their shared facility_name index, so each bubble's
# position, size and hover label all belong to the same hospital.
# Previously hover_name used hosp["facility_name"].unique(), which is in order
# of first appearance, while the groupby results are sorted by name -- so the
# hover labels were attached to the wrong bubbles.
facility_summary = (
    pd.DataFrame({
        'avg_productive_hours': facility_avg_prdctv_hrs,
        'avg_days_work': facility_avg_days_work.dt.days,
        'avg_prd_hr_adj_per_patient_day': avg_prd_hr_adj_per_patientday,
    })
    .rename_axis('facility_name')
    .reset_index()
)

fig = px.scatter(facility_summary, x='avg_productive_hours', y='avg_days_work', size='avg_prd_hr_adj_per_patient_day',
                 hover_name='facility_name', title='Productive Hours vs Work Days Adjuested Per Patient Day', size_max=60)
fig.update_layout(xaxis_title='productive hours', yaxis_title='Avg days work')

fig.show()

<H2>Check out this awesome bubble chart and take your time zooming and selecting... discover the variety of adjusted productive hours among facilities</H2>

<p><p><p>
 <H3>Teams Investigation</H3>   

In [None]:
# Per hours_type: mean productive hours and mean productive hours per adjusted patient day.
teams = (
    hosp
    .groupby('hours_type')[['productive_hours', 'productive_hours_per_adjusted_patient_day']]
    .agg('mean')
)

teams


In [None]:
# Bar chart of mean productive hours, one bar per hours_type.
fig = px.bar(
    teams,
    x=teams.index,
    y="productive_hours",
    title='Productive hours for teams',
)
fig.update_xaxes(title_text='teams')
fig.update_yaxes(title_text='productive hours')
fig.show()

In [None]:
# Bar chart of mean productive hours per adjusted patient day, one bar per hours_type.
fig = px.bar(
    teams,
    x=teams.index,
    y="productive_hours_per_adjusted_patient_day",
    title='Productive hours per adj patient day for teams',
)
fig.update_xaxes(title_text='teams')
fig.update_yaxes(title_text='Productive hours per adj patient day')
fig.show()

<H3>What did you learn from these figures?<p><p>
For every team, check the productivity and the complexity of its work</H3>