In [16]:
import pandas as pd
import plotly.express as px
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.io.sas.sas_constants import dataset_length
from scipy.stats import gaussian_kde
import plotly.graph_objects as go
from IPython.display import clear_output

def assign_shift(hour):
    if 0 <= hour < 8:
        return 'Morning'
    elif 8 <= hour < 16:
        return 'Afternoon'
    else:
        return 'Night'

In [None]:
dataset_v1 = pd.read_csv('C://Users//amjads//Documents//Data_extract//Clinical_deterioration//obs_test.csv', delimiter = ",", encoding='ISO-8859-1')
dataset_v1.rename(columns={'»¿PAT_ENC_CSN_ID':'PAT_ENC_CSN_ID'},inplace=True)

# Convert the 'datetime_str' column to datetime with a specified format
dataset_v1['entry_dt'] = pd.to_datetime(dataset_v1['ENTRY_TIME'], format='%Y-%m-%d %H:%M:%S.%f')

In [15]:
dataset_v1['hour'] = dataset_v1['entry_dt'].dt.hour
dataset_v1['day'] = dataset_v1['entry_dt'].dt.day

dataset_v1['MEAS_VALUE'] = (dataset_v1['MEAS_VALUE'] - 32) * (5/9)

# Apply the function to the dt.hour column
dataset_v1['Shift'] = dataset_v1['hour'].apply(assign_shift)

shift_counts = dataset_v1['Shift'].value_counts()

df = pd.DataFrame()
df = dataset_v1.copy()

In [12]:
multiplier = 5.0

# Calculate Q1 (25th percentile) and Q3 (75th percentile) for MEAS_VALUE
Q1 = df['MEAS_VALUE'].quantile(0.25)
Q3 = df['MEAS_VALUE'].quantile(0.75)

# Calculate the IQR
IQR = Q3 - Q1

# Determine the upper and lower bounds
lower_bound = Q1 - multiplier * IQR
upper_bound = Q3 + multiplier * IQR

# Filter the dataframe to remove outliers
df_filtered = df[(df['MEAS_VALUE'] >= lower_bound) & (df['MEAS_VALUE'] <= upper_bound)]

# Print or use df_filtered for further analysis or plotting
# print(df_filtered)
dataframe = df_filtered.loc[:,:]

dataframe = dataframe.dropna(subset=['MEAS_VALUE'])
# Ensure 'hour' column has valid integers
dataframe['hour'] = dataframe['hour'].round().astype(int)

# Safely round temperature values using .loc[]
dataframe.loc[:, 'MEAS_VALUE'] = dataframe['MEAS_VALUE'].round(3)

In [33]:
# Group the data by hour and MEAS_VALUE to count occurrences
counts = dataframe.groupby(['hour', 'MEAS_VALUE']).size().reset_index(name='Count')

# Create a stacked bar plot using Plotly Graph Objects
fig = go.Figure()

# Normalize counts for color mapping
max_count = counts['Count'].max()
norm_counts = counts['Count'] / max_count  # Normalize to range [0, 1]

# Generate a color scale based on the counts
color_scale = px.colors.sequential.Viridis

# Add traces for each MEAS_VALUE
for meas_value in counts['MEAS_VALUE'].unique():
    subset = counts[counts['MEAS_VALUE'] == meas_value]

    # Create a color list for this subset based on normalized counts
    colors = [color_scale[int(count / max_count * (len(color_scale) - 1))] for count in subset['Count']]

    fig.add_trace(go.Bar(
        x=subset['hour'],
        y=subset['Count'],
        name=meas_value,
        text=subset['Count'],
        textposition='outside',
        marker=dict(color=colors)  # Use the colors derived from the counts
    ))

# Update layout for better visibility
fig.update_layout(
    title='Stacked Bar Plot for Temperature Readings by Hour',
    xaxis_title='Hour',
    yaxis_title='Count of Temperature Readings',
    barmode='stack',
    legend_title='MEAS_VALUE',
    xaxis=dict(tickmode='linear')
)

# Show the figure
fig.show()

In [27]:
# Group the data by hour and MEAS_VALUE to count occurrences
counts = df.groupby(['hour', 'MEAS_VALUE']).size().reset_index(name='Count')
print(counts)

# Create a stacked bar plot
fig = px.bar(counts,
             x='hour',
             y='Count',
             color='MEAS_VALUE',
             text='Count',
             title='Stacked Bar Plot for Temperature Readings by Hour',
             color_discrete_sequence=px.colors.sequential.Viridis,  # Adjust color scale
             barmode='stack')

# Update layout for better visibility
fig.update_layout(
    xaxis_title='Hour',
    yaxis_title='Count of Temperature Readings',
    legend_title='MEAS_VALUE',
    xaxis=dict(tickmode='linear')
)

# Position the text outside the bars
fig.for_each_trace(lambda t: t.update(textposition='outside'))

# Show the figure
fig.show()

      hour  MEAS_VALUE  Count
0        0  -15.987654      1
1        0    0.771605      1
2        0    0.956790      1
3        0    1.172840      1
4        0    1.234568      2
...    ...         ...    ...
1545    23    3.888889     16
1546    23    4.012346      9
1547    23    4.104938      1
1548    23    4.166667      2
1549    23    4.506173      1

[1550 rows x 3 columns]


In [None]:
fig = go.Figure()

# Add box plots for each shift
for shift in dataframe['Shift'].unique():
    shift_data = dataframe[dataframe['Shift'] == shift]
    fig.add_trace(go.Box(
        y=shift_data['MEAS_VALUE'],
        x=shift_data['hour'],
        name=shift,
        boxmean='sd',  # Show mean and standard deviation
        marker_color={
            'Morning': '#87CEEB',
            'Afternoon': '#FFA500',
            'Night': '#4B0082'
        }[shift]
    ))

# Overlay mean lines for each shift
for shift in dataframe['Shift'].unique():
    shift_mean = dataframe[dataframe['Shift'] == shift].groupby('hour')['MEAS_VALUE'].mean()
    fig.add_trace(go.Scatter(
        x=shift_mean.index,
        y=shift_mean.values,
        mode='lines+markers',
        name=f'Mean Temperature - {shift}',
        line=dict(width=2),
        marker=dict(color='black')
    ))

# Customize the layout
fig.update_layout(
    title='Boxplot of Temperature by Hour with Mean Line',
    xaxis_title='Hour of the Day',
    yaxis_title='Temperature (°C)',
    yaxis=dict(range=[32, 40]),  # Set fixed y-axis range
    legend_title='Shift',
    xaxis=dict(showgrid=True),  # Enable grid lines for x-axis
    #yaxis=dict(showgrid=True)   # Enable grid lines for y-axis
)

# Show the plot
fig.show()