In [1]:
import numpy as np
import random
from processing_data import *
from bokeh.plotting import figure
from bokeh.io import output_file, save

def render_boxplot(df, id, variable):
    # select the data from the dataframe
    df = df.loc[df['ID'] == id, variable]
    
    # Check if the variable is 'Fiber'
    if variable == 'Fiber':
        # Define baseline and control periods based on ID
        if id == 1:
            baseline_days = 8
            control_days = 10
        else:
            baseline_days = 14
            control_days = 14
        
        # Select baseline data
        baseline_start_date = min(df.index)
        baseline_end_date = baseline_start_date + pd.DateOffset(days=baseline_days)
        baseline_data = df[(df.index >= baseline_start_date) & (df.index < baseline_end_date)]

        # Select control data
        control_start_date = max(df.index) - pd.DateOffset(days=control_days)
        control_end_date = max(df.index)
        control_data = df[(df.index >= control_start_date) & (df.index <= control_end_date)]
        
    else:
        # Define baseline and control periods based on ID
        if id == 1:
            baseline_days = 8
            control_days = 8
        else:
            baseline_days = 14
            control_days = 14
        
        # Select baseline data
        baseline_start_date = min(df.index)
        baseline_end_date = baseline_start_date + pd.DateOffset(days=baseline_days)
        baseline_data = df[(df.index >= baseline_start_date) & (df.index < baseline_end_date)]

        # Select control data
        control_start_date = max(df.index) - pd.DateOffset(days=control_days)
        control_end_date = max(df.index)
        control_data = df[(df.index >= control_start_date) & (df.index <= control_end_date)]
    
    # Plot boxplots for baseline and control data
    p = figure(x_axis_label='Period', y_axis_label=variable)
    p.vbar(x=[1], top=baseline_data.mean(), width=0.5, color="blue", legend_label="Baseline")
    p.vbar(x=[2], top=control_data.mean(), width=0.5, color="red", legend_label="Control")
    p.legend.location = "top_left"

    # Uncomment the following lines if you want to save the plot to an HTML file
    # name = str(id) + '_' + variable + '.html'
    # output_file(name)
    # save(p)

    return p

# To test if this works
def main():
    df = data_processing()
    print(df) # to find the name of the column to plot enter by variable
    render_boxplot(df, id=4, variable='Fiber') # id = 1,2,3,4 or 5

if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'bokeh'

In [9]:
import fiber_parser
import pandas as pd
import yaml

import seaborn as sns
import matplotlib.pyplot as plt

with open('config/config.yaml', "r") as stream:
    config = yaml.safe_load(stream)

# read data
df_fiber1 = pd.read_csv(config["dailysummary1"], header=0)
df_fiber2 = pd.read_csv(config["dailysummary2"], header=0)
df_fiber3 = pd.read_csv(config["dailysummary3"], header=0)
df_fiber4 = pd.read_csv(config["dailysummary4"], header=0)
df_fiber5 = pd.read_csv(config["dailysummary5"], header=0)

df_fiber = fiber_parser.fiber_processing([df_fiber1, df_fiber2, df_fiber3, df_fiber4, df_fiber5])

df_fiber

Unnamed: 0,ID,Date,Energy (kcal),Fiber
0,1,2024-01-08,499.97,10.93
1,1,2024-01-09,637.05,7.46
2,1,2024-01-10,537.83,8.09
3,1,2024-01-11,324.32,9.66
4,1,2024-01-12,522.97,5.19
...,...,...,...,...
52,5,2024-02-28,3279.72,38.42
53,5,2024-02-29,3091.27,39.74
54,5,2024-03-01,3561.57,35.44
55,5,2024-03-02,3451.76,37.72


In [10]:

# If your DataFrame is not already sorted by date, you may want to sort it first
# df_fiber['Date'] = pd.to_datetime(df_fiber['Date'])
df_fiber.set_index('Date', inplace=True)

# Calculate the mean of every 7 days for the 'Fiber' and 'Energy' columns for each ID
df_weekly_means = df_fiber.groupby('ID')['Fiber'].resample('7D').mean().reset_index()

df_weekly_means['Date'] = pd.to_datetime(df_weekly_means['Date'])
df_weekly_means = df_weekly_means.sort_values(by='Date').reset_index(drop=True)

# Define a function to calculate the week number within each group
def calculate_week_number(group):
    group['Week'] = range(1, len(group) + 1)
    return group

# Apply the function to calculate week numbers within each ID group
df_weekly_means = df_weekly_means.groupby('ID').apply(calculate_week_number).reset_index(drop=True)

print(df_weekly_means)


    ID       Date      Fiber  Week
0    1 2024-01-08   8.307143     1
1    1 2024-01-15  11.587143     2
2    1 2024-01-22  16.154286     3
3    1 2024-01-29  11.621429     4
4    1 2024-02-05  19.991429     5
5    1 2024-02-12  17.705000     6
6    1 2024-02-19  19.174000     7
7    1 2024-02-26   6.250000     8
8    2 2024-01-08  25.490000     1
9    2 2024-01-15  28.162857     2
10   2 2024-01-22  36.605714     3
11   2 2024-01-29  36.647143     4
12   2 2024-02-05  37.514286     5
13   2 2024-02-12  37.834286     6
14   2 2024-02-19  39.394286     7
15   2 2024-02-26  40.558571     8
16   3 2024-01-08  44.558571     1
17   3 2024-01-15  41.222857     2
18   3 2024-01-22  54.132857     3
19   3 2024-01-29  43.710000     4
20   3 2024-02-05  45.640000     5
21   3 2024-02-12  41.852857     6
22   3 2024-02-19  55.255714     7
23   3 2024-02-26  42.912857     8
24   4 2024-01-08  21.905714     1
25   4 2024-01-15  18.477143     2
26   4 2024-01-22  32.254286     3
27   4 2024-01-29  3

  df_weekly_means = df_weekly_means.groupby('ID').apply(calculate_week_number).reset_index(drop=True)


In [12]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px

# Assuming df_weekly_means is your DataFrame with weekly means
df_weekly_means['Date'] = pd.to_datetime(df_weekly_means['Date'])

# Initialize the Dash app
app = dash.Dash(__name__)

# Layout of the dashboard
app.layout = html.Div([
    html.H1("Weekly Fiber Analysis Dashboard"),
    
    # Dropdown for selecting ID
    dcc.Dropdown(
        id='id-dropdown',
        options=[{'label': str(id_value), 'value': id_value} for id_value in df_weekly_means['ID'].unique()],
        value=df_weekly_means['ID'].unique()[0],  # Initial value
        style={'width': '50%'}
    ),
    
    # Plot for displaying Fiber data
    dcc.Graph(id='fiber-plot'),
])

# Callback to update the Fiber plot based on selected ID
@app.callback(
    Output('fiber-plot', 'figure'),
    [Input('id-dropdown', 'value')]
)
def update_fiber_plot(selected_id):
    df_selected = df_weekly_means[df_weekly_means['ID'] == selected_id]
    fig = px.bar(df_selected, x='Week', y='Fiber (g)', color='Week', labels={'Fiber (g)': 'Mean Fiber (g)'})
    fig.update_layout(title=f'Fiber Analysis for ID {selected_id}', xaxis_title='Week', yaxis_title='Mean Fiber (g)')
    return fig

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[12], line 36, in update_fiber_plot(selected_id=1)
     30 @app.callback(
     31     Output('fiber-plot', 'figure'),
     32     [Input('id-dropdown', 'value')]
     33 )
     34 def update_fiber_plot(selected_id):
     35     df_selected = df_weekly_means[df_weekly_means['ID'] == selected_id]
---> 36     fig = px.bar(df_selected, x='Week', y='Fiber (g)', color='Week', labels={'Fiber (g)': 'Mean Fiber (g)'})
        df_selected =    ID       Date      Fiber  Week
0   1 2024-01-08   8.307143     1
1   1 2024-01-15  11.587143     2
2   1 2024-01-22  16.154286     3
3   1 2024-01-29  11.621429     4
4   1 2024-02-05  19.991429     5
5   1 2024-02-12  17.705000     6
6   1 2024-02-19  19.174000     7
7   1 2024-02-26   6.250000     8
        px = <module 'plotly.express' from '/homes/zhe/Fiber/.venv/lib/python3.11/site-packages/plot