In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Define the period of 4 months
start_date = datetime(2024, 1, 1)
end_date = start_date + timedelta(days=30*4)  # Assuming 30 days in each month

# Generate date range with week numbers and days
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
week_numbers = [date.isocalendar()[1] for date in date_range]
days = [date.strftime("%A") for date in date_range]

# Generate random tip values ranging from 0.5 to 20 dollars
tips = [round(random.uniform(0.5, 20), 2) for _ in range(len(date_range))]

# Create a DataFrame to store the data
data = pd.DataFrame({'Date': date_range, 'Week_Number': week_numbers, 'Day': days, 'Tip': tips})
# Add a new column for the month name
data['Month'] = data['Date'].dt.month_name()
data['Quarter'] = pd.to_datetime(data['Date']).dt.to_period('Q')


In [2]:
# Display the first few rows of the dataset
data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121 entries, 0 to 120
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         121 non-null    datetime64[ns]
 1   Week_Number  121 non-null    int64         
 2   Day          121 non-null    object        
 3   Tip          121 non-null    float64       
 4   Month        121 non-null    object        
 5   Quarter      121 non-null    period[Q-DEC] 
dtypes: datetime64[ns](1), float64(1), int64(1), object(2), period[Q-DEC](1)
memory usage: 5.8+ KB


In [3]:
data.tail(15)

Unnamed: 0,Date,Week_Number,Day,Tip,Month,Quarter
106,2024-04-16,16,Tuesday,3.98,April,2024Q2
107,2024-04-17,16,Wednesday,4.45,April,2024Q2
108,2024-04-18,16,Thursday,13.68,April,2024Q2
109,2024-04-19,16,Friday,4.16,April,2024Q2
110,2024-04-20,16,Saturday,13.72,April,2024Q2
111,2024-04-21,16,Sunday,10.22,April,2024Q2
112,2024-04-22,17,Monday,14.73,April,2024Q2
113,2024-04-23,17,Tuesday,1.37,April,2024Q2
114,2024-04-24,17,Wednesday,18.63,April,2024Q2
115,2024-04-25,17,Thursday,6.85,April,2024Q2


In [None]:
import plotly.express as px

grouped_data = data[data['Day'].isin(['Saturday','Sunday'])]
#.groupby(['Day', 'Week_Number', 'Month'])['Tip'].sum().reset_index()

# Concatenate labels for multi-line labels
#grouped_data['Label'] = grouped_data['Day'] + '<br>Week ' + grouped_data['Week_Number'].astype(str) + '<br>' + grouped_data['Date']

# Create Plotly plot
fig = px.box(grouped_data, x='Month', y='Tip', color='Week_Number', 
             labels={'Tip': 'Total Tips ($)', 'Label': 'Day, Week, Month'},
             title='Total Tips by Day, Week, and Month',
             hover_name='Day',
             hover_data={'Week_Number': True, 'Date': True},
             boxmode='group')

fig.update_xaxes(tickangle=45, tickmode='linear', dtick=1)

fig.show()

In [5]:
fig = px.box(data, x='Month', y='Tip', color='Week_Number', 
             title='Tips Distribution by Month and Week',
             labels={'Tip': 'Tips ($)', 'Month': 'Month'},
             boxmode='group',
             category_orders={'Month': ['January', 'February', 'March', 'April']})

In [None]:
fig.show()

In [None]:
# Create Plotly box plot
fig = px.box(data, x='Month', y='Tip', color='Week_Number', 
             title='Tips Distribution by Month and Quarter',
             labels={'Tip': 'Tips ($)', 'Month': 'Month'},
             category_orders={'Month': ['January', 'February', 'March', 'April']})



fig.show()

In [8]:

# Convert 'Quarter' column to string
grouped_data['Quarter'] = grouped_data['Quarter'].astype(str)
# Define unique quarters, months, and weeks for the axes
quarters = grouped_data['Quarter'].unique()
months = grouped_data['Month'].unique()
weeks = grouped_data['Week_Number'].unique()

quarters, months, weeks



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



(array(['2024Q1', '2024Q2'], dtype=object),
 array(['January', 'February', 'March', 'April'], dtype=object),
 array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17],
       dtype=int64))

In [13]:
import plotly.graph_objects as go

fig = go.Figure()

for quarter in quarters:
    for month in months:

            filtered_data = grouped_data[(grouped_data['Month'] == month) & 
                                         (grouped_data['Quarter'] == quarter)]
            if not filtered_data.empty:
                fig.add_trace(go.Bar(
                    x=[[quarter]*len(filtered_data), [month]*len(filtered_data), [f"Week {week}"]*len(filtered_data)],
                    y=filtered_data['Tip'],
                    name=f" {month}, {quarter}"
                ))

fig.update_layout(title_text="Tips Distribution by  Month, and Quarter")

fig.show()



In [12]:
fig = go.Figure()

for quarter in quarters:
        for week in weeks:
            filtered_data = grouped_data[(grouped_data['Week_Number'] == week) & 
                                         (grouped_data['Quarter'] == quarter)]
            if not filtered_data.empty:
                fig.add_trace(go.Box(
                    x=[filtered_data['Quarter'].tolist(),
                       #filtered_data['Month'].tolist(), 
                       filtered_data['Week_Number'].tolist(),

                       ],
                    y=filtered_data['Tip'],
                    name=f"Week {week}, {quarter}"
                ))

fig.update_layout(title_text="Tips Distribution by Week, Month, and Quarter",
                  boxmode='group')
#fig.update_xaxes(type='multicategory')

fig.show()