In [1]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

import duckdb
import numpy as np

In [2]:
conn = duckdb.connect(database=':memory:')
raw = pd.read_excel('fishing-trip.xlsx', usecols='A:L')


In [3]:
raw['depth'] = raw['depth'] * -1
raw['time'] = raw['datetime'].dt.time

raw.sort_values(by='datetime', inplace=True)

raw.head()

Unnamed: 0,id,year,fisherman,day,datetime,fish_species,kept,length,depth,bait,weight_calc,location,time
0,1,2023,tyler,thursday,2023-06-15 12:45:00,walleye,True,14.0,-9.5,,1.2,,12:45:00
1,2,2023,brian,thursday,2023-06-15 12:50:00,walleye,True,17.0,-9.5,,1.46,,12:50:00
3,4,2023,brent,thursday,2023-06-15 13:10:00,walleye,True,15.0,-9.5,,1.29,,13:10:00
2,3,2023,tyler,thursday,2023-06-15 13:15:00,walleye,True,15.5,-9.5,,1.33,,13:15:00
4,5,2023,brent,thursday,2023-06-15 14:00:00,walleye,True,13.0,-9.0,,1.12,,14:00:00


In [4]:
data = raw[(raw['fish_species'] == 'walleye')].reset_index(drop=True)

# Extracting just the time for plotting purposes and filtering
data['time_of_day'] = data['datetime'].dt.time
data.sort_values('time_of_day', inplace=True)

# Convert time_of_day to minutes since 8 AM
data['minutes_since_8AM'] = data['datetime'].apply(lambda dt: (dt.hour - 8) * 60 + dt.minute)

min_size, max_size = 10, 30
length_min, length_max = data['length'].min(), data['length'].max()
data['scaled_length'] = data['length'].apply(
    lambda x: ((x - length_min) / (length_max - length_min) * (max_size - min_size) + min_size)
)

data.head()

Unnamed: 0,id,year,fisherman,day,datetime,fish_species,kept,length,depth,bait,weight_calc,location,time,time_of_day,minutes_since_8AM,scaled_length
16,17,2023,tyler,saturday,2023-06-17 08:30:00,walleye,True,17.0,-10.0,leech,1.35,pump house,08:30:00,08:30:00,30,18.421053
17,18,2023,brent,saturday,2023-06-17 08:43:00,walleye,True,17.75,-9.0,crawler,1.41,pump house,08:43:00,08:43:00,43,20.0
32,33,2024,brent,friday,2024-06-14 08:45:00,walleye,True,16.0,-11.0,crawler,1.56,1 up tall prarie chicken,08:45:00,08:45:00,45,16.315789
33,34,2024,brent,friday,2024-06-14 08:50:00,walleye,True,17.5,-9.5,crawler,1.71,1 up tall prarie chicken,08:50:00,08:50:00,50,19.473684
34,35,2024,brent,friday,2024-06-14 09:30:00,walleye,True,17.0,-9.5,crawler,1.66,1 up tall prarie chicken,09:30:00,09:30:00,90,18.421053


In [5]:
fig = go.Figure()

# Plot for depth analysis with violin plot
# fig.add_trace(
#     go.Violin(y=data['depth'], x=data['length'], name='Fish by Depth', 
#               points='all', orientation='h')
# )

fig = px.violin(data, y="depth", box=True,
                points='all',  # This plots all points
                width=400,
                title='Depth Analysis'
               )

# Adding a suffix to the y-axis tick labels
fig.update_layout(
    yaxis_tickformat=".2f'",  # .2f formats the labels to two decimal places with a feet symbol
    yaxis_title="Depth in Feet"
)

# Calculate quartiles and median
quartiles = np.percentile(data['depth'], [25, 50, 75])
min_val, max_val = data['depth'].min(), data['depth'].max()

# Adding annotations for the quartiles and median
fig.add_annotation(x=1.05, y=quartiles[0], xref="paper", yref="y",
                   text="Q3: {:.2f}'".format(quartiles[0]), showarrow=False)
fig.add_annotation(x=1.05, y=quartiles[1], xref="paper", yref="y",
                   text="Median: {:.2f}'".format(quartiles[1]), showarrow=False)
fig.add_annotation(x=1.05, y=quartiles[2], xref="paper", yref="y",
                   text="Q1: {:.2f}'".format(quartiles[2]), showarrow=False)
fig.add_annotation(x=1.05, y=min_val, xref="paper", yref="y",
                   text="Max: {:.2f}'".format(min_val), showarrow=False)
fig.add_annotation(x=1.05, y=max_val, xref="paper", yref="y",
                   text="Min: {:.2f}'".format(max_val), showarrow=False)

# Update layout to make space for annotations
fig.update_layout()  # Increase right margin to fit annotations


fig.show()

In [6]:
# Ensure 'data' has 'datetime' as a pandas datetime type
data['datetime'] = pd.to_datetime(data['datetime'])

# Group data by hour and compute counts and average length
data['hour'] = data['datetime'].dt.hour
aggregated_data = data.groupby('hour').agg(
    counts=('id', 'size'),
    average_length=('length', 'mean')
).reset_index()

# Convert 24-hour to 12-hour format for the x-axis labels
aggregated_data['hour_12'] = aggregated_data['hour'].apply(lambda x: f"{(x % 12 or 12)}{'AM' if x < 12 else 'PM'}")

max_count = aggregated_data['counts'].max()

# Define a custom blue color scale that darkens with increasing values
blue_scale = [
    [0.0, 'rgb(173, 216, 230)'],  # Lighter blue
    [1.0, 'rgb(0, 0, 139)']       # Darker blue
]

# Plotting the data using a uniform blue color scale that darkens with higher averages
fig = px.bar(
    aggregated_data,
    x='hour_12',
    y='counts',
    title='Fishing Activity by Hour of the Day',
    labels={'hour_12': 'Hour of the Day', 'counts': 'Number of Fish Caught'},
    color='average_length',
    text=aggregated_data.apply(lambda row: f"{row['counts']} Caught<br>Avg. Length: {row['average_length']:.2f}\"", axis=1),
    color_continuous_scale=blue_scale  # Use the custom blue scale
)

fig.update_traces(texttemplate='%{text}', textposition='outside')  # Formatting the text labels to appear outside the bars
fig.update_layout(
    xaxis_title='Hour of the Day',
    yaxis_title='Number of Fish Caught',
    xaxis={'type': 'category', 'tickmode': 'array', 'tickvals': aggregated_data['hour_12'], 'ticktext': aggregated_data['hour_12']},
    yaxis=dict(range=[0, max_count * 1.25]),
    coloraxis_colorbar=dict(title='Average Length (inches)')
)
fig.show()


In [9]:
import plotly.graph_objects as go
import numpy as np
import pandas as pd

# Assuming 'data' is your DataFrame and includes a 'year' column alongside 'depth'
data['year'] = pd.Categorical(data['year'])  # Ensure 'year' is a categorical data type

fig = go.Figure()

# Add split violin plot for the year 2023
fig.add_trace(go.Violin(x=data['year'][data['year'] == 2023].replace(2023, 'Year Comparison'),  # Constant x value
                        y=data['depth'][data['year'] == 2023],
                        name='2023',
                        side='negative',
                        line_color='blue')
             )

# Add split violin plot for the year 2024
fig.add_trace(go.Violin(x=data['year'][data['year'] == 2024].replace(2024, 'Year Comparison'),  # Constant x value
                        y=data['depth'][data['year'] == 2024],
                        name='2024',
                        side='positive',
                        line_color='orange')
             )

# Update traces to show all points, the box plot inside the violin, and make the mean line visible
fig.update_traces(points='all', box_visible=True, meanline_visible=True)

# Adjusting plot layout settings to widen the view
fig.update_layout(
    yaxis_tickformat=".2f'",
    yaxis_title="Depth in Feet",
    violingap=0,  # Remove gap between violins
    violinmode='overlay',  # Overlay violins on the same x position
    margin=dict(l=40, r=40, t=20, b=20),  # Adjust margins
    width=800  # Optionally adjust width
)

fig.show()



The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.


The behavior of Series.replace (and DataFrame.replace) with CategoricalDtype is deprecated. In a future version, replace will only be used for cases that preserve the categories. To change the categories, use ser.cat.rename_categories instead.



In [8]:
import pandas as pd
import plotly.express as px

# Assuming 'data' is your DataFrame and 'datetime' column exists
data['datetime'] = pd.to_datetime(data['datetime'])

# Extract hour and year from 'datetime'
data['hour'] = data['datetime'].dt.hour
data['year'] = data['datetime'].dt.year

# Group data by hour and year, and compute counts and average length
aggregated_data = data.groupby(['hour', 'year']).agg(
    counts=('id', 'size'),
    average_length=('length', 'mean')
).reset_index()

max_count = data.groupby(['hour']).agg(
    counts=('id', 'size')
)['counts'].max()

print(max_count)

# Convert 24-hour to 12-hour format for the x-axis labels
aggregated_data['hour_12'] = aggregated_data['hour'].apply(lambda x: f"{(x % 12 or 12)}{'AM' if x < 12 else 'PM'}")

# Define a custom blue color scale that darkens with increasing values
blue_scale = [
    [0.0, 'rgb(173, 216, 230)'],  # Lighter blue
    [1.0, 'rgb(0, 0, 139)']       # Darker blue
]

# Order for the 12-hour clock labels, ensuring correct sequence
hour_order = [f"{h}AM" for h in range(1, 12)] + ["12PM"] + [f"{h}PM" for h in range(1, 12)] + ["12AM"]

# Plotting the data using a uniform blue color scale that darkens with higher averages
fig = px.bar(
    aggregated_data,
    x='hour_12',
    y='counts',
    title='Fishing Activity by Hour of the Day',
    labels={'hour_12': 'Hour of the Day', 'counts': 'Number of Fish Caught'},
    color='average_length',
    text=aggregated_data.apply(lambda row: f"{row['counts']} Caught<br>Avg. Length: {row['average_length']:.2f}\"", axis=1),
    color_continuous_scale=blue_scale,
    barmode='group',  # Ensures bars are grouped, not stacked
    category_orders={"hour_12": hour_order, "year": sorted(aggregated_data['year'].unique())}
)

fig.update_traces(texttemplate='%{text}', textposition='outside')
fig.update_layout(
    xaxis_title='Hour of the Day',
    yaxis_title='Number of Fish Caught',
    xaxis={'type': 'category'},
    yaxis=dict(range=[0, max_count * 1.25]),
    coloraxis_colorbar=dict(title='Average Length (inches)')
)
fig.show()


13


In [8]:
import plotly.express as px
import datetime

# Extracting just the time for plotting purposes and filtering
data['time_of_day'] = data['datetime'].dt.time
data.sort_values('time_of_day', inplace=True)

data['year_color'] = data['year'].astype(str)

# Convert time_of_day to minutes since 8 AM
data['minutes_since_8AM'] = data['datetime'].apply(lambda dt: (dt.hour - 8) * 60 + dt.minute)

min_size, max_size = 10, 30
length_min, length_max = data['length'].min(), data['length'].max()
data['scaled_length'] = data['length'].apply(
    lambda x: ((x - length_min) / (length_max - length_min) * (max_size - min_size) + min_size)
)

# Adding a column to store the time of day in HH:MM format
data['time_of_day_HHMM'] = data['datetime'].dt.strftime('%H:%M')

# Creating the scatter plot with custom colors for 2023 and 2024
fig = px.scatter(
    data,
    x='minutes_since_8AM',
    y='depth',
    size='scaled_length',  # Using scaled length for size
    color='year_color',
    opacity=0.5,
    title='Scatter Plot of Depth vs. Time of Day with Dot Size Representing Fish Length',
    labels={
        'depth': 'Depth (feet)',
        'time_of_day_HHMM': 'Time of Day (HH:MM)'
    },
    hover_data={'length': True, 'time_of_day_HHMM': True},  # Show actual length and formatted time on hover
    color_discrete_map={
        '2023': 'blue',
        '2024': 'red'
    }
)

# Define hourly ticks from 8 AM to 8 PM
hourly_ticks = [(h - 8) * 60 for h in range(8, 21)]
hour_labels = [f"{h}:00" for h in range(8, 21)]

# Set custom tick labels
fig.update_xaxes(tickvals=hourly_ticks, ticktext=hour_labels)

# Set titles for clarity
fig.update_layout(xaxis_title='Time of Day (8 AM to 8 PM)', yaxis_title='Depth (feet)')

fig.show()


In [23]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Prepare the data
data['hour'] = data['datetime'].dt.hour
aggregated_data = data.groupby('hour').agg(
    counts=('id', 'size'),
    average_length=('length', 'mean')
).reset_index()

# Convert 24-hour to 12-hour format
aggregated_data['hour_12'] = aggregated_data['hour'].apply(lambda x: f"{(x % 12 or 12)}{'AM' if x < 12 else 'PM'}")

# Define a custom blue color scale
blue_scale = [
    [0.0, 'rgb(173, 216, 230)'],  # Lighter blue
    [1.0, 'rgb(0, 0, 139)']       # Darker blue
]

# Create the subplot figure
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1)

# Bar chart for the second row
fig.add_trace(
    go.Bar(
        x=aggregated_data['hour_12'],
        y=aggregated_data['counts'],
        marker=dict(
            color=aggregated_data['average_length'],  # Assign color based on average length
            colorscale=blue_scale,  # Apply custom blue color scale
            cmin=aggregated_data['average_length'].min(),  # Minimum value for color scaling
            cmax=aggregated_data['average_length'].max()   # Maximum value for color scaling
        ),
        text=aggregated_data.apply(lambda row: f"{row['counts']} Caught<br>Avg. Length: {row['average_length']:.2f}\"", axis=1),
        name='Fish Caught by Hour'
    ),
    row=2, col=1
)

# Update the bar chart trace to format text labels
fig.update_traces(texttemplate='%{text}', textposition='outside', selector=dict(type='bar'))

# Scatter plot for the first row
fig.add_trace(
    go.Scatter(
        x=data['minutes_since_8AM'],
        y=data['depth'],
        mode='markers',
        marker=dict(size=data['scaled_length'], color=data['depth'], showscale=False),
        name='Depth vs. Time of Day'
    ),
    row=1, col=1
)

# Set categorical x-axis for the bar chart
fig.update_xaxes(
    title='Hour of the Day',
    type='category',
    tickmode='array',
    tickvals=aggregated_data['hour_12'],
    ticktext=aggregated_data['hour_12'],
    row=2, col=1
)

# Set continuous x-axis for the scatter plot
fig.update_xaxes(
    title='Time of Day (8 AM to 8 PM)',
    row=1, col=1
)

# Set y-axis for bar chart
fig.update_yaxes(title='Number of Fish Caught', range=[0, aggregated_data['counts'].max() * 1.15], row=2, col=1)

# Set y-axis for scatter plot
fig.update_yaxes(title='Depth (feet)', row=1, col=1)

# Update overall layout and title
fig.update_layout(height=800, title_text="Fishing Data Analysis: Depth and Activity by Time of Day")

# Show plot
fig.show()


In [6]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Prepare the data
data['hour'] = data['datetime'].dt.hour
aggregated_data = data.groupby('hour').agg(
    counts=('id', 'size'),
    average_length=('length', 'mean')
).reset_index()

# Convert 24-hour to 12-hour format
aggregated_data['hour_12'] = aggregated_data['hour'].apply(lambda x: f"{(x % 12 or 12)}{'AM' if x < 12 else 'PM'}")

# Define a custom blue color scale
blue_scale = [
    [0.0, 'rgb(173, 216, 230)'],  # Lighter blue
    [1.0, 'rgb(0, 0, 139)']       # Darker blue
]

# Calculate common y-axis range with a 15% buffer for scatter and violin plots
depth_min = data['depth'].min()
depth_max = data['depth'].max()
depth_range = depth_max - depth_min
depth_buffer = depth_range * 0.15
common_y_range = [depth_min - depth_buffer, depth_max + depth_buffer]

# Calculate buffer for the histogram
counts_max = aggregated_data['counts'].max()
counts_buffer = counts_max * 0.15

# Create the subplot figure with specified grid layout
fig = make_subplots(
    rows=2, cols=2,
    column_widths=[0.7, 0.3],
    specs=[
        [{'type': 'scatter'}, {'type': 'violin'}],
        [{'type': 'xy'}, {'type': 'domain'}]
    ],
    shared_xaxes=False,
    vertical_spacing=0.1,
    horizontal_spacing=0.05
)

# Scatter plot for the first row, first column
fig.add_trace(
    go.Scatter(
        x=data['minutes_since_8AM'],
        y=data['depth'],
        mode='markers',
        marker=dict(size=data['scaled_length'], color=data['depth'], showscale=True),
        name='Depth vs. Time of Day'
    ),
    row=1, col=1
)

# Violin plot in the first row, second column
fig.add_trace(
    go.Violin(
        y=data['depth'],
        box_visible=True,
        line_color='blue',
        points='all',
        name='Depth Analysis'
    ),
    row=1, col=2
)

# Set the same y-axis range for both the scatter and violin plots
fig.update_yaxes(title='Depth (feet)', range=common_y_range, row=1, col=1)
fig.update_yaxes(title='Depth in Feet', range=common_y_range, row=1, col=2)

# Bar chart for the second row, first column
fig.add_trace(
    go.Bar(
        x=aggregated_data['hour_12'],
        y=aggregated_data['counts'],
        marker=dict(
            color=aggregated_data['average_length'],
            colorscale=blue_scale
        ),
        text=aggregated_data.apply(lambda row: f"{row['counts']} Caught<br>Avg. Length: {row['average_length']:.2f}\"", axis=1),
        name='Fish Caught by Hour'
    ),
    row=2, col=1
)

# Ensure the labels on the histogram are horizontal
fig.update_xaxes(tickangle=0, tickmode='array', tickvals=aggregated_data['hour_12'], ticktext=aggregated_data['hour_12'], row=2, col=1)

# Update the y-axis range for the histogram to include the buffer
fig.update_yaxes(title='Number of Fish Caught', range=[0, counts_max + counts_buffer], row=2, col=1)

# Pie chart in the second row, second column
bait_counts = data['bait'].value_counts().reset_index()
bait_counts.columns = ['bait', 'counts']
fig.add_trace(
    go.Pie(labels=bait_counts['bait'], values=bait_counts['counts'], name='Fish Caught by Bait',
           textinfo='label+percent', insidetextorientation='horizontal'),
    row=2, col=2
)

# Set axes titles for all plots
fig.update_xaxes(title='Hour of the Day', row=2, col=1)
fig.update_xaxes(title='Time of Day (8 AM to 8 PM)', row=1, col=1)

# Update overall layout and title
fig.update_layout(height=800, title_text="Fishing Data Analysis: Depth and Activity by Time of Day")

# Show plot
fig.show()


In [7]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Prepare the data
data['hour'] = data['datetime'].dt.hour
aggregated_data = data.groupby('hour').agg(
    counts=('id', 'size'),
    average_length=('length', 'mean')
).reset_index()

# Convert 24-hour to 12-hour format
aggregated_data['hour_12'] = aggregated_data['hour'].apply(lambda x: f"{(x % 12 or 12)}{'AM' if x < 12 else 'PM'}")

# Create an array for every hour from 8 AM to 8 PM in minutes since 8 AM
hour_ticks = np.arange(0, 10 * 60, 60)  # Every 60 minutes for 13 hours starting from 8 AM (0 minutes)

# Generate corresponding 12-hour format labels for each tick
hour_labels = [(f"{(hour % 12 or 12)}{'AM' if hour < 12 or hour == 24 else 'PM'}") for hour in range(8, 18)]

# Define a custom blue color scale
blue_scale = [
    [0.0, 'rgb(173, 216, 230)'],  # Lighter blue
    [1.0, 'rgb(0, 0, 139)']       # Darker blue
]

# Calculate common y-axis range with a 15% buffer for scatter and violin plots
depth_min = data['depth'].min()
depth_max = data['depth'].max()
depth_range = depth_max - depth_min
depth_buffer = depth_range * 0.25
common_y_range = [depth_min - depth_buffer, depth_max + depth_buffer]

# Calculate buffer for the histogram
counts_max = aggregated_data['counts'].max()
counts_buffer = counts_max * 0.15

# Create the subplot figure with specified grid layout
fig = make_subplots(
    rows=2, cols=2,
    column_widths=[0.7, 0.3],
    specs=[
        [{'type': 'scatter'}, {'type': 'violin'}],  # First row: scatter and violin
        [{'type': 'xy'}, {'type': 'domain'}]  # Second row: bar chart and pie chart
    ],
    shared_xaxes=False,
    vertical_spacing=0.1,
    horizontal_spacing=0.05
)

# Scatter plot for the first row, first column
fig.add_trace(
    go.Scatter(
        x=data['minutes_since_8AM'],
        y=data['depth'],
        mode='markers',
        marker=dict(
            size=data['scaled_length'], 
            color=data['depth'], 
            showscale=False  # Hide the color scale legend
        ),
        name='Depth vs. Time of Day'
    ),
    row=1, col=1
)

fig.add_annotation(
    text="Bubble size represents size of fish",
    xref="paper", yref="paper",
    x=0.05, y=1,  # Positioning the annotation at the top left of the subplot
    showarrow=False,
    font=dict(size=12, color="black"),
    align="left",
    bgcolor="white",
    bordercolor="black",
    borderpad=4
)

# Violin plot in the first row, second column
fig.add_trace(
    go.Violin(
        y=data['depth'],
        box_visible=True,
        line_color='blue',
        points='all',
        name='Depth Analysis'
    ),
    row=1, col=2
)

# Set the same y-axis range for both the scatter and violin plots
fig.update_yaxes(title='Depth (feet)', range=common_y_range, row=1, col=1)
fig.update_yaxes(title='Depth in Feet', range=common_y_range, row=1, col=2)


# Bar chart for the second row, first column
fig.add_trace(
    go.Bar(
        x=aggregated_data['hour_12'],
        y=aggregated_data['counts'],
        marker=dict(
            color=aggregated_data['average_length'],
            colorscale=blue_scale
        ),
        text=aggregated_data.apply(lambda row: f"{row['counts']} Caught<br>Avg. Length: {row['average_length']:.2f}\"", axis=1),
        textposition='outside',  # Ensure labels are outside
        name='Fish Caught by Hour'
    ),
    row=2, col=1
)

# Set tick labels to be horizontal
fig.update_xaxes(tickangle=0, row=2, col=1)

# Ensure the labels on the histogram are horizontal
fig.update_xaxes(tickangle=0, tickmode='array', tickvals=aggregated_data['hour_12'], ticktext=aggregated_data['hour_12'], row=2, col=1)

# Update the y-axis range for the histogram to include the buffer
fig.update_yaxes(title='Number of Fish Caught', range=[0, counts_max + counts_buffer], row=2, col=1)

# Set the x-axis to use 12-hour formatted labels
fig.update_xaxes(
    title='Time of Day (8 AM to 8 PM)',
    tickmode='array',
    tickvals=hour_ticks,  # Set tick positions at each hour mark
    ticktext=hour_labels,  # Label each tick with corresponding 12-hour format
    row=1, col=1
)

# Update x-axis for the histogram in a similar manner to maintain consistency
fig.update_xaxes(
    tickangle=0,
    tickmode='array',
    tickvals=aggregated_data['hour_12'],
    ticktext=aggregated_data['hour_12'],
    row=2, col=1
)

# Pie chart in the second row, second column
bait_counts = data['bait'].value_counts().reset_index()
bait_counts.columns = ['bait', 'counts']
fig.add_trace(
    go.Pie(labels=bait_counts['bait'], values=bait_counts['counts'], name='Fish Caught by Bait',
           textinfo='label+percent', insidetextorientation='horizontal'),
    row=2, col=2
)

# Set axes titles for all plots
fig.update_xaxes(title='Hour of the Day', row=2, col=1)
fig.update_xaxes(title='Time of Day (8 AM to 8 PM)', row=1, col=1)

# Update overall layout and title
fig.update_layout(height=800, title_text="Fishing Data Analysis: Depth and Activity by Time of Day")

# Show plot
fig.show()


In [294]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Prepare the data
data['hour'] = data['datetime'].dt.hour
aggregated_data = data.groupby('hour').agg(
    counts=('id', 'size'),
    average_length=('length', 'mean')
).reset_index()

# Convert 24-hour to 12-hour format
aggregated_data['hour_12'] = aggregated_data['hour'].apply(lambda x: f"{(x % 12 or 12)}{'AM' if x < 12 else 'PM'}")

# Create an array for every hour from 8 AM to 8 PM in minutes since 8 AM
hour_ticks = np.arange(0, 10 * 60, 60)  # Every 60 minutes for 13 hours starting from 8 AM (0 minutes)

# Generate corresponding 12-hour format labels for each tick
hour_labels = [(f"{(hour % 12 or 12)}{'AM' if hour < 12 or hour == 24 else 'PM'}") for hour in range(8, 18)]

# Define a custom blue color scale
blue_scale = [
    [0.0, 'rgb(173, 216, 230)'],  # Lighter blue
    [1.0, 'rgb(0, 0, 139)']       # Darker blue
]

# Calculate common y-axis range with a 15% buffer for scatter and violin plots
depth_min = data['depth'].min()
depth_max = data['depth'].max()
depth_range = depth_max - depth_min
depth_buffer = depth_range * 0.25
common_y_range = [depth_min - depth_buffer, depth_max + depth_buffer]

# Calculate buffer for the histogram
counts_max = aggregated_data['counts'].max()
counts_buffer = counts_max * 0.15

# Create the subplot figure with specified grid layout
fig = make_subplots(
    rows=2, cols=2,
    column_widths=[0.7, 0.3],
    specs=[
        [{'type': 'scatter'}, {'type': 'violin'}],  # First row: scatter and violin
        [{'type': 'xy'}, {'type': 'domain'}]  # Second row: bar chart and pie chart
    ],
    shared_xaxes=False,
    vertical_spacing=0.1,
    horizontal_spacing=0.05
)

# Scatter plot for the first row, first column
fig.add_trace(
    go.Scatter(
        x=data['minutes_since_8AM'],
        y=data['depth'],
        mode='markers',
        marker=dict(
            size=data['scaled_length'], 
            color=data['year'], 
            showscale=False  # Hide the color scale legend
        ),
        name='Depth vs. Time of Day'
    ),
    row=1, col=1
)

fig.add_annotation(
    text="Bubble size represents size of fish",
    xref="paper", yref="paper",
    x=0.05, y=1,  # Positioning the annotation at the top left of the subplot
    showarrow=False,
    font=dict(size=12, color="black"),
    align="left",
    bgcolor="white",
    bordercolor="black",
    borderpad=4
)

# Violin plot in the first row, second column
fig.add_trace(
    go.Violin(
        y=data['depth'],
        box_visible=True,
        line_color='blue',
        points='all',
        name='Depth Analysis'
    ),
    row=1, col=2
)

# Set the same y-axis range for both the scatter and violin plots
fig.update_yaxes(title='Depth (feet)', range=common_y_range, row=1, col=1)
fig.update_yaxes(title='Depth in Feet', range=common_y_range, row=1, col=2)


# Bar chart for the second row, first column
fig.add_trace(
    go.Bar(
        x=aggregated_data['hour_12'],
        y=aggregated_data['counts'],
        marker=dict(
            color=aggregated_data['average_length'],
            colorscale=blue_scale
        ),
        text=aggregated_data.apply(lambda row: f"{row['counts']} Caught<br>Avg. Length: {row['average_length']:.2f}\"", axis=1),
        textposition='outside',  # Ensure labels are outside
        name='Fish Caught by Hour'
    ),
    row=2, col=1
)

# Set tick labels to be horizontal
fig.update_xaxes(tickangle=0, row=2, col=1)

# Ensure the labels on the histogram are horizontal
fig.update_xaxes(tickangle=0, tickmode='array', tickvals=aggregated_data['hour_12'], ticktext=aggregated_data['hour_12'], row=2, col=1)

# Update the y-axis range for the histogram to include the buffer
fig.update_yaxes(title='Number of Fish Caught', range=[0, counts_max + counts_buffer], row=2, col=1)

# Set the x-axis to use 12-hour formatted labels
fig.update_xaxes(
    title='Time of Day (8 AM to 8 PM)',
    tickmode='array',
    tickvals=hour_ticks,  # Set tick positions at each hour mark
    ticktext=hour_labels,  # Label each tick with corresponding 12-hour format
    row=1, col=1
)

# Update x-axis for the histogram in a similar manner to maintain consistency
fig.update_xaxes(
    tickangle=0,
    tickmode='array',
    tickvals=aggregated_data['hour_12'],
    ticktext=aggregated_data['hour_12'],
    row=2, col=1
)

# Pie chart in the second row, second column
bait_counts = data['bait'].value_counts().reset_index()
bait_counts.columns = ['bait', 'counts']
fig.add_trace(
    go.Pie(labels=bait_counts['bait'], values=bait_counts['counts'], name='Fish Caught by Bait',
           textinfo='label+percent', insidetextorientation='horizontal'),
    row=2, col=2
)

# Set axes titles for all plots
fig.update_xaxes(title='Hour of the Day', row=2, col=1)
fig.update_xaxes(title='Time of Day (8 AM to 8 PM)', row=1, col=1)

# Update overall layout and title
fig.update_layout(height=800, title_text="Fishing Data Analysis: Depth and Activity by Time of Day")

# Show plot
fig.show()


KeyError: 'scaled_length'

In [277]:
locations = data.groupby('location').size().reset_index(name='counts').sort_values('counts', ascending=False)

locations

Unnamed: 0,location,counts
2,"north of south of whilock, east side",14
0,1 up tall prarie chicken,10
1,mouth of cheyenne,9
6,tall prarie chicken,8
3,pump house,3
4,south of whitlock,1
5,south side of bend,1
