In [None]:
import numpy as np
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go

In [None]:
def cosine_distribution(frequency, amplitude, phase, bias):
  """
  Builds a noisy cosine curve sampled at 24 points (one per hour of the day) and
  returns it as a numpy array.

  The curve is amplitude * cos(frequency * x + phase) + bias, where x spans three
  full periods of the cosine. Gaussian noise with a standard deviation of
  0.1 * amplitude is added to every sample.
  """

  # Three periods are spread over the 24 samples, so that a single period roughly
  # covers the first third of the day (hours 0-7), which is the only part of the
  # daily demand that resembles a cosine.
  # Note: linspace includes the end point, so the spacing between two samples is
  # (3 periods) / 23 rather than exactly period / 8.
  three_periods = 3 * 2 * np.pi / frequency # period = 2 * np.pi / frequency
  sample_points = np.linspace(0, three_periods, 24)

  # Noise-free cosine values for the 24 sample points.
  clean_wave = amplitude * np.cos(frequency * sample_points + phase) + bias

  # Random noise, scaled relative to the amplitude of the wave.
  noise = np.random.normal(0, 0.1, 24) * amplitude

  return clean_wave + noise

In [None]:
def generate_peak(peak_val, respective_h, diff_between_max_min):
  """
    Generates a bell-shaped (Gaussian) demand curve over the 24 hours of a day.

    The curve is centred at hour 'respective_h' with a standard deviation of
    0.2 * 'respective_h'. It is rescaled so that, before noise is added, its maximum equals
    'peak_val' and the difference between its maximum and its lowest possible value is
    'diff_between_max_min'. Gaussian noise with a standard deviation of
    0.1 * 'diff_between_max_min' / 2 is then added to every hour.

    Parameters:
      peak_val: demand value at the top of the peak (before noise).
      respective_h: hour of the peak, used as the mean of the Gaussian. Must be greater than 0,
        because the standard deviation is derived from it.
      diff_between_max_min: height of the peak above the baseline of the curve (before noise).

    Returns the numpy array containing the 24 hourly demand values.

    Raises ValueError if 'respective_h' is not greater than 0.
  """

  # The standard deviation is proportional to the mean, so a mean of 0 (or less) would give
  # a zero (or negative) standard deviation and the formula below would silently produce
  # NaN or wrongly scaled values. Fail loudly instead.
  if not respective_h > 0:
    raise ValueError(
        "respective_h must be greater than 0, got {!r}".format(respective_h))

  mean = respective_h
  std = 0.2 * mean

  # Numpy array containing hours from 0 to 23.
  x = np.arange(24)

  # Gaussian probability density evaluated at every hour.
  # The hour closest to the mean gets the highest density value.
  y = 1/(std * np.sqrt(2 * np.pi)) * np.exp(-1/2 * ((x - mean) / std) ** 2)

  # Density values are not demand values, so the curve is rescaled to make
  # its maximum equal to 'diff_between_max_min' (its tails stay close to 0).
  y *= (diff_between_max_min / np.max(y))

  # Shifting the whole curve vertically so that its maximum becomes exactly 'peak_val'.
  y += (peak_val - np.max(y))

  # Adding some random noise, scaled relative to half of the peak height.
  y += np.random.normal(0, 0.1, 24) * diff_between_max_min/2

  return y

In [None]:
def generate_daily_demand():
  """
    Generates one day of hourly demand for a car sharing service by stitching together three
    curves produced by the helper functions 'cosine_distribution' and 'generate_peak'.

    Returns a tuple with a numpy array containing the 24 hourly demand values (rounded and
    never negative), the maximum demand value of the day and the hour at which it occurs.
  """

  # Hours 0-7: a cosine wave oscillating around 50 with an amplitude of 45, i.e. values
  # roughly between 5 and 95 before noise. The helper always produces all 24 hours;
  # only the first 8 are kept by the weights below.
  night_curve = cosine_distribution(frequency=0.5, amplitude=45, phase=0.0, bias=50)

  # Hours 8-15: first demand peak of 200 at around 12 o'clock, rising 110 above its baseline.
  midday_curve = generate_peak(200, 12, 110)

  # Hours 16-23: second demand peak of 280 at around 18 o'clock, rising 150 above its baseline.
  evening_curve = generate_peak(280, 18, 150)

  # Each curve gets a weight per hour. A weight of 0 removes the curve from that hour, so
  # every hour is taken from exactly one curve.
  night_weights = np.zeros(24)
  night_weights[:8] = 1

  # The last two hours of the midday segment are boosted (x1.1 and x1.4) to smooth the
  # transition into the higher evening segment.
  midday_weights = np.zeros(24)
  midday_weights[8:14] = 1
  midday_weights[14:16] = [1.1, 1.4]

  evening_weights = np.zeros(24)
  evening_weights[16:] = 1

  # Weighted sum of the three curves gives the demand of the whole day.
  blended = (night_curve * night_weights
             + midday_curve * midday_weights
             + evening_curve * evening_weights)

  # Demand values are whole, non-negative numbers.
  hourly_demand = np.clip(np.round(blended), 0, None)

  return hourly_demand, np.max(hourly_demand), np.argmax(hourly_demand)

In [None]:
def generate_weekly_demand():
  """
    Generates a week (7 days x 24 hours) of hourly demand by calling 'generate_daily_demand'
    once per day.

    Returns 2 Pandas DataFrames, both with the columns 'Hour' and 'Demand'. The first one holds
    all 168 hourly values, where 'Hour' counts from 0 to 167 across the week. The second one
    holds one row per day: the maximum demand of that day and the hour of the week at which
    it occurred.
  """

  n_days = 7
  hours_per_day = 24

  # One (hourly values, daily maximum, hour of the maximum) tuple per day.
  daily_results = [generate_daily_demand() for _ in range(n_days)]

  # All hourly values of the week, one day after the other.
  weekly_demand = np.concatenate(
      [np.empty((0,))] + [hourly_values for hourly_values, _, _ in daily_results],
      axis=0)

  # Maximum demand value of each day.
  top_7_max_demand = np.array([day_max for _, day_max, _ in daily_results], dtype=float)

  # Hour of each daily maximum, shifted from hour-of-day to hour-of-week.
  respective_hours = np.array(
      [max_hour + day * hours_per_day
       for day, (_, _, max_hour) in enumerate(daily_results)],
      dtype=float)

  # DataFrame with every hour of the week and its demand.
  df = pd.DataFrame({'Hour': np.arange(n_days * hours_per_day), 'Demand': weekly_demand})

  # DataFrame containing only the points representing the maximum demand within each day.
  max_daily_demand_df = pd.DataFrame({'Hour': respective_hours, 'Demand': top_7_max_demand})

  return df, max_daily_demand_df

In [None]:
def plot_weekly_demand(df, max_daily_demand_df):
  """
    Plots the hourly demand as a scatter plot, colours the maximum daily demand points red
    (all other points blue) and overlays a dashed black line connecting consecutive points.

    Parameters:
      df: Pandas DataFrame with the columns 'Hour' and 'Demand', one row per plotted point.
      max_daily_demand_df: Pandas DataFrame with the columns 'Hour' and 'Demand' containing
        the points which should be highlighted.

    Returns nothing. The figure is displayed with fig.show().
  """

  # A point is highlighted when both its hour and its demand value appear in max_daily_demand_df.
  # The mask is evaluated row by row, so the colours line up with the plotted points
  # regardless of what kind of index 'df' has.
  is_daily_max = (df['Hour'].isin(max_daily_demand_df['Hour'])
                  & df['Demand'].isin(max_daily_demand_df['Demand']))
  colors = ['red' if highlighted else 'blue' for highlighted in is_daily_max]

  # Create the figure using plotly.graph_objects
  fig = go.Figure()

  # Scatter trace with one marker per hour
  fig.add_trace(go.Scatter(
      x=df['Hour'],  # x values
      y=df['Demand'],  # y values
      mode='markers',
      marker=dict(color=colors),  # red for the daily maxima, blue for the rest
      name='Demand vs Hour',  # name of the trace
      hovertemplate='Hour: %{customdata}<br>Demand: %{y}<extra></extra>',  # custom hover information
      customdata=df['Hour'] % 24  # hour of the day (0-23) to display while hovering
  ))

  # Dashed line through the raw data points. No smoothing or fitting is applied, the line
  # simply connects consecutive points. The dash style is set on this trace only, so the
  # marker trace above is left untouched.
  fig.add_trace(go.Scatter(
      x=df['Hour'],  # x values
      y=df['Demand'],  # y values
      mode='lines',
      line=dict(color='black', dash='dash'),  # line color and dash style
      name='Trendline',  # name of the line trace
      xaxis='x',  # use x axis
      yaxis='y'  # use y axis
  ))

  # Set x and y axis titles
  fig.update_xaxes(title_text='Hour')
  fig.update_yaxes(title_text='Demand')
  fig.update_layout(title='Demand vs Hour (Max daily demand in red)')

  # Show the plot
  fig.show()

In [None]:
# Generate the hourly demand of one week together with the maximum demand point of each day.
# No random seed is set in this cell, so every run produces different values.
df, max_daily_demand_df = generate_weekly_demand()

# Display the weekly demand, with the daily maxima highlighted.
plot_weekly_demand(df, max_daily_demand_df)