In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [2]:
def calculate_confidence_interval(data, window, min_periods, z_score_threshold):
    """Build a rolling band of mean +/- ``z_score_threshold`` standard deviations.

    Parameters
    ----------
    data : pandas.Series
        Values to roll over.
    window : int
        Rolling window size, passed to ``data.rolling``.
    min_periods : int
        Minimum observations per window, passed to ``data.rolling``.
    z_score_threshold : float
        Number of rolling standard deviations placed on each side of the
        rolling mean.

    Returns
    -------
    pandas.DataFrame
        Columns 'LowerBound' and 'UpperBound', indexed like ``data``.
    """
    roller = data.rolling(window=window, min_periods=min_periods)
    center = roller.mean()
    half_width = z_score_threshold * roller.std()
    return pd.DataFrame({
        'LowerBound': center - half_width,
        'UpperBound': center + half_width,
    })

In [3]:
def calculate_iqr_outliers(data, multiplier):
    """Flag values lying outside the IQR fences of ``data``.

    The fences are ``Q1 - multiplier * IQR`` and ``Q3 + multiplier * IQR``,
    where Q1 and Q3 are the 0.25 and 0.75 quantiles of ``data`` and
    ``IQR = Q3 - Q1``.

    Parameters
    ----------
    data : pandas.Series
        Values to test.
    multiplier : float
        How many IQRs beyond the quartiles a value may lie before it is
        flagged.

    Returns
    -------
    pandas.Series
        Boolean mask indexed like ``data``; True where the value is strictly
        below the lower fence or strictly above the upper fence.
    """
    first_quartile = data.quantile(0.25)
    third_quartile = data.quantile(0.75)
    fence_offset = multiplier * (third_quartile - first_quartile)
    too_low = data.lt(first_quartile - fence_offset)
    too_high = data.gt(third_quartile + fence_offset)
    return too_low | too_high

In [4]:
def plot_with_confidence_interval_and_forecast_outliers(actual_data, confidence_interval, forecast_data, forecast_outliers, title):
    """Plot actuals, a shaded confidence band, and forecast points split by outlier flag.

    Parameters
    ----------
    actual_data : pandas.Series
        Values drawn as a line; its index supplies the x positions.
    confidence_interval : pandas.DataFrame
        Must contain 'LowerBound' and 'UpperBound' columns; its index supplies
        the x positions of the shaded band.
    forecast_data : pandas.Series
        Forecast values drawn as markers; its index supplies the x positions.
    forecast_outliers : boolean mask
        Mask over ``forecast_data``; True marks a point as an outlier.
    title : str
        Figure title.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    fig = go.Figure()

    # Actual values as a continuous line.
    fig.add_trace(go.Scatter(x=actual_data.index, y=actual_data, mode='lines', name='Actual Data'))

    # Shaded band: trace the lower bound left-to-right, then the upper bound
    # right-to-left, so the points form one closed outline for fill='toself'.
    band_x = confidence_interval.index.tolist()
    band_lower = confidence_interval['LowerBound'].tolist()
    band_upper = confidence_interval['UpperBound'].tolist()
    fig.add_trace(go.Scatter(x=band_x + band_x[::-1],
                             y=band_lower + band_upper[::-1],
                             fill='toself', fillcolor='rgba(255,0,0,0.2)', line=dict(color='rgba(255,0,0,0)'), hoverinfo="skip", showlegend=False))

    # Split the forecast once and take x from each subset's own index. Pairing
    # the full index with a filtered y would match points positionally and
    # draw them at the wrong x positions.
    regular_points = forecast_data[~forecast_outliers]
    outlier_points = forecast_data[forecast_outliers]
    fig.add_trace(go.Scatter(x=regular_points.index, y=regular_points, mode='markers', name='Forecast (Non-Outliers)', marker=dict(color='blue')))
    fig.add_trace(go.Scatter(x=outlier_points.index, y=outlier_points, mode='markers', name='Forecast (Outliers)', marker=dict(color='orange')))

    fig.update_layout(title=title, xaxis_title="Date", yaxis_title="Value", legend_title="Legend")
    fig.show()

In [5]:

# Load the actual data from the Excel file
actual_data = pd.read_excel('platform_3m_actuals_new.xlsx')

# Keep only the rows whose 'Platform' is '8k_Chassis'. The explicit .copy()
# makes the result an independent DataFrame, so the column assignment and
# in-place operations applied to it afterwards do not act on a slice of the
# loaded frame (avoids pandas' SettingWithCopyWarning).
actual_data = actual_data[actual_data['Platform'] == '8k_Chassis'].copy()

In [6]:
# Parse 'datestamp' as datetimes, order the rows chronologically, and move the
# column into the index. Building a new frame with a method chain (instead of
# assigning a column and using inplace=True on the filtered frame) avoids
# mutating a slice of the loaded data.
actual_data = (
    actual_data
    .assign(datestamp=pd.to_datetime(actual_data['datestamp']))
    .sort_values('datestamp')
    .set_index('datestamp')
)

# Replace the datetime index with 'Mon YYYY' labels for display. This is done
# after sorting, because the formatted strings no longer sort chronologically.
actual_data.index = actual_data.index.strftime('%b %Y')

In [7]:
# Read the forecast values from CSV
forecast_data = pd.read_csv('mviai_platform_3m_forecast.csv')

# Rolling band around the actual 'Bookings_3m' series: rolling mean +/- 3
# rolling standard deviations over a 3-row window
confidence_interval = calculate_confidence_interval(
    actual_data['Bookings_3m'],
    window=3,
    min_periods=1,
    z_score_threshold=3,
)

# Flag 'Forecast_3m' values lying more than 1.5 * IQR outside the quartiles
forecast_outliers = calculate_iqr_outliers(
    forecast_data['Forecast_3m'],
    multiplier=1.5,
)

In [8]:
# Render the actuals, the confidence band, and the forecast points
# (outliers drawn in a separate trace)
plot_with_confidence_interval_and_forecast_outliers(
    actual_data=actual_data['Bookings_3m'],
    confidence_interval=confidence_interval,
    forecast_data=forecast_data['Forecast_3m'],
    forecast_outliers=forecast_outliers,
    title='Bookings_3m with Confidence Interval and Forecast Outliers',
)