In [2]:
import pandas as pd 
import re
from datetime import datetime
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
#function to parse the date range for weekly column names
def parse_date_range(date_range_str):
    """
    Convert date range strings like "Jan 1-7, 2023" or "Jan 29 - Feb 4, 2023" to datetime objects.
    """
    # Extract the month and year
    match = re.match(r'([a-zA-Z]+) (\d+)-(\d+), (\d+)', date_range_str)
    if match:
        month = match.group(1)
        start_day = match.group(2)
        end_day = match.group(3)
        year = match.group(4)
        start_date_str = f"{month} {start_day}, {year}"
        start_date = datetime.strptime(start_date_str, '%b %d, %Y')
        return start_date
    else:
        # Handle cases like "Jan 29 - Feb 4, 2023"
        match = re.match(r'([a-zA-Z]+) (\d+) - ([a-zA-Z]+) (\d+), (\d+)', date_range_str)
        if match:
            start_month = match.group(1)
            start_day = match.group(2)
            end_month = match.group(3)
            end_day = match.group(4)
            year = match.group(5)
            start_date_str = f"{start_month} {start_day}, {year}"
            start_date = datetime.strptime(start_date_str, '%b %d, %Y')
            return start_date

In [4]:
mydatesby = 'daily'  # 'daily' or 'weekly'
if mydatesby == 'daily':
    df = pd.read_excel('rp_daily_JAN23_MAY24_pasteValues.xlsx', skiprows=4)
elif mydatesby == 'weekly':
    df = pd.read_excel('rp_week_JAN23_MAY24_pasteValues.xlsx', skiprows=4)

df = df.rename(columns={df.columns[0]: 'name'})

df = df[[col for col in df.columns if 'Total' not in col]]


# Assuming the first column is named 'A' or adjust according to your column name
df[df.columns[0]] = df[df.columns[0]].str.strip()

# Melt the DataFrame to have 'Dates' and 'Values' columns, keeping the first column intact
melted_df = pd.melt(df, id_vars=[df.columns[0]], var_name='dates', value_name='values')

# Convert 'Values' column to numeric, forcing errors to NaN
melted_df['values'] = pd.to_numeric(melted_df['values'], errors='coerce')

# Convert the 'Dates' column to datetime
if mydatesby == 'weekly':
    melted_df['dates'] = melted_df['dates'].apply(parse_date_range)
elif mydatesby == 'daily':
    melted_df['dates'] = pd.to_datetime(melted_df['dates'])

melted_df['dates'] = melted_df['dates'].dt.strftime('%Y-%m-%d')

# melted_df = melted_df.dropna(subset=[melted_df.columns[0]])
melted_df = melted_df.dropna(subset=[melted_df.columns[1]])
melted_df = melted_df.dropna(subset=[melted_df.columns[2]])


melted_df['values'] = melted_df.apply(lambda row: row['values'] if pd.notna(row[melted_df.columns[0]]) else 0, axis=1)

print(melted_df)

                                                    name       dates    values
1                                             Net Income  2023-01-01 -54878.03
9                                       Prepaid Expenses  2023-01-01  45000.00
12          Accumulated Depreciation - Machinery & Equip  2023-01-01    506.81
14     Equipment - NYC:Accumulated Depreciation - NYC...  2023-01-01   3119.92
15     Fixed Asset Computers - Corporate:Accumulated ...  2023-01-01    192.19
...                                                  ...         ...       ...
35126          Net cash provided by operating activities  2024-05-31  29734.69
35128                                      Goldman - HYS  2024-05-31 -15476.79
35142          Net cash provided by investing activities  2024-05-31 -15476.79
35150          Net cash provided by financing activities  2024-05-31      0.00
35151                       Net cash increase for period  2024-05-31  14257.90

[5637 rows x 3 columns]


### Plot Net Income with Prophet

In [5]:
def prophet_plot(df, row_name, start_d, end_d, n_weeks = 13):
    df = df[(df['dates'] >= start_d) & (df['dates'] <= end_d)]

    df_piv = df.pivot(index='name', columns='dates', values='values')

    #filter by row
    selected_row = df_piv.loc[row_name,:]

    new_df = pd.DataFrame({
        'ds': df_piv.columns,
        'y': selected_row.values,
        })
    
    m = Prophet(changepoint_prior_scale=0.5).fit(new_df)
    # m.fit(new_df)

    future = m.make_future_dataframe(periods= n_weeks*7)
    # Python
    forecast = m.predict(future)
    return plot_plotly(m, forecast)
    

In [13]:
# prophet_plot(new_df, 'Net Income')
# prophet_plot(pivoted_df, 'Inventory')
# prophet_plot(pivoted_df, 'SBA Loan')
# prophet_plot(melted_df, 'Net Income', '2023-01-01','2024-06-30')
prophet_plot(melted_df, 'Inventory', '2023-01-01','2024-06-30')
# prophet_plot(melted_df, 'SBA Loan', '2023-01-01','2024-06-30')
# prophet_plot(melted_df, 'WEHO Club CC 2143', '20234-01-01','2024-06-30')


09:14:16 - cmdstanpy - INFO - Chain [1] start processing
09:14:17 - cmdstanpy - INFO - Chain [1] done processing


### Apply Prophet

In [7]:
# fit
m = Prophet()
m.fit(new_df)

future = m.make_future_dataframe(periods=13*7)


NameError: name 'new_df' is not defined

In [None]:
# Python
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

# fig1 = m.plot(forecast)
plot_plotly(m, forecast)
