In [2]:
from dashboard.logic.io import GSHEETS_URL, read_gsheet, comment_button
import pandas as pd
import numpy as np

df = read_gsheet(
    GSHEETS_URL, 
    header=None
)

In [3]:
def findRowColRegex(df: pd.DataFrame, pat: str, case: bool=True, regex:bool=True):
    
    # make sure only one such pat exists in the df
    df_mask = df.apply(lambda x: x.str.contains(pat, case=case, regex=regex) if x.dtype == 'object' else None)
    
    count = df_mask.sum().sum()
    
    if count < 1: 
        raise ValueError(f'Given {pat} did not give any results.')
    if count > 1:
        raise ValueError(f'Given {pat} gave more than 1 results.')
     
    # col and row values where pat
    row = df_mask.any(axis='columns').argmax()
    col = df_mask.any(axis='index').argmax()
    
    return row, col

def getDataFrames(df: pd.DataFrame):

    df = df.reset_index(drop=True)
    dataframes_dict = {}
    
    # MAIN DF
    main_df_idx2 = (df == 'Monthly Income').any(axis='columns').argmax()
    dataframes_dict['main'] = df.iloc[:main_df_idx2+1,:3].dropna(how='all', axis='rows')
    
    # ADS DF
    r1, c1 = findRowColRegex(df, "My Finance Course")
    
    
    ads_df = df.iloc[r1:,c1:].reset_index(drop=True)
    ads_df = (ads_df
        .loc[:(ads_df[c1].isna()).argmax()-1, :]
        .dropna(how='all', axis='columns')
    )
    dataframes_dict['ads'] = ads_df
    
    # OTHER DFS
    df_names = [
    'announcements',
    'advice',
    'error_warning',
    'risk',
    'skip',
    'skip',
    'skip',
    'historical',
    'cash_pos',
    'general_notes',
    'success',
    ]
    
    # Find rows after main df where all values are NaN-s.
    nan_rows = df.iloc[main_df_idx2+1:,].isna().all(axis='columns')
    
    # Indices where all NaN-s in the row
    dfs_idxs = []
    
    for i,bool in nan_rows.items():
        
        if i == nan_rows.index[-1]:
            dfs_idxs.append(None)
            break
        if bool:
            dfs_idxs.append(i)
    
    # raw dataframes
    dfs = []        
    for i,idx in enumerate(dfs_idxs):
        if i == len(dfs_idxs) - 1:
            break
        dfs.append(df.iloc[idx:dfs_idxs[i+1]])
    
    # strip dataframes from NaN-s and add to dictionary
    for df_, name in zip(dfs, df_names):
        
        if name == 'skip': continue
        
        dataframes_dict[name] = (df_
            .dropna(how='all', axis='columns')
            .dropna(how='all', axis='rows')
        )
    
    return dataframes_dict

df1 = df.copy()

#findRowColRegex(df1, 'My Finance Course')

df_dict = getDataFrames(df1)
df_dict.keys()



In [61]:
df_a = df_dict['announcements'].copy()
df_a = df_a.reset_index(drop=True)
df_a = df_a[0].str.split(pat=':', n=1, expand=True)
df_a.columns = ['date', 'text']
df_a.loc[df_a[df_a['date'] == 'Post to Patreon'].index[0], 'date'] = 'Jan 2023'
month_date = df_a.date.str.split(' ', n=1)
df_a['year'] = [i[1] for i in month_date]
df_a['month'] = [i[0] for i in month_date]
df_a

Unnamed: 0,date,text,year,month
0,Jan 2023,Property Purchase 23rd Jan 2023. Cash reserve...,2023,Jan
1,Jan 2023,As central banks tighten (QT + Interest rate ...,2023,Jan
2,Jan 2023,I wouldn't be surprised if we see a small ral...,2023,Jan
3,Dec 2022,No change to last month. 2022 financial forec...,2022,Dec
4,Nov 2022,Asset prices are now beginning to fall in cor...,2022,Nov
5,Oct 2022,Cash is king right now. Assets are finally st...,2022,Oct
6,Sep 2022,Cash currently held between multiple bank acc...,2022,Sep
7,Aug 2022,"As per last month, I am still accumulating ca...",2022,Aug
8,Jul 2022,I'm following my February plan of going to CA...,2022,Jul


In [67]:
date_dummy = None
dates = []
for i, row in df_a.iterrows():

    if date_dummy is None:
        date_dummy = row['date']
    if row['date'] == date_dummy:
        dates.append(row['date'])
        date_dummy = row['date']
    else:
        print(dates)
        dates = []

['Jan 2023', 'Jan 2023', 'Jan 2023']
[]
[]
[]
[]
[]
