# 分析セクション
ここからは分析をするためのコードを書き出す。

### 月ごとの集計を始める

In [48]:
import pandas as pd

csv_file_path = 'test_modified.csv'
columns_to_read=['Date','SubjectCode','Amount']

df = pd.read_csv(csv_file_path,usecols=columns_to_read)

df['YearMonth']=df['Date'].str[:7]
df = df[['YearMonth','SubjectCode','Amount']]

pivot_df = df.pivot_table(index='YearMonth',columns='SubjectCode',values='Amount', aggfunc='sum').reset_index()
pivot_df = pivot_df.fillna(0)
pivot_df = pivot_df.astype({col: int for col in pivot_df.columns if col != 'YearMonth'})

pivot_df.to_csv('formatted_test.csv',index=False)

print(pivot_df)
#print(value: lambda monthly_summary['Subject'].values)

FileNotFoundError: [Errno 2] No such file or directory: 'test_modified.csv'

# 繰り越し

In [None]:
import pandas as pd

csv_file_path = 'formatted_test.csv'
df = pd.read_csv(csv_file_path)


def calculate_balancs(df):
    """
    Calculate balance sheet
    """
    asset_columns = [col for col in df.columns if col.startswith('100')]
    liability_columns = [col for col in df.columns if col.startswith('200')]
    income_columns = [col for col in df.columns if col.startswith('400')]
    expense_columns = [col for col in df.columns if col.startswith('500')]
    df["TotalAssets"]=df[asset_columns].sum(axis=1)
    df["TotalLiabilities"]=df[liability_columns].sum(axis=1)
    df["TotalIncome"]=df[income_columns].sum(axis=1)
    df["TotalExpenses"]=df[expense_columns].sum(axis=1)
    df["NetIncome"]= df['TotalIncome']-df['TotalExpenses']
    df["TotalEquity"]= df['TotalAssets']-df['TotalLiabilities']

    for i in range(1,len(df)):
        df.at[i, 'TotalAssets'] += df.at[i-1, 'TotalAssets']
        df.at[i, 'TotalLiabilities'] += df.at[i-1, 'TotalLiabilities']
        df.at[i, 'TotalIncome'] += df.at[i-1, 'TotalIncome']
        df.at[i, 'TotalExpenses'] += df.at[i-1, 'TotalExpenses']
        df.at[i, 'NetIncome'] += df.at[i-1, 'NetIncome']
        df.at[i, 'TotalEquity'] += df.at[i-1, 'TotalEquity']

    return df
result_df = calculate_balancs(df)
print(df)


  YearMonth   100    101    102    130    200    300    400   490   500  ...  \
0   2024-03 -6555 -26986  10000 -14100 -16856  52730 -43250 -1320  4713  ...   
1   2024-04 -1410  49114  15000      0      0      0 -71194     0     0  ...   

     572    590   599    600  TotalAssets  TotalLiabilities  TotalIncome  \
0  16200  14000  1820 -52730        -6555            -16856       -43250   
1      0      0     0      0        -7965            -16856      -114444   

   TotalExpenses  NetIncome  TotalEquity  
0           4713     -47963        10301  
1           4713    -119157         8891  

[2 rows x 28 columns]


In [73]:
import pandas as pd

class BalanceSheet:
    def __init__(self, csv_file_path):
        """
        Initialize BalanceSheet object.

        Args:
            csv_file_path (str): Path to the CSV file containing the data.
        """
        self.csv_file_path = csv_file_path
        self.df = None

    def preprocess_and_pivot(self):
        """
        Preprocess the data read from the CSV file, and pivot and format the DataFrame.

        Args:

        Returns:
            None
        """
        # データの読み込み
        columns_to_read=['Date','SubjectCode','Amount']
        df = pd.read_csv(self.csv_file_path, usecols=columns_to_read)

        # データの前処理
        df['YearMonth'] = df['Date'].str[:7]
        df = df[['YearMonth','SubjectCode','Amount']]

        # ピボットとフォーマット
        pivot_df = df.pivot_table(index='YearMonth', columns='SubjectCode', values='Amount', aggfunc='sum').reset_index()
        pivot_df = pivot_df.fillna(0)
        pivot_df = pivot_df.astype({col: int for col in pivot_df.columns if col != 'YearMonth'})

        # self.dfに保存
        self.df = pivot_df

    def calculate_balances(self):
        """
        Calculate balances.

        Args:

        Returns:
            None
        """
        asset_columns = [col for col in self.df.columns if str(col).startswith('1')]
        liability_columns = [col for col in self.df.columns if str(col).startswith('2')]
        income_columns = [col for col in self.df.columns if str(col).startswith('4')]
        expense_columns = [col for col in self.df.columns if str(col).startswith('5')]

        self.df["TotalAssets"]=self.df[asset_columns].sum(axis=1)
        self.df["TotalLiabilities"]=self.df[liability_columns].sum(axis=1)
        self.df["TotalIncome"]=self.df[income_columns].sum(axis=1)
        self.df["TotalExpenses"]=self.df[expense_columns].sum(axis=1)
        self.df["NetIncome"]= self.df['TotalIncome']-self.df['TotalExpenses']
        self.df["TotalEquity"]= self.df['TotalAssets']-self.df['TotalLiabilities']

        for i in range(1,len(self.df)):
            self.df.at[i, 'TotalAssets'] += self.df.at[i-1, 'TotalAssets']
            self.df.at[i, 'TotalLiabilities'] += self.df.at[i-1, 'TotalLiabilities']
            self.df.at[i, 'TotalIncome'] += self.df.at[i-1, 'TotalIncome']
            self.df.at[i, 'TotalExpenses'] += self.df.at[i-1, 'TotalExpenses']
            self.df.at[i, 'NetIncome'] += self.df.at[i-1, 'NetIncome']
            self.df.at[i, 'TotalEquity'] += self.df.at[i-1, 'TotalEquity']

    def show_summary(self):
        """
        Print the summary DataFrame.

        Args:

        Returns:
            None
        """
        print(self.df)

    def save_summary(self, output_file_path):
        """
        Save the summary DataFrame to a CSV file.

        Args:
            output_file_path (str): Path to the output CSV file.

        Returns:
            None
        """
        self.df.to_csv(output_file_path, index=False)

    def make_balance_sheet(self, output_file_path):
        """
        Run all methods in the correct order to generate the balance sheet.

        Args:
            output_file_path (str): Path to the output CSV file.

        Returns:
            None
        """
        self.preprocess_and_pivot()
        self.calculate_balances()
        self.show_summary()
        self.save_summary(output_file_path)

# 使用例

input_file_path = 'datas/test_modified.csv'
# input_file_path = 'datas/financial_records_modified.csv'
output_file_path = 'datas/test_balance_sheet.csv'
# output_file_path = 'datas/financial_balance_sheet.csv'
bs = BalanceSheet(input_file_path)
bs.make_balance_sheet(output_file_path)


SubjectCode YearMonth   100    101    102    130    200    300    400   490  \
0             2024-03 -6555 -26986  10000 -14100 -16856  52730 -43250 -1320   
1             2024-04 -1410  49114  15000      0      0      0 -71194     0   

SubjectCode   500  ...    572    590   599    600  TotalAssets  \
0            4713  ...  16200  14000  1820 -52730       -37641   
1               0  ...      0      0     0      0        25063   

SubjectCode  TotalLiabilities  TotalIncome  TotalExpenses  NetIncome  \
0                      -16856       -44570          96326    -140896   
1                      -16856      -115764         104816    -220580   

SubjectCode  TotalEquity  
0                 -20785  
1                  41919  

[2 rows x 28 columns]


### 繰り越しを行う

In [11]:
import pandas as pd
import numpy as np
import datetime as dt

def get_subject_sum(df):
    """
    Get the sum of the each subject code.
    Args:
        dataframe: Dataframe containing 'SubjectCode', 'Amount'
            SubjectCode (str): Subject code.
            Amount (int): Account amount.
    Returns:
        dataframe: Dataframe containing 'SubjectCode', 'Amount', 'YearMonth'
    """
    yearmonths = df['YearMonth'].values
    yearmonths = np.unique(yearmonths)
    rows = []
    for yearmonth in yearmonths:
        item = df[df['YearMonth'] == yearmonth]
        subjects = item['SubjectCode'].unique()
        for subject in subjects:
            value = pd.to_numeric(item[item['SubjectCode'] == subject]['Amount'], errors='coerce').sum()
            row = {
                'YearMonth': yearmonth,
                'SubjectCode': subject,
                'Amount': value
            }
            rows.append(row)
    sums = pd.DataFrame(rows)
    sums = sums.sort_values(by=['YearMonth', 'SubjectCode'])
    return sums

def add_yearmonth_column(df):
    """
    日付列から年月を抽出して新しい列を追加する
    Returns:
        dataframe: Dataframe with 'Year' and 'Month' columns added
        yearmongh column added as string
    """
    df['Date']=pd.to_datetime(df['Date'])
    df['Year']=df['Date'].dt.year
    df['Month']=df['Date'].dt.month
    df['YearMonth'] = df['Date'].apply(lambda x: x.strftime('%Y-%m'))
    return df

def get_subject_sum(df):
    """
    Get the sum of the each subject code.
    Args:
        dataframe: Dataframe containing 'SubjectCode', 'Amount'
            SubjectCode (str): Subject code.
            Amount (int): Account amount.
    Returns:
        dataframe: Dataframe containing 'SubjectCode', 'Amount', 'YearMonth'
    """
    yearmonths = df['YearMonth'].values
    yearmonths = np.unique(yearmonths)
    each_subject_rows = []
    each_category_rows = []
    for yearmonth in yearmonths:
        items_each_yearmonth = df[df['YearMonth'] == yearmonth]
        items_each_yearmonth = items_each_yearmonth.astype({'Amount': int, 'SubjectCode': str})

        for subject in items_each_yearmonth['SubjectCode'].unique():
            subject_sum = pd.to_numeric(items_each_yearmonth[items_each_yearmonth['SubjectCode'] == subject]['Amount'], errors='coerce').sum()
            each_subject_row = {
                'YearMonth': yearmonth,
                'SubjectCode': subject,
                'Amount': subject_sum
            }
            each_subject_rows.append(each_subject_row)
        for category in ['1','2','4','5']:
            category_items = []
            for item in items_each_yearmonth['SubjectCode']:
                if item.startswith(category):
                    category_row = {
                        'YearMonth': yearmonth,
                        'SubjectCode': item,
                        'Amount': items_each_yearmonth[items_each_yearmonth['SubjectCode'] == item]['Amount'].values[0]
                    }
                    category_items.append(category_row)
            category_items = pd.DataFrame(category_items)
            category_sum = pd.to_numeric(category_items['Amount'], errors='coerce').sum()
            each_category_row = {
                'YearMonth': yearmonth,
                'SubjectCode': category[0] + '00',
                'Amount': category_sum
            }
            each_category_rows.append(each_category_row)
        print(each_category_rows)
    sums_each_subject = pd.DataFrame(each_subject_rows)
    sums_each_subject = sums_each_subject.sort_values(by=['YearMonth', 'SubjectCode'])
    sums_each_category = pd.DataFrame(each_category_rows)
    sums_each_category = sums_each_category.pivot_table(index='YearMonth',columns='SubjectCode',values='Amount')
    sums_each_category.index.name = 'YearMonth'
    sums_each_category = sums_each_category.reset_index()
    #Caluculate net income and total equity
    sums_each_category['NetIncome'] = 0 - sums_each_category['400'] - sums_each_category['500']
    sums_each_category['TotalEquity'] = 0 - sums_each_category['100'] - sums_each_category['200']
    sums_each_category = sums_each_category.rename(columns={
        '100':'TotalAssets',
        '200':'TotalLiabilities',
        '400':'TotalIncome',
        '500':'TotalExpenses'
        })
    return sums_each_subject, sums_each_category

df = pd.read_csv('tests/test.csv')
df = add_yearmonth_column(df)
sums_subject,sums_category = get_subject_sum(df)
print(sums_subject)
print(sums_category)

[{'YearMonth': '2024-03', 'SubjectCode': '100', 'Amount': -6000}, {'YearMonth': '2024-03', 'SubjectCode': '200', 'Amount': -4000}, {'YearMonth': '2024-03', 'SubjectCode': '400', 'Amount': -5500}, {'YearMonth': '2024-03', 'SubjectCode': '500', 'Amount': 15500}]
[{'YearMonth': '2024-03', 'SubjectCode': '100', 'Amount': -6000}, {'YearMonth': '2024-03', 'SubjectCode': '200', 'Amount': -4000}, {'YearMonth': '2024-03', 'SubjectCode': '400', 'Amount': -5500}, {'YearMonth': '2024-03', 'SubjectCode': '500', 'Amount': 15500}, {'YearMonth': '2024-04', 'SubjectCode': '100', 'Amount': -6000}, {'YearMonth': '2024-04', 'SubjectCode': '200', 'Amount': -4000}, {'YearMonth': '2024-04', 'SubjectCode': '400', 'Amount': -5500}, {'YearMonth': '2024-04', 'SubjectCode': '500', 'Amount': 15500}]
   YearMonth SubjectCode  Amount
2    2024-03         101   -7500
0    2024-03         200   -1500
5    2024-03         400   -5000
6    2024-03         490    -500
1    2024-03         500    3000
3    2024-03        

In [23]:
def get_monthly_summery(df):
    """
    Calculate the sum of each subject code for each month. YearMonth is the index, SubjectCode is the column, and Amount is the value.

    Args:
        Dataframe: Dataframe containing 'Date', 'SubjectCode', 'Amount'
            YearMonth (str): Year and month in 'YYYY-MM' format.
            SubjectCode (str): Subject code.
            Amount (int): Account amount.

    Returns:
        Dataframe: processed dataframe index is YearMonth, columns are SubjectCode, values are Amount
    """
    # データの読み込み
    columns_to_read=['YearMonth','SubjectCode','Amount']
    df = df[columns_to_read]

    # ピボットとフォーマット 月ごとの科目別合計金額を計算
    # sum_of_subjectは、月ごとの科目別合計金額を持つDataFrame 行は月、列は科目コード
    sum_of_subjects = df.pivot_table(index='YearMonth', columns='SubjectCode', values='Amount', aggfunc='sum').reset_index()
    sum_of_subjects = sum_of_subjects.fillna(0)
    sum_of_subjects = sum_of_subjects.astype({col: int for col in sum_of_subjects.columns if col != 'YearMonth'})

    yearmonths = df['YearMonth'].values
    yearmonths = np.unique(yearmonths)
    each_category_rows = []
    for yearmonth in yearmonths:
        items_each_yearmonth = df[df['YearMonth'] == yearmonth]
        items_each_yearmonth = items_each_yearmonth.astype({'Amount': int, 'SubjectCode': str})
        for category in ['1','2','4','5']:
            category_items = []
            for item in items_each_yearmonth['SubjectCode']:
                if item.startswith(category):
                    category_row = {
                        'YearMonth': yearmonth,
                        'SubjectCode': item,
                        'Amount': items_each_yearmonth[items_each_yearmonth['SubjectCode'] == item]['Amount'].values[0]
                    }
                    category_items.append(category_row)
            category_items = pd.DataFrame(category_items)
            category_sum = pd.to_numeric(category_items['Amount'], errors='coerce').sum()
            each_category_row = {
                'YearMonth': yearmonth,
                'SubjectCode': category[0] + '00',
                'Amount': category_sum
            }
            each_category_rows.append(each_category_row)
    sums_each_category = pd.DataFrame(each_category_rows)
    sums_each_category = sums_each_category.pivot_table(index='YearMonth',columns='SubjectCode',values='Amount')
    sums_each_category.index.name = 'YearMonth'
    sums_each_category = sums_each_category.reset_index()
    #Caluculate net income and total equity
    sums_each_category['NetIncome'] = 0 - sums_each_category['400'] - sums_each_category['500']
    sums_each_category['TotalEquity'] = 0 - sums_each_category['100'] - sums_each_category['200']
    sums_each_category = sums_each_category.rename(columns={
        '100':'TotalAssets',
        '200':'TotalLiabilities',
        '400':'TotalIncome',
        '500':'TotalExpenses'
        })
    return sum_of_subjects, sums_each_category

df1 = pd.read_csv('tests/test.csv')
df1 = add_yearmonth_column(df1)

sum_of_subject_df1,sum_of_category_df1 = get_monthly_summery(df1)
print(sum_of_subject_df1)
print(sum_of_category_df1)

[{'YearMonth': '2024-03', 'SubjectCode': '100', 'Amount': -6000}, {'YearMonth': '2024-03', 'SubjectCode': '200', 'Amount': -4000}, {'YearMonth': '2024-03', 'SubjectCode': '400', 'Amount': -5500}, {'YearMonth': '2024-03', 'SubjectCode': '500', 'Amount': 15500}]
[{'YearMonth': '2024-03', 'SubjectCode': '100', 'Amount': -6000}, {'YearMonth': '2024-03', 'SubjectCode': '200', 'Amount': -4000}, {'YearMonth': '2024-03', 'SubjectCode': '400', 'Amount': -5500}, {'YearMonth': '2024-03', 'SubjectCode': '500', 'Amount': 15500}, {'YearMonth': '2024-04', 'SubjectCode': '100', 'Amount': -6000}, {'YearMonth': '2024-04', 'SubjectCode': '200', 'Amount': -4000}, {'YearMonth': '2024-04', 'SubjectCode': '400', 'Amount': -5500}, {'YearMonth': '2024-04', 'SubjectCode': '500', 'Amount': 15500}]
SubjectCode YearMonth   101   200   400  490   500   531    590
0             2024-03 -7500 -1500 -5000 -500  3000  1500  10000
1             2024-04 -7500 -1500 -5000 -500  3000  1500  10000
SubjectCode YearMonth  Tot

In [16]:
import datetime as dt
import calendar
from dateutil.relativedelta import relativedelta

def get_date_for_carryover(formatted_day):
        '''
        Get the date for the carryover data.
        Args:
            formatted_day (str): Date string in the format 'YYYY-MM-DD'
        Returns:
            datetime: Closing date for the carryover data
            datetime: Opening date for the carryover data
        '''

        # Parse the formatted_day string to a datetime object
        date_obj = dt.datetime.strptime(formatted_day, '%Y-%m-%d')

        # Get the last day of the month
        last_day = calendar.monthrange(date_obj.year, date_obj.month)[1]
        last_day_date = dt.datetime(date_obj.year, date_obj.month, last_day)
        last_day_date = last_day_date.strftime('%Y-%m-%d')

        # Get the first day of the next month
        next_first_day_date = date_obj + relativedelta(months=1)
        next_first_day_date = next_first_day_date.strftime('%Y-%m-%d')

        return last_day_date, next_first_day_date

In [33]:
def get_carryover_data(sum_of_subjects, sum_of_categories,yearmonth):
    sum_of_subjects = sum_of_subjects[sum_of_subjects['YearMonth'] == yearmonth]
    sum_of_categories = sum_of_categories[sum_of_categories['YearMonth'] == yearmonth]
    print(sum_of_subjects)
    closing_day,next_first_day_date = get_date_for_carryover(yearmonth+'-01')
    carryover_data = []

    for initial in [['TotalEquity','300'],['NetIncome','600']]:
        row1 = {
            'YearMonth': closing_day,
            'SubjectCode': initial[1],
            'Amount': sum_of_categories[initial[0]].values[0],
            'Remarks': 'Carryover '+ initial[1]
        }
        row2 = {
            'YearMonth': next_first_day_date,
            'SubjectCode': initial[1],
            'Amount': sum_of_categories[initial[0]].values[0],
            'Remarks': 'Carryover '+ initial[1]
        }
        carryover_data.append(row1)
        carryover_data.append(row2)
    for initial in ['1','2']:
        for item in sum_of_subjects.columns:
            if str(item).startswith(initial):
                row = {
                    'YearMonth': closing_day,
                    'SubjectCode': item,
                    'Amount': sum_of_subjects[item].values[0],
                    'Remarks': 'Carryover '+ str(item)
                }
                carryover_data.append(row)
    carryover_df = pd.DataFrame(carryover_data)
    return carryover_df

df1 = pd.read_csv('tests/test.csv')
df1 = add_yearmonth_column(df1)
sum_of_subject_df1,sum_of_category_df1 = get_monthly_summery(df1)
carryover_data = get_carryover_data(sum_of_subject_df1, sum_of_category_df1,'2024-03')
print(sum_of_subject_df1)
print(carryover_data)

[{'YearMonth': '2024-03', 'SubjectCode': '100', 'Amount': -6000}, {'YearMonth': '2024-03', 'SubjectCode': '200', 'Amount': -4000}, {'YearMonth': '2024-03', 'SubjectCode': '400', 'Amount': -5500}, {'YearMonth': '2024-03', 'SubjectCode': '500', 'Amount': 15500}]
[{'YearMonth': '2024-03', 'SubjectCode': '100', 'Amount': -6000}, {'YearMonth': '2024-03', 'SubjectCode': '200', 'Amount': -4000}, {'YearMonth': '2024-03', 'SubjectCode': '400', 'Amount': -5500}, {'YearMonth': '2024-03', 'SubjectCode': '500', 'Amount': 15500}, {'YearMonth': '2024-04', 'SubjectCode': '100', 'Amount': -6000}, {'YearMonth': '2024-04', 'SubjectCode': '200', 'Amount': -4000}, {'YearMonth': '2024-04', 'SubjectCode': '400', 'Amount': -5500}, {'YearMonth': '2024-04', 'SubjectCode': '500', 'Amount': 15500}]
SubjectCode YearMonth   101   200   400  490   500   531    590
0             2024-03 -7500 -1500 -5000 -500  3000  1500  10000
SubjectCode YearMonth   101   200   400  490   500   531    590
0             2024-03 -750

In [18]:
sums_subject.reset_index(drop=True, inplace=True)
carryover_data = []
for yearmonth in sums_subject['YearMonth'].unique():
    last_day_date, next_first_day_date = get_date_for_carryover(yearmonth + '-01')
    for item in [['NetIncome','300'],['TotalEquity','600']]:
            row = {
                'Date': last_day_date,
                'SubjectCode': item[1],
                'Amount': sums_category[sums_category['YearMonth'] == yearmonth][item[0]].values[0],
                'Remarks': 'Carryover 99'
            }
            carryover_data.append(row)

carryover_df=pd.DataFrame(carryover_data)
print(carryover_df)

         Date SubjectCode  Amount       Remarks
0  2024-03-31         300  -10000  Carryover 99
1  2024-03-31         600   10000  Carryover 99
2  2024-04-30         300  -10000  Carryover 99
3  2024-04-30         600   10000  Carryover 99


In [2]:
import pandas as pd

# 入力データ
data = [
    {'YearMonth': '2024-03', 'SubjectCode': '100', 'Amount': -6000},
    {'YearMonth': '2024-03', 'SubjectCode': '200', 'Amount': -4000},
    {'YearMonth': '2024-03', 'SubjectCode': '400', 'Amount': -5500},
    {'YearMonth': '2024-03', 'SubjectCode': '500', 'Amount': 15500},
    {'YearMonth': '2024-04', 'SubjectCode': '100', 'Amount': -6000},
    {'YearMonth': '2024-04', 'SubjectCode': '200', 'Amount': -4000},
    {'YearMonth': '2024-04', 'SubjectCode': '400', 'Amount': -5500},
    {'YearMonth': '2024-04', 'SubjectCode': '500', 'Amount': 15500}
]

# DataFrameに変換
df = pd.DataFrame(data)

# YearMonthをインデックスに、SubjectCodeを列に変換してピボットテーブルを作成
pivot_df = df.pivot(index='YearMonth', columns='SubjectCode', values='Amount')

# 結果を表示
print(pivot_df)

SubjectCode   100   200   400    500
YearMonth                           
2024-03     -6000 -4000 -5500  15500
2024-04     -6000 -4000 -5500  15500


In [4]:
from processor.processor import CSVProcessor
import pandas as pd

input_file_path = 'tests/test.csv'
output_file_path = 'tests/test_outputs.csv'
csv_processor = CSVProcessor('tests/test.csv', None, subjectcodes_path='codes.csv',balance_sheet_path=None)

datas = pd.read_csv(csv_processor.input_file)
datas = datas.astype({
    'Date': 'str',
    'SubjectCode': 'str',
    'Amount': 'int64',
    'Remarks': 'str'
})

# 入力したデータを読み込んで処理を行う
datas = (datas.pipe(csv_processor.generate_id)
            .pipe(csv_processor.apply_subject_from_code)
            .pipe(csv_processor.sort_csv)
            .pipe(csv_processor.add_yearmonth_column)
            .pipe(csv_processor.remove_duplicates))

for yearmonth in datas['YearMonth'].unique():
    csv_processor.yearmonth = yearmonth
    sum_of_subjects,sum_of_categories = csv_processor.get_monthly_summery(datas)
    carryover_datas = csv_processor.get_carryover_data(sum_of_subjects,sum_of_categories)
    datas = pd.concat([datas,carryover_datas],ignore_index=True)
    datas = (datas.pipe(csv_processor.generate_id)
                .pipe(csv_processor.apply_subject_from_code)
                .pipe(csv_processor.sort_csv)
                .pipe(csv_processor.add_yearmonth_column)
                .pipe(csv_processor.remove_duplicates))

print(datas)
datas.to_csv(output_file_path,index=False)

Index(['Date', 'ID', 'SubjectCode', 'Amount', 'Remarks'], dtype='object')
Index(['Date', 'ID', 'SubjectCode', 'Amount', 'Remarks', 'Subject', 'Year',
       'Month', 'YearMonth'],
      dtype='object')
Index(['Date', 'ID', 'SubjectCode', 'Amount', 'Remarks', 'Subject', 'Year',
       'Month', 'YearMonth'],
      dtype='object')
         Date            ID SubjectCode  Amount               Remarks  \
0  2024-03-03  202403030010         200   -2000         Starbucks 001   
1  2024-03-03  202403030011         500    2000         Starbucks 001   
2  2024-03-03  202403030020         101   -1500           Tempura 002   
3  2024-03-03  202403030021         531    1500           Tempura 002   
4  2024-03-10  202403100010         101  -10000               UFJ 001   
5  2024-03-10  202403100011         590   10000  Special Expenses 001   
6  2024-03-15  202403150010         400   -5000            Salary 001   
7  2024-03-15  202403150011         101    5000            Salary 001   
8  2024-03-20

In [88]:
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta

input_file_path = 'datas/test_balance_sheet.csv'

df = pd.read_csv(input_file_path)
asset_columns = [col for col in df.columns if str(col).startswith('1')]
liability_columns = [col for col in df.columns if str(col).startswith('2')]

carryover_columns = asset_columns + liability_columns

months = df['YearMonth']
data = []

for month in months.values:
    item = df[df['YearMonth'] == month]
    for carryover_column in carryover_columns:
        carryover_value = item[carryover_column].values[0]
        formatted_month = month + "-01"
        date_obj = datetime.strptime(formatted_month, "%Y-%m-%d")

        # 1か月後の日付を計算
        date_obj += relativedelta(months=1)
        data.append({'Date': date_obj, 'SubjectCode': carryover_column, 'Amount': carryover_value, 'Remarks':"Carryover 99"})

new_df = pd.DataFrame(data, columns=['Date','ID', 'SubjectCode', 'Amount','Remarks'])
new_df= new_df.fillna('')

n_new_df = new_df.append(new_df)

print(n_new_df)

        Date ID SubjectCode  Amount       Remarks
0 2024-04-01            100   -6555  Carryover 99
1 2024-04-01            101  -26986  Carryover 99
2 2024-04-01            102   10000  Carryover 99
3 2024-04-01            130  -14100  Carryover 99
4 2024-04-01            200  -16856  Carryover 99
5 2024-05-01            100   -1410  Carryover 99
6 2024-05-01            101   49114  Carryover 99
7 2024-05-01            102   15000  Carryover 99
8 2024-05-01            130       0  Carryover 99
9 2024-05-01            200       0  Carryover 99
0 2024-04-01            100   -6555  Carryover 99
1 2024-04-01            101  -26986  Carryover 99
2 2024-04-01            102   10000  Carryover 99
3 2024-04-01            130  -14100  Carryover 99
4 2024-04-01            200  -16856  Carryover 99
5 2024-05-01            100   -1410  Carryover 99
6 2024-05-01            101   49114  Carryover 99
7 2024-05-01            102   15000  Carryover 99
8 2024-05-01            130       0  Carryover 99
