In [None]:
import pandas as pd
import numpy as np
import os 
from datetime import datetime, timedelta
import re
from pandas.tseries.offsets import MonthEnd


기본 메서드

In [None]:
def get_today(form='%Y-%m-%d'):
    """Return the current local date/time in the requested representation.

    Args:
        form: Either one of the named aliases ``'yyyymmdd'``,
            ``'yyyy-mm-dd'`` or ``'datetime'``, or any ``strftime`` pattern
            containing at least one ``%`` directive (for example
            ``'%Y%m%d'``, ``'%Y-%m-%d'``, ``'%Y%m%d%H'`` or ``'%H:%M'``).

    Returns:
        The ``datetime`` object itself when ``form == 'datetime'``;
        otherwise the formatted string.

    Raises:
        KeyError: If ``form`` is neither a known alias nor a strftime
            pattern (kept as ``KeyError`` so existing callers that relied on
            the old dictionary lookup see the same exception type).
    """
    # Take a single timestamp instead of calling datetime.now() once per
    # supported format.
    now = datetime.now()
    if form == 'datetime':
        return now

    # Human-friendly aliases mapped onto their strftime equivalents.
    aliases = {
        'yyyymmdd': '%Y%m%d',
        'yyyy-mm-dd': '%Y-%m-%d',
    }
    pattern = aliases.get(form, form)

    # A string without any '%' directive would be echoed back verbatim by
    # strftime, which would hide typos such as 'yyyy/mm/dd'.
    if not isinstance(pattern, str) or '%' not in pattern:
        raise KeyError(form)
    return now.strftime(pattern)

def scan_files_including_regex(file_folder, regex, option='name'):
    """List the entries of ``file_folder`` whose name matches ``regex``.

    Args:
        file_folder: Directory to scan (not recursive).
        regex: Regular expression searched anywhere inside each entry name.
        option: ``'name'`` to get bare entry names, ``'path'`` to get each
            name joined onto ``file_folder``.

    Returns:
        A list of names or paths, in the order ``os.scandir`` yields them.

    Raises:
        KeyError: If ``option`` is not ``'name'`` or ``'path'``.
    """
    matched_names = []
    with os.scandir(file_folder) as entries:
        for entry in entries:
            if re.search(regex, entry.name) is not None:
                matched_names.append(entry.name)

    # Dispatch on the requested output shape; an unknown option surfaces as
    # the KeyError raised by the dictionary lookup.
    builders = {
        'name': lambda: matched_names,
        'path': lambda: [os.path.join(file_folder, name) for name in matched_names],
    }
    return builders[option]()

def format_date(date):
    """Normalise a ``YYYYMMDD`` or ``YYYY-MM-DD`` string to ``YYYY-MM-DD``.

    Hyphens are stripped first, so both spellings go through the same
    ``%Y%m%d`` parse; an invalid date raises ``ValueError`` from ``strptime``.
    """
    compact = date.replace('-', '')
    parsed = datetime.strptime(compact, '%Y%m%d')
    return f'{parsed:%Y-%m-%d}'

def save_df_to_file(df, file_folder, file_memo, file_extension='.csv', archive=False, archive_folder='./archive'):
    """Write ``df`` as CSV to ``file_folder`` under a dated file name.

    The file is named ``dataset-{file_memo}-save{YYYYMMDD}{file_extension}``
    using today's local date, so a second save on the same day overwrites the
    first. With ``archive=True`` the existing file is first copied to
    ``archive_folder`` as ``archive-<file_name>``.

    Args:
        df: DataFrame to save (written without its index).
        file_folder: Destination directory; created if it does not exist.
        file_memo: Free-text tag embedded in the file name.
        file_extension: Suffix appended to the file name. The content is
            always written with ``to_csv`` regardless of this value.
        archive: When true, keep a copy of the file being overwritten.
        archive_folder: Directory receiving the archived copy.

    Returns:
        None. This is a best-effort helper: any failure is printed as
        ``Error: ...`` rather than raised.
    """
    # Local import: shutil is only needed here and is not among the imports
    # at the top of the file.
    import shutil

    try:
        save_time = datetime.now().strftime('%Y%m%d')
        file_name = f'dataset-{file_memo}-save{save_time}{file_extension}'
        file_path = os.path.join(file_folder, file_name)

        # Create the destination up front; an empty folder string means the
        # current directory, which os.makedirs would reject.
        if file_folder:
            os.makedirs(file_folder, exist_ok=True)

        if archive and os.path.exists(file_path):
            os.makedirs(archive_folder, exist_ok=True)
            archive_file_path = os.path.join(archive_folder, 'archive-' + file_name)
            # Copy the bytes instead of round-tripping through pandas, which
            # re-infers dtypes and can alter the data (e.g. a code such as
            # '005930' would be archived as 5930).
            shutil.copy2(file_path, archive_file_path)
            print(f'Archived: {archive_file_path}')

        df.to_csv(file_path, index=False)
        print(f'Saved: {file_path}')
    except Exception as e:
        print(f"Error: {e}")

In [None]:
class M8186:
    """Load and transform the menu-8186 CSV export of a single fund.

    The working DataFrame lives in ``self.df``. It is populated by
    ``get_df_ref`` and modified in place by the transformation methods.
    ``start_date`` and ``end_date`` are stored on the instance but are not
    used by any method of this class.
    """

    # Columns converted by convert_to_float when no explicit list is given.
    DEFAULT_NUMERIC_COLUMNS = ('수정기준가', 'KOSPI지수', 'KOSPI200지수', 'KOSDAQ지수')

    def __init__(self, fund_code, start_date, end_date, menu_code = '8186', data_folder='./캡스톤데이터2'):
        """Store the identifiers used to locate the fund's CSV file.

        Args:
            fund_code: Fund code embedded in the file name (``code<fund_code>``).
            start_date: Stored as-is.
            end_date: Stored as-is.
            menu_code: Menu code embedded in the file name (``menu<menu_code>``).
            data_folder: Directory scanned for the CSV file.
        """
        self.fund_code = fund_code
        self.menu_code = menu_code
        self.start_date = start_date
        self.end_date = end_date
        self.data_folder = data_folder
        self.df = None  # Placeholder until get_df_ref loads the data.

    def open_df_raw(self):
        """Read the matching CSV file with the greatest name into a DataFrame.

        Files are matched on ``menu<menu_code>-code<fund_code>`` and the
        lexicographically greatest path is chosen (presumably the most recent
        save date; confirm against the file naming convention).

        Raises:
            FileNotFoundError: If no file in ``data_folder`` matches.
        """
        pattern = f'menu{self.menu_code}-code{self.fund_code}'
        # Ask for full paths: bare names would only resolve when the current
        # working directory happens to be the data folder.
        paths = scan_files_including_regex(self.data_folder, pattern, option='path')
        if not paths:
            raise FileNotFoundError(
                f'No file matching {pattern!r} found in {self.data_folder!r}'
            )
        return pd.read_csv(max(paths))

    def get_df_ref(self):
        """Load the raw file and keep only the date, index and price columns."""
        raw = self.open_df_raw()
        # Copy so that later in-place column assignments do not operate on a
        # slice of the raw frame.
        self.df = raw[['일자','KOSPI지수', 'KOSDAQ지수', 'KOSPI200지수', '수정기준가']].copy()
        return self.df

    def fill_zero_with_previous(self, columns):
        """Replace zeros in ``columns`` with the previous non-zero value.

        Zeros are turned into NaN and forward-filled, so existing NaNs are
        filled as well; leading zeros have nothing to copy from and stay NaN.
        Modifies ``self.df`` in place and returns None.
        """
        for column in columns:
            # Assign the result back instead of calling an inplace method on
            # the selected column, which does not reliably write through to
            # the frame.
            self.df[column] = self.df[column].replace(0, np.nan).ffill()

    def convert_to_float(self, columns=None):
        """Convert comma-formatted numeric strings in ``columns`` to floats.

        Args:
            columns: Columns to convert. When None, the four price/index
                columns in ``DEFAULT_NUMERIC_COLUMNS`` are used.

        Returns:
            ``self.df`` after conversion. Non-string values are left as-is.
        """
        if columns is None:
            columns = self.DEFAULT_NUMERIC_COLUMNS
        for column in columns:
            self.df[column] = self.df[column].apply(
                lambda x: float(x.replace(',', '')) if isinstance(x, str) else x
            )
        return self.df

    def filter_for_period(self, months):
        """Return the rows from the last ``months`` months of the data.

        The window is anchored on the latest date in the data (not on today).
        If that date is a month end, the window starts at the month end
        ``months`` months earlier; otherwise at the same day-of-month
        ``months`` months earlier.

        Args:
            months: Window length in months, or None for no filtering.

        Returns:
            ``self.df`` when ``months`` is None; an empty frame when the data
            does not reach back far enough to cover the window; otherwise the
            rows dated on or after the window start.
        """
        if months is None:
            return self.df

        dates = self.df['일자']
        # The CSV is read without date parsing, so normalise to datetimes
        # locally; the stored column is left untouched.
        if not pd.api.types.is_datetime64_any_dtype(dates):
            dates = pd.to_datetime(dates.astype(str))

        last_date = dates.max()
        first_date = dates.min()

        # MonthEnd(0) leaves a month-end date unchanged and rolls any other
        # date forward, so equality means the data ends on a month end.
        if last_date == last_date + MonthEnd(0):
            period_start = last_date - MonthEnd(months)
        else:
            period_start = last_date - pd.DateOffset(months=months)

        if period_start < first_date:
            return self.df[0:0]  # Empty frame: history is shorter than the window.
        return self.df[dates >= period_start]

    def calculate_cumulative_return(self, columns):
        """Add a ``'<column> (%)'`` column of percent change from the first row.

        The first row of each new column is forced to 0. An empty frame gets
        empty float columns instead of raising.

        Returns:
            ``self.df`` with the added columns.
        """
        for column_name in columns:
            values = self.df[column_name]
            if values.empty:
                self.df[column_name + ' (%)'] = values.astype(float)
                continue
            initial_value = values.iloc[0]
            returns = ((values - initial_value) / initial_value) * 100
            # Positional write: a label-based .loc[0] would append a new row
            # whenever the index has no label 0 (e.g. after filtering).
            returns.iloc[0] = 0
            self.df[column_name + ' (%)'] = returns
        return self.df
    


    
