In [224]:
# 필요 라이브러리

import os
import glob
import pandas as pd
from datetime import datetime
import locale
import re
import numpy as np
from decimal import Decimal, ROUND_HALF_UP
import plotly.graph_objects as go

from pandas.tseries.offsets import DateOffset, MonthEnd
from dateutil.relativedelta import relativedelta

from ShiningPebbles import * 

# Disable pandas' chained-assignment (SettingWithCopy) warning for the whole session.
# NOTE(review): several helpers below assign into frames that may be slices of
# another frame; this setting hides the warning rather than fixing those writes.
pd.options.mode.chained_assignment = None



In [225]:
## 클래스 밖에서

# Extract the save timestamp embedded in a file name
def extract_savetime(file_name):
    """Parse the ``saveYYYYMMDD[HHMM]`` token of *file_name* into a datetime.

    The HHMM part is optional; when it is missing the time is taken as 00:00.
    Returns None when the file name carries no ``save`` token.
    """
    found = re.search(r'save(\d{4})(\d{2})(\d{2})(\d{4})?', file_name)
    if not found:
        return None

    year, month, day, clock = found.groups()
    # Year, month and day are always captured; only the clock part may be absent.
    stamp = year + month + day + (clock or "0000")
    return datetime.strptime(stamp, '%Y%m%d%H%M')

# Extract the reference date embedded in a file name
def extract_datetime(file_name):
    """Parse the ``dateYYYYMMDD`` token of *file_name* into a datetime.

    Returns None when the file name carries no ``date`` token.
    """
    found = re.search(r'date(\d{4})(\d{2})(\d{2})', file_name)
    if found is None:
        return None

    year, month, day = (int(part) for part in found.groups())
    return datetime(year, month, day)

# Pick the file with the most recent reference date
def get_latest_file_use_datetime(folder_path, fund_code, menu_num):
    """Return the name of the matching file whose ``date`` token is the latest.

    Files are looked up with ``scan_files_including_regex`` using the pattern
    ``menu{menu_num}-code{fund_code}``.

    Args:
        folder_path: Folder handed to ``scan_files_including_regex``.
        fund_code: Fund code used in the file-name pattern.
        menu_num: Menu number used in the file-name pattern.

    Returns:
        The file name with the newest reference date. On ties the first such
        file in scan order wins.

    Raises:
        FileNotFoundError: If no file matches the pattern.
    """
    file_list = scan_files_including_regex(folder_path, regex=f'menu{menu_num}-code{fund_code}')
    if not file_list:
        raise FileNotFoundError(f"No file found with menu {menu_num} and fund code {fund_code}")

    # A file without a ``date`` token yields None, which cannot be ordered
    # against datetimes; rank such files as the oldest instead of crashing.
    return max(file_list, key=lambda name: extract_datetime(name) or datetime.min)

# Load a menu 2205 / 2820 / 2305 file
def get_file_start2_as_df(folder_path, input_date, fund_code, menu_num, merge_header=False):
    """Load the newest matching CSV whose reference date is on or before *input_date*.

    Candidates are the files returned by ``scan_files_including_regex`` for the
    pattern ``menu{menu_num}-code{fund_code}``. Among those with a ``date``
    token not later than *input_date*, the one with the latest reference date
    is chosen, with the latest save time breaking ties.

    Args:
        folder_path: Folder that holds the CSV files.
        input_date: Cut-off date, parsed with the format ``%Y-%m-%d``.
        fund_code: Fund code used in the file-name pattern.
        menu_num: Menu number used in the file-name pattern.
        merge_header: When True the file is read without a header and its first
            two rows are merged into one header via ``create_header``.

    Returns:
        ``(df, file_name)``: the loaded frame with all-empty rows dropped, and
        the name of the file that was read.

    Raises:
        FileNotFoundError: If no file has a reference date on or before
            *input_date*.
    """
    file_list = scan_files_including_regex(file_folder=folder_path, regex=f'menu{menu_num}-code{fund_code}')
    input_date = pd.to_datetime(input_date, format='%Y-%m-%d')

    # Parse each name once and keep only files dated on or before the cut-off.
    candidates = []
    for name in file_list:
        reference_date = extract_datetime(name)
        if reference_date is not None and reference_date <= input_date:
            # A missing save time would make the tie-break compare None with a
            # datetime; treat it as the oldest possible save instead.
            save_time = extract_savetime(name) or datetime.min
            candidates.append((reference_date, save_time, name))

    if not candidates:
        raise FileNotFoundError(
            f"No file found with menu {menu_num} and fund code {fund_code} dated on or before {input_date}"
        )

    file_name = max(candidates, key=lambda item: item[:2])[2]
    used_file_path = os.path.join(folder_path, file_name)
    print(used_file_path)

    if merge_header:
        # Read without a header, build one from the first two rows, then drop those rows.
        df = pd.read_csv(used_file_path, header=None)
        df.columns = create_header(df.iloc[0], df.iloc[1])
        df = df.drop([0, 1])
    else:
        df = pd.read_csv(used_file_path)

    df = df.dropna(how='all')
    return df, file_name

# Load the menu 8186 file
def get_file_8186_as_df(folder_path, fund_code):
    """Load the most recently saved ``menu8186`` CSV for *fund_code*.

    Candidates come from ``scan_files_including_regex`` with the pattern
    ``menu8186-code{fund_code}`` and are ordered by the save timestamp in
    their names, newest first.

    Args:
        folder_path: Folder that holds the CSV files.
        fund_code: Fund code used in the file-name pattern.

    Returns:
        ``(df, file_name)``: the loaded frame with all-empty rows dropped, and
        the name of the file that was read.

    Raises:
        FileNotFoundError: If no file matches the pattern.
    """
    file_list = scan_files_including_regex(file_folder=folder_path, regex=f'menu8186-code{fund_code}')
    if not file_list:
        raise FileNotFoundError(f"No file found with menu 8186 and fund code {fund_code}")

    # A name without a ``save`` token yields None, which cannot be ordered
    # against datetimes; rank such files as the oldest.
    sorted_file_list = sorted(
        file_list,
        key=lambda name: extract_savetime(name) or datetime.min,
        reverse=True,
    )
    print(file_list, sorted_file_list)

    file_name = sorted_file_list[0]
    used_file_path = os.path.join(folder_path, file_name)
    print(used_file_path)

    df = pd.read_csv(used_file_path)
    df = df.dropna(how='all')

    return df, file_name

# Build a single header out of two header rows
def create_header(row1, row2):
    """Merge two header rows into one list of column labels.

    For each column the label from *row2* is used when it is neither null nor
    an empty string; otherwise the label from *row1* is used. Newline
    characters are removed from string labels first.
    """
    def _strip_newlines(label):
        # Non-string cells (e.g. NaN) are passed through untouched.
        if isinstance(label, str):
            return label.replace('\n', '')
        return label

    def _choose(upper, lower):
        lower_is_usable = pd.notnull(lower) and lower != ''
        return lower if lower_is_usable else upper

    return [
        _choose(_strip_newlines(upper), _strip_newlines(lower))
        for upper, lower in zip(row1, row2)
    ]

# De-duplicate column names
def make_unique_column_names(df):
    """Rename repeated column labels of *df* in place so every label is unique.

    The first occurrence keeps its label; the n-th occurrence (n >= 2) becomes
    ``"{label}.{n}"``. Nothing is returned.
    """
    occurrences = {}
    renamed = []

    for label in df.columns:
        count = occurrences.get(label, 0) + 1
        occurrences[label] = count
        renamed.append(label if count == 1 else f"{label}.{count}")

    df.columns = renamed
    
# Save a frame as CSV
def save_df_to_csv(folder_path, fund_code, df, df_name, menu_num, used_file_date=None, save_option='name'):
    """Write *df* as a UTF-8 (with BOM) CSV under ``<folder_path>/save_file``.

    Args:
        folder_path: Base folder; output goes into its ``save_file`` subfolder.
        fund_code: Fund code embedded in the output name.
        df: DataFrame to write (the index is not written).
        df_name: Dataset name embedded in the output name.
        menu_num: Menu number embedded in the output name.
        used_file_date: Optional token identifying the source file's date; it
            is embedded in the output name when given.
        save_option: ``'name'`` puts the save timestamp in the file name;
            ``'file'`` puts the file in a ``fund{fund_code}-save{timestamp}``
            subfolder.

    Raises:
        ValueError: If *save_option* is neither ``'name'`` nor ``'file'``.
    """
    # Validate before touching the file system so a bad option leaves no empty folder behind.
    if save_option not in ('name', 'file'):
        raise ValueError("Invalid save_option. Use 'name' or 'file'.")

    save_path = os.path.join(folder_path, 'save_file')
    save_timestamp = datetime.now().strftime('%Y%m%d%H%M')
    base_name = f'dataset-{df_name}-code{fund_code}-menu{menu_num}'

    if save_option == 'name':
        date_part = '' if used_file_date is None else f'{used_file_date}-'
        save_file_path = os.path.join(save_path, f'{base_name}-{date_part}save{save_timestamp}.csv')
    else:
        date_part = '' if used_file_date is None else f'-{used_file_date}'
        save_file_path = os.path.join(
            save_path,
            f'fund{fund_code}-save{save_timestamp}',
            f'{base_name}{date_part}.csv',
        )

    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(os.path.dirname(save_file_path), exist_ok=True)

    df.to_csv(save_file_path, index=False, encoding='utf-8-sig')

    print(f"File saved to {save_file_path}")


# 숫자값, 문자값 처리

# Convert a column of numeric strings to floats
def convert_number_format(df, column_name):
    """Convert comma-grouped numeric strings in ``df[column_name]`` to floats, in place.

    String cells have their commas removed and are passed to ``float``;
    non-string cells are left as they are. Nothing is returned.

    Raises:
        ValueError: If a string cell is not a valid number once commas are removed.
    """
    def _to_float(cell):
        return float(cell.replace(',', '')) if isinstance(cell, str) else cell

    # Assign the whole column rather than writing through ``df.loc[:, col]``:
    # since pandas 2.0 the ``loc`` form writes into the existing column and
    # keeps its object dtype, so the converted values would not become numeric.
    df[column_name] = df[column_name].apply(_to_float)

# Replace values <= 0 (treated as missing) with the previous row's value
def replace_values_less_than_or_equal_to_zero(df, *column_names):
    """Blank out values ``<= 0`` in the given columns and forward-fill them, in place.

    Each listed column has its non-positive values replaced with missing
    values, then every missing value is filled with the closest preceding
    valid value. Leading values with nothing before them stay missing.
    Nothing is returned.
    """
    for column_name in column_names:
        column = df[column_name]
        # Assign the result back explicitly: calling ``ffill(inplace=True)`` on
        # the temporary returned by ``df.loc[:, col]`` is not guaranteed to
        # write through to ``df`` (it does not under pandas Copy-on-Write).
        df[column_name] = column.mask(column <= 0).ffill()


# Round to two decimal places
def round_values(df, column_name):
    """Round float and numeric-string cells of ``df[column_name]`` to 2 decimals.

    Cells that are neither ``float`` nor ``str`` are left unchanged. The column
    is replaced on *df*, and *df* is returned.
    """
    def _round_cell(cell):
        if not isinstance(cell, (float, str)):
            return cell
        return round(float(cell), 2)

    rounded = df[column_name].apply(_round_cell)
    df[column_name] = rounded
    return df

# Append the '원' (won) suffix
def append_won(df, column_name):
    """Turn ``df[column_name]`` into strings with '원' appended, and return *df*."""
    as_text = df[column_name].astype(str)
    df[column_name] = as_text + '원'
    return df

# Append the '%' suffix
def append_percent(df, column_name):
    """Turn ``df[column_name]`` into strings with '%' appended, and return *df*."""
    as_text = df[column_name].astype(str)
    df[column_name] = as_text + '%'
    return df

# Insert a comma every three digits
def format_with_comma(df, column_name):
    """Format ``df[column_name]`` as integer strings with thousands separators.

    Each value is truncated to an integer (as ``%d`` formatting does) and
    rendered with ',' between groups of three digits. The column is replaced
    on *df*, and *df* is returned.

    The formatting uses Python's ',' format option rather than
    ``locale.setlocale``, so it neither depends on the ``ko_KR.UTF-8`` locale
    being installed nor changes the process-wide locale.
    """
    df[column_name] = df[column_name].apply(lambda value: f"{int(value):,}")
    return df

# Prefix '+' for non-negative values; negatives already carry '-'
def prepend_sign(df, column_name):
    """Turn ``df[column_name]`` into strings with an explicit sign, and return *df*.

    Values ``>= 0`` get a leading '+'; all other values are converted with ``str``.
    """
    def _signed(value):
        text = str(value)
        if value >= 0:
            return '+' + text
        return text

    df[column_name] = df[column_name].apply(_signed)
    return df


In [226]:

class UpDownCapture:
    """Up/down capture of a fund's adjusted NAV against the KOSPI index.

    On construction the menu-8186 file for the fund is loaded and reduced to
    the date column plus the fund and KOSPI columns, and the summary table is
    built. Results are exposed as:

    * ``df_reference``: the cleaned input frame, limited to ``input_date``.
    * ``df_upDownCapture``: a 3-row summary (up capture, down capture, capture
      ratio) for the trailing year and for the whole available period.
    """

    def __init__(self, folder_path, fund_code, input_date, date_column):
        # Basic inputs
        self.folder_path = folder_path
        self.fund_code = fund_code
        self.input_date = input_date

        # Name of the date column used for period filtering
        self.date_column = date_column

        self.colnames_raw = ['수정기준가', 'KOSPI지수']
        self.df_reference = self.get_df_reference()
        self.df_upDownCapture = self.get_upDown_df()

    def open_df8186_raw(self):
        """Load the raw menu-8186 frame for this fund."""
        df_raw, _ = get_file_8186_as_df(folder_path=self.folder_path, fund_code=self.fund_code)

        return df_raw

    def select_columns(self, df):
        """Return a new frame holding only the date and price columns, re-indexed from 0."""
        return df[[self.date_column] + self.colnames_raw].reset_index(drop=True)

    def get_df_calculate_upDown_capture(self, df):
        """Return a copy of *df* with daily returns and their up-day / down-day splits.

        Up and down days are decided by the sign of the KOSPI daily return; on
        all other days the split columns hold 0.
        """
        # Work on a copy so the helper columns never leak into the caller's
        # frame (the cached ``df_reference`` or a slice of it).
        df = df.copy()

        df['fund_dailyReturn'] = df['수정기준가'].pct_change()
        df['KOSPI_dailyReturn'] = df['KOSPI지수'].pct_change()

        is_up_day = df['KOSPI_dailyReturn'] > 0
        is_down_day = df['KOSPI_dailyReturn'] < 0

        # Returns on days when the benchmark rose
        df['fund_up'] = np.where(is_up_day, df['fund_dailyReturn'], 0)
        df['KOSPI_up'] = np.where(is_up_day, df['KOSPI_dailyReturn'], 0)

        # Returns on days when the benchmark fell
        df['fund_down'] = np.where(is_down_day, df['fund_dailyReturn'], 0)
        df['KOSPI_down'] = np.where(is_down_day, df['KOSPI_dailyReturn'], 0)

        return df

    def calculate_upDown_capture(self, df):
        """Return ``(up_capture, down_capture, capture_ratio)``, each rounded to 2 decimals.

        Each capture is the sum of the fund's daily returns divided by the sum
        of the KOSPI daily returns over up days or down days. The capture
        ratio is computed from the unrounded captures.

        NOTE(review): when the benchmark has no up (or no down) day the
        denominator is 0 and the result is inf/nan rather than an error.
        """
        df = self.get_df_calculate_upDown_capture(df)

        up_capture = df['fund_up'].sum() / df['KOSPI_up'].sum()
        down_capture = df['fund_down'].sum() / df['KOSPI_down'].sum()
        capture_ratio = round(up_capture / down_capture, 2)

        return round(up_capture, 2), round(down_capture, 2), capture_ratio

    def change_format_and_replace(self, df):
        """Convert the price columns to floats and forward-fill non-positive values."""
        for col in self.colnames_raw:
            convert_number_format(df, col)
            replace_values_less_than_or_equal_to_zero(df, col)

        return df

    def get_df_reference(self):
        """Build the cleaned frame that all calculations start from."""
        df = self.open_df8186_raw()
        df = self.select_columns(df)
        df = self.change_format_and_replace(df)
        # ``filter_by_date_range`` is not defined in this file (presumably it
        # comes from the ShiningPebbles star import); it is asked to limit the
        # rows to ``input_date``.
        df = filter_by_date_range(df, date_column=self.date_column, end_date=self.input_date)

        return df

    def get_upDown_capture(self):
        """Return capture figures for the whole period followed by the trailing year.

        Returns:
            ``(up, down, ratio, up_1y, down_1y, ratio_1y)``. The three
            trailing-year entries are the string ``'-'`` when the data starts
            later than one year before ``input_date``.
        """
        df = self.df_reference

        # Whole available period (from the first row up to input_date)
        up_capture_onDate, down_capture_onDate, capture_ratio_onDate = self.calculate_upDown_capture(df)

        # Trailing one year
        one_year_ago = pd.to_datetime(self.input_date) - pd.DateOffset(years=1)
        # Parse the dates once so the comparisons also work when the column holds strings.
        dates = pd.to_datetime(df[self.date_column])

        if one_year_ago < dates.iloc[0]:
            # Less than a full year of history: report placeholders.
            up_capture_oneYear = '-'
            down_capture_oneYear = '-'
            capture_ratio_oneYear = '-'
        else:
            # Filter on the configured date column instead of a hard-coded name.
            df_oneYear = df[dates >= one_year_ago]
            up_capture_oneYear, down_capture_oneYear, capture_ratio_oneYear = self.calculate_upDown_capture(df_oneYear)

        return up_capture_onDate, down_capture_onDate, capture_ratio_onDate, up_capture_oneYear, down_capture_oneYear, capture_ratio_oneYear

    def get_upDown_df(self):
        """Assemble the capture figures into the summary DataFrame."""
        up_capture_onDate, down_capture_onDate, capture_ratio_onDate, up_capture_oneYear, down_capture_oneYear, capture_ratio_oneYear = self.get_upDown_capture()

        summary_data = {
            'Capture 분류' : ['Up capture', 'Down capture', 'Capture ratio'],
            '직전 1년': [up_capture_oneYear, down_capture_oneYear, capture_ratio_oneYear],
            '설정일 이후': [up_capture_onDate, down_capture_onDate, capture_ratio_onDate]
        }

        return pd.DataFrame(summary_data)


In [227]:
class InvestmentPerformance:
    """Risk/return indicators for a fund and three benchmark indices.

    On construction the menu-8186 file for the fund is loaded and reduced to
    the date column plus the fund, KOSPI, KOSPI200 and KOSDAQ columns, and the
    indicator table is built. Results are exposed as:

    * ``df_reference``: the cleaned input frame, limited to ``input_date``.
    * ``df_investment_performance``: annualized volatility, Sharpe ratio and
      maximum drawdown per series.
    """

    def __init__(self, folder_path, fund_code, input_date, date_column):
        # Basic inputs
        self.folder_path = folder_path
        self.fund_code = fund_code
        self.input_date = input_date

        # Name of the date column used for period filtering
        self.date_column = date_column

        self.colnames_raw = ['수정기준가', 'KOSPI지수', 'KOSPI200지수', 'KOSDAQ지수']
        self.df_reference = self.get_df_reference()
        self.df_investment_performance = self.get_investment_performance_df()

    def open_df8186_raw(self):
        """Load the raw menu-8186 frame for this fund."""
        df_raw, _ = get_file_8186_as_df(folder_path=self.folder_path, fund_code=self.fund_code)

        return df_raw

    def select_columns(self, df):
        """Return a new frame holding only the date and price columns, re-indexed from 0."""
        return df[[self.date_column] + self.colnames_raw].reset_index(drop=True)

    def change_format_and_replace(self, df):
        """Convert the price columns to floats and forward-fill non-positive values."""
        for col in self.colnames_raw:
            convert_number_format(df, col)
            replace_values_less_than_or_equal_to_zero(df, col)

        return df

    def get_df_reference(self):
        """Build the cleaned frame that all calculations start from."""
        df = self.open_df8186_raw()
        df = self.select_columns(df)
        df = self.change_format_and_replace(df)
        # ``filter_by_date_range`` is not defined in this file (presumably it
        # comes from the ShiningPebbles star import); it is asked to limit the
        # rows to ``input_date``.
        df = filter_by_date_range(df, date_column=self.date_column, end_date=self.input_date)

        return df

    def _daily_returns(self):
        """Return the daily percentage changes of the price columns."""
        daily_return = self.df_reference[self.colnames_raw].pct_change()
        # NOTE(review): the first row has no previous value; it is set to 0, so
        # it counts as a zero-return day in the mean and standard deviation
        # instead of being skipped. Kept as-is so reported figures do not change.
        daily_return.iloc[0] = 0
        return daily_return

    def get_volatility(self):
        """Return annualized volatility in percent per column, rounded to 2 decimals.

        Computed as the standard deviation of daily returns times ``sqrt(252)``
        times 100.
        """
        daily_return_std = self._daily_returns().std()

        return {
            col: round(daily_return_std[col] * np.sqrt(252) * 100, 2)
            for col in self.colnames_raw
        }

    def get_sharpe_ratio(self):
        """Return the annualized Sharpe ratio per column, rounded to 2 decimals.

        The risk-free rate is assumed to be 0. A column whose daily returns
        have zero standard deviation gets None.
        """
        daily_return = self._daily_returns()
        daily_return_mean = daily_return.mean()
        daily_return_std = daily_return.std()

        sharpe_ratio = {}
        for col in self.colnames_raw:
            std = daily_return_std[col]
            if std == 0:
                # Check before dividing so no division by zero is attempted.
                sharpe_ratio[col] = None
            else:
                sharpe_ratio[col] = round(daily_return_mean[col] / std * np.sqrt(252), 2)
        return sharpe_ratio

    def get_mdd(self):
        """Return the maximum drawdown in percent per column, rounded to 2 decimals.

        The drawdown of each row is its value relative to the running maximum,
        minus 1; the most negative drawdown is reported. ``df_reference`` is
        not modified.
        """
        df = self.df_reference
        mdd = {}

        for col in self.colnames_raw:
            prices = df[col]
            # Keep the intermediate series local rather than adding helper
            # columns to the cached reference frame.
            drawdown = prices / prices.cummax() - 1
            # Drawdowns are <= 0, so the minimum is the deepest one.
            mdd[col] = round(drawdown.min() * 100, 2)

        return mdd

    def get_investment_performance_df(self):
        """Assemble volatility, Sharpe ratio and MDD into one table, one column per series."""
        volatility = self.get_volatility()
        sharpe_ratio = self.get_sharpe_ratio()
        mdd = self.get_mdd()

        # The display labels follow the order of ``colnames_raw``.
        columns = ['Indicators'] + ['Fund', 'KOSPI', 'KOSPI200', 'KOSDAQ']
        rows = [
            ['연율화 변동성'] + list(volatility.values()),
            ['Sharpe ratio'] + list(sharpe_ratio.values()),
            ['MDD'] + list(mdd.values())
        ]

        return pd.DataFrame(rows, columns=columns)

In [228]:

# Main class
class Report:
    """Entry point that builds the report sections for one fund and reference date.

    After construction the section tables are available as ``updownCapture``
    and ``investmentPerformance``.
    """

    def __init__(self, fund_code, input_date, folder_path='./캡스톤데이터2'):
        """Build every section.

        Args:
            fund_code: Fund code; stored as a string.
            input_date: Reference date handed to the section classes.
            folder_path: Folder holding the source CSV files. Defaults to the
                location that used to be hard-coded.
        """
        # Arguments
        self.fund_code = str(fund_code)
        self.input_date = input_date
        # Data folder
        self.folder_path = folder_path

        # Date variables: start_date is meant to be the fund inception date and
        # end_date the reference date.
        # NOTE(review): inception_date is never populated here, so start_date is None.
        self.inception_date = None
        self.fund_name = None
        self.start_date = self.inception_date
        self.end_date = self.input_date

        # Other state
        self.date_column = '일자'
        self.menu_num = None
        self.used_file_path = None
        self.df = None

        # Feature switches
        # 1. Toggle for the "2 years or more" option of the returns section
        #    (that section is not among the ones built below).
        self.returns_over2_years = False
        # 2. Save mode for section datasets
        self.save_option = 'file'

        # Sections
        u = UpDownCapture(folder_path=self.folder_path, fund_code=self.fund_code, input_date=self.input_date, date_column=self.date_column)
        i = InvestmentPerformance(folder_path=self.folder_path, fund_code=self.fund_code, input_date=self.input_date, date_column=self.date_column)

        # Section tables
        self.updownCapture = u.df_upDownCapture
        self.investmentPerformance = i.df_investment_performance

In [229]:
# Build the report for fund 100004 with reference date 2023-09-30,
# then display the investment-performance table.
r= Report(fund_code='100004', input_date='2023-09-30')
r.investmentPerformance

['menu8186-code100004-save202311021348.csv', 'menu8186-code100004-save202312011550.csv'] ['menu8186-code100004-save202312011550.csv', 'menu8186-code100004-save202311021348.csv']
./캡스톤데이터2\menu8186-code100004-save202312011550.csv
['menu8186-code100004-save202311021348.csv', 'menu8186-code100004-save202312011550.csv'] ['menu8186-code100004-save202312011550.csv', 'menu8186-code100004-save202311021348.csv']
./캡스톤데이터2\menu8186-code100004-save202312011550.csv


Unnamed: 0,Indicators,Fund,KOSPI,KOSPI200,KOSDAQ
0,연율화 변동성,16.01,13.32,13.99,19.4
1,Sharpe ratio,0.68,-0.59,-0.55,-0.26
2,MDD,-18.21,-34.29,-35.48,-38.53


In [230]:
# Display the up/down capture summary table of the report built above.
r.updownCapture

Unnamed: 0,Capture,직전 1년,설정일 이후
0,Up capture,0.88,0.83
1,Down capture,0.53,0.59
2,Capture ratio,1.67,1.41
