In [1]:
import os
import glob
import pandas as pd
from datetime import datetime
import locale
import re
import numpy as np
from pandas.tseries.offsets import DateOffset, MonthEnd
from dateutil.relativedelta import relativedelta

In [2]:
class File_manager:

    # 생성자
    def __init__(self, fund_code):
        # Ensure the fund code is a string
        self.fund_code = str(fund_code)
        self.df = None
        self.folder_path = '/Users/ihan-il/Desktop/Python Folder/FUND_CLASS/캡스톤데이터2'

    # 파일 이름 가져오기
    def scan_files_including_regex(self, file_folder, regex, option='path'):
        with os.scandir(file_folder) as files:
            lst = [file.name for file in files if re.findall(regex, file.name)]
        mapping = {
            'name': lst,
            'path': [os.path.join(file_folder, file_name) for file_name in lst]
        }
        return mapping[option]
        
    # 정규 표현식을 사용하여 날짜와 시간 부분을 추출하는 함수
    def extract_datetime(self, file_name):
        # 정규 표현식으로 날짜와 시간 추출 (시간은 옵션)
        match = re.search(r'save(\d{4})(\d{2})(\d{2})(\d{4})?', file_name)
        if match:
            # 연, 월, 일은 항상 있으며, 시간이 없는 경우 0000으로 설정
            date_part = match.group(1) + match.group(2) + match.group(3)
            time_part = match.group(4) if match.group(4) else "0000"
            return datetime.strptime(date_part + time_part, '%Y%m%d%H%M')
        else:
            return None

    # 최근의 파일 가져오기 
    def get_latest_file(self, file_code):
        file_code = str(file_code)
        file_list = self.scan_files_including_regex(self.folder_path, regex = f'menu{file_code}-code{self.fund_code}')
        sorted_file_list = sorted(file_list, key=self.extract_datetime, reverse=True)

        if sorted_file_list:
            return sorted_file_list[0]
        else:
            raise FileNotFoundError(f"No file found with file code {file_code} and fund code {self.fund_code}")

    
    def get_file_as_df_with_filecode(self, file_code, merge_header=False):
        # 파일 리스트를 수정 시간 기준으로 내림차순 정렬하여, 가장 최근에 수정된 파일이 리스트의 첫 번째에 오도록 합니다.
        latest_file = self.get_latest_file(file_code)
        
        # merge_header가 참일 경우
        if merge_header:
            # Read CSV without headers
            df = pd.read_csv(latest_file, header=None)
            # Get the first two rows to create the header
            headers = self.create_header(df.iloc[0], df.iloc[1])
            # Assign the new headers to the DataFrame and drop the first two rows
            df.columns = headers
            df = df.drop([0, 1])
        else:
            # Read CSV with default headers
            df = pd.read_csv(latest_file)
        return df
    
    # m2205를 위한 데이터 프레임 가지고 오는 법 
    def get_file_as_df_with_filepath(self, file_path, merge_header=False):
        # 파일 리스트를 수정 시간 기준으로 내림차순 정렬하여, 가장 최근에 수정된 파일이 리스트의 첫 번째에 오도록 합니다.
        
        # merge_header가 참일 경우
        if merge_header:
            # Read CSV without headers
            df = pd.read_csv(file_path, header=None)
            # Get the first two rows to create the header
            headers = self.create_header(df.iloc[0], df.iloc[1])
            # Assign the new headers to the DataFrame and drop the first two rows
            df.columns = headers
            df = df.drop([0, 1])
        else:
            # Read CSV with default headers
            df = pd.read_csv(file_path)
        return df


    # 데이터프레임 헤더 처리
    def create_header(self, row1, row2):
        # Combine row1 and row2 according to the specified rules
        header = []
        for col1, col2 in zip(row1, row2):
            # 개행 문자를 빈 문자열로 바꿈
            clean_col1 = col1.replace('\n', '') if isinstance(col1, str) else col1
            clean_col2 = col2.replace('\n', '') if isinstance(col2, str) else col2
            
            # If the second row has a value, use it, otherwise use the first row's value
            header.append(clean_col2 if pd.notnull(clean_col2) and clean_col2 != '' else clean_col1)
        return header
    
    def make_unique_column_names(self, df):
        # 컬럼 이름에 중복이 있는 경우, 고유한 이름을 생성합니다.
        cols = df.columns
        unique_cols = {}
        new_cols = []
        
        for col in cols:
            if col in unique_cols:
                unique_cols[col] += 1
                new_col = f"{col}.{unique_cols[col]}"
            else:
                unique_cols[col] = 1
                new_col = col
            new_cols.append(new_col)
        
        df.columns = new_cols
    
    def save_df(self):

        # Define the folder path for 'save_file' directory
        folder_path = 'save_file'

        # Check if folder exists, if not, create it
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        # Get the current timestamp
        save_timestamp = datetime.now().strftime('%Y%m%d_%H%M')

        # Define the file name with current timestamp
        file_name = f'menu_2305_save{save_timestamp}.csv'

        # Complete file path
        file_path = os.path.join(folder_path, file_name)

        # Save the dataframe to CSV
        self.df.to_csv(file_path, index=False)

        print(f"File saved to {file_path}")

# 그 외 공통 메서드들
class Etc_df_Processor:
    def __init__(self, df):
        self.df = df
        
    def convert_number_format(self, column_name):
        self.df[column_name] = self.df[column_name].apply(self._convert_number_format_helper).astype(float)

    def _convert_number_format_helper(self, s):
        return float(s.replace(',', '')) if isinstance(s, str) else s

    def replace_values_less_than_or_equal_to_zero(self, *column_names):
        for column_name in column_names:
            self.df.loc[self.df[column_name] <= 0, column_name] = None
            self.df[column_name].ffill(inplace=True)

In [3]:
class Performance_Indicators:
    def __init__(self, fund_code, start_date=None, end_date=None):

        # fund_code        
        self.fund_code = fund_code
        self.file_manager = File_manager(fund_code)
        
        self.menu_num = '8186'
        self.menu_num_2 = '2820'
        self.start_date = start_date
        self.end_date = end_date

        self.df = None

        self.date_column = '일자'
        
    def open_df8186_raw(self) :
        # 원시 데이터 열기 및 선택된 컬럼 추출
        df_raw = self.file_manager.get_file_as_df_with_filecode(self.menu_num)
        self.df = df_raw                              
        return self.df    
    
    def filter_by_date_range(self):

        # '일자' 컬럼을 datetime 타입으로 변환
        self.df[self.date_column] = pd.to_datetime(self.df[self.date_column])

        # start_date와 end_date에 따라 조건을 설정합니다.
        if self.start_date is None and self.end_date is not None:
            self.df = self.df[self.df[self.date_column] <= self.end_date]
        elif self.start_date is not None and self.end_date is None:
            self.df = self.df[self.df[self.date_column] >= self.start_date]
        elif self.start_date is not None and self.end_date is not None:
            self.df = self.df[(self.df[self.date_column] >= self.start_date) & 
                                  (self.df[self.date_column] <= self.end_date)]

        return self.df
    
    def select_columns(self):
        self.df = self.df[['일자', '수정기준가', 'KOSPI지수']]
        self.df.reset_index(drop=True, inplace=True)
        
    def change_format_and_replace(self):
        etc_processor = Etc_df_Processor(self.df)
        etc_processor.convert_number_format('수정기준가')
        etc_processor.convert_number_format('KOSPI지수')
        
        etc_processor.replace_values_less_than_or_equal_to_zero('KOSPI지수')
        
        self.df = etc_processor.df
        
    def adjust_percent(self, *columns):
        for column_name in columns:
            initial_value = self.df[column_name].iloc[0]
            self.df[column_name + ' 누적수익률'] = ((self.df[column_name] - initial_value) / initial_value) * 100
            self.df.loc[0, column_name + ' 누적수익률'] = 0
            
    def calculate_annualized_returns(self, *cumulative_return_columns):
        # 날짜 컬럼의 첫 번째와 마지막 날짜 차이를 일수로 계산
        total_days = (self.df['일자'].iloc[-1] - self.df['일자'].iloc[0]).days

        # 주어진 누적수익률 컬럼들에 대해 연환산 수익률 계산
        for column in cumulative_return_columns:
            annualized_return_column = column + ' 연환산 수익률'
            self.df[annualized_return_column] = self.df[column] * 365 / total_days

        return self.df
    

    def calculate_volatility(self, *return_columns):
        for column in return_columns:
            # 일일 수익률 계산 
            daily_returns = self.df[column].pct_change()

            # 일일 수익률의 표준편차 계산
            volatility = daily_returns.std()

            # 연율화 변동성 컬럼명
            annualized_volatility_column = column + ' 연율화 변동성'

            # 연율화 변동성 계산 및 데이터프레임에 추가
            self.df[annualized_volatility_column] = volatility * (365 ** 0.5)

        return self.df

    def calculate_sharpe_ratio(self, return_column):
        # 연율화 변동성 컬럼명
        volatility_column = return_column.replace(' 누적수익률 연환산 수익률', '') + ' 연율화 변동성'
        risk_free_rate=0
        # 샤프 비율 계산 및 데이터프레임에 추가
        sharpe_ratio = (self.df[return_column].iloc[-1] - risk_free_rate) / self.df[volatility_column].iloc[-1]
        column_name = return_column.replace(' 누적수익률 연환산 수익률', '')
        self.df[column_name + ' 샤프 비율'] = sharpe_ratio

        return self.df
    
    def open_df2820_raw(self) :
        # 원시 데이터 열기 및 선택된 컬럼 추출
        data_raw = self.file_manager.get_file_as_df_with_filecode(self.menu_num_2)
        self.data = data_raw                              
        return self.data
    
    
    def calculate_winning_ratio(self):
        # Filtering for sell transactions
        sell_transactions = self.data[self.data['매매구분'].isin(['주식장내매도', 'ETF장내매도'])]
        # Filtering for transactions with positive realized profit rate
        winning_transactions = sell_transactions[sell_transactions['실현수익률'] > 0]

        # Calculate winning ratio
        winning_ratio = len(winning_transactions) / len(sell_transactions) if len(sell_transactions) > 0 else 0

        return winning_ratio
    
    def calculate_max_drawdown(self, column):
        mdd_values = []
        for index, max_value in self.df[column].iteritems():
            min_value_after = self.df.loc[index:, column].min()
            current_mdd = (max_value - min_value_after) / max_value if max_value > 0 else 0
            mdd_values.append(current_mdd)

        # 최대 MDD 값 계산
        max_mdd = max(mdd_values)

        # 최대 MDD를 데이터프레임에 추가
        self.df[column + ' 최대 MDD'] = max_mdd

        return self.df
    
    def get_df_performace(self):
        self.open_df8186_raw()
        self.change_format_and_replace()
        self.filter_by_date_range()
        self.select_columns()

        self.adjust_percent('수정기준가','KOSPI지수')
        self.calculate_annualized_returns('수정기준가 누적수익률', 'KOSPI지수 누적수익률')
        self.calculate_volatility('수정기준가', 'KOSPI지수')
        self.calculate_sharpe_ratio('수정기준가 누적수익률 연환산 수익률')
        self.calculate_sharpe_ratio('KOSPI지수 누적수익률 연환산 수익률')
        self.calculate_max_drawdown('수정기준가')
        self.calculate_max_drawdown('KOSPI지수')
        
        return self.df
    
    def get_winning_ratio(self):
        self.open_df2820_raw()
        winning_ratio = self.calculate_winning_ratio()
        
        return winning_ratio
        

In [4]:
performance_indicators = Performance_Indicators(fund_code='A00001')

# 원시 데이터 열기
performance_indicators.get_winning_ratio()

0.5470249520153551

In [6]:
m = Performance_Indicators(fund_code='A00001')
df = m.get_df_performace()
df

  for index, max_value in self.df[column].iteritems():
  for index, max_value in self.df[column].iteritems():


Unnamed: 0,일자,수정기준가,KOSPI지수,수정기준가 누적수익률,KOSPI지수 누적수익률,수정기준가 누적수익률 연환산 수익률,KOSPI지수 누적수익률 연환산 수익률,수정기준가 연율화 변동성,KOSPI지수 연율화 변동성,수정기준가 샤프 비율,KOSPI지수 샤프 비율,수정기준가 최대 MDD,KOSPI지수 최대 MDD
0,2023-05-11,999.71,2491.00,0.000000,0.000000,0.000000,0.000000,0.104745,0.139876,72.585299,-73.007532,0.055716,0.145883
1,2023-05-12,998.68,2475.42,-0.103030,-0.625452,-0.213670,-1.297101,0.104745,0.139876,72.585299,-73.007532,0.055716,0.145883
2,2023-05-13,998.78,2475.42,-0.093027,-0.625452,-0.192925,-1.297101,0.104745,0.139876,72.585299,-73.007532,0.055716,0.145883
3,2023-05-14,998.83,2475.42,-0.088026,-0.625452,-0.182553,-1.297101,0.104745,0.139876,72.585299,-73.007532,0.055716,0.145883
4,2023-05-15,1000.29,2479.35,0.058017,-0.467684,0.120319,-0.969912,0.104745,0.139876,72.585299,-73.007532,0.055716,0.145883
...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,2023-10-30,1035.74,2310.55,3.604045,-7.244079,7.474298,-15.023231,0.104745,0.139876,72.585299,-73.007532,0.055716,0.145883
173,2023-10-31,1034.18,2277.99,3.448000,-8.551184,7.150682,-17.733990,0.104745,0.139876,72.585299,-73.007532,0.055716,0.145883
174,2023-11-01,1037.94,2301.56,3.824109,-7.604978,7.930681,-15.771687,0.104745,0.139876,72.585299,-73.007532,0.055716,0.145883
175,2023-11-02,1035.02,2343.12,3.532024,-5.936572,7.324937,-12.311640,0.104745,0.139876,72.585299,-73.007532,0.055716,0.145883
