## Split Normalization

1. yfinance의 dividend 데이터가 액면분할 된 것인지 확인
2. 모든 티커가 포함된 배당금 정렬 csv 도출
3. 기간은 연-월 포맷으로
4. 도출 후 배당금이 감소하는 구간이 있는지 확인(액면분할이 잘못 반영된 부분이 있을지도 모르니까)

In [7]:
import os
import pandas as pd
from datetime import datetime

def load_and_process_dividend_data_pivoted(folder_path, start_date='2014-02-01', end_date='2024-02-28'):
    """Build a month-by-ticker table of dividends from per-ticker CSV files.

    Every file in ``folder_path`` whose name ends with ``_d.csv`` is read. The
    part of the file name in front of that suffix is used as the ticker, so a
    ticker that itself contains an underscore (``BRK_B_d.csv``) is kept whole.
    Each file must provide a ``Date`` and a ``Dividends`` column.

    Args:
        folder_path: Directory containing the ``<ticker>_d.csv`` files.
        start_date: Lower bound of the window (anything ``pd.to_datetime``
            accepts). Defaults to the originally hard-coded 2014-02-01.
        end_date: Upper bound of the window. Defaults to the originally
            hard-coded 2024-02-28.

    Returns:
        A DataFrame indexed by ``'YYYY-MM'`` strings (index name ``Date``) with
        one column per ticker (columns name ``Ticker``). Each cell is the sum of
        the dividends recorded in that month; months without a dividend are 0.
        An empty DataFrame is returned when no file contributes any row.
    """
    suffix = '_d.csv'
    start = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)

    frames = []
    # Sorted so the processing order does not depend on the file system.
    for file in sorted(f for f in os.listdir(folder_path) if f.endswith(suffix)):
        # Strip only the suffix; splitting on '_' would truncate e.g. 'BRK_B'.
        ticker = file[:-len(suffix)]
        data = pd.read_csv(os.path.join(folder_path, file))

        # Rows are compared by the first day of their month, so a month is
        # included whenever its start falls inside [start, end].
        months = pd.to_datetime(data['Date']).dt.to_period('M')
        month_start = months.dt.start_time
        in_range = (month_start >= start) & (month_start <= end)
        if not in_range.any():
            continue

        # Copy before assigning so we never write into a view of the raw frame.
        data = data.loc[in_range].copy()
        data['Date'] = months[in_range].astype(str)  # 'YYYY-MM' labels
        data['Ticker'] = ticker
        frames.append(data)

    if not frames:
        # Nothing to pivot: return an empty table with the usual axis names.
        return pd.DataFrame(index=pd.Index([], name='Date'),
                            columns=pd.Index([], name='Ticker'))

    # Concatenate once instead of re-copying the accumulated frame per file.
    combined = pd.concat(frames, ignore_index=True)

    # Dates become rows, tickers become columns; same-month payments are summed.
    pivoted = combined.pivot_table(index='Date', columns='Ticker', values='Dividends', aggfunc='sum')
    return pivoted.fillna(0)




In [8]:
# Build the month-by-ticker dividend table from the '*_d.csv' files in 'Data_Dividend'.
pivoted_dividend_data = load_and_process_dividend_data_pivoted('Data_Dividend')
# Bare expression: shown as this notebook cell's output.
pivoted_dividend_data

Ticker,ABBV,ABT,ADM,ADP,AFL,ALB,AMCR,AOS,APD,ATO,...,SHW,SJM,SPGI,SWK,SYY,TGT,TROW,WMT,WST,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-02,0.00,0.00,0.24,0.000000,0.185,0.000,0.176,0.075,0.000000,0.370,...,0.183333,0.58,0.30,0.00,0.00,0.43,0.00,0.00,0.0,0.63
2014-03,0.00,0.00,0.00,0.421422,0.000,0.275,0.000,0.000,0.712303,0.000,...,0.000000,0.00,0.00,0.50,0.00,0.00,0.44,0.16,0.0,0.00
2014-04,0.42,0.22,0.00,0.000000,0.000,0.000,0.000,0.075,0.000000,0.000,...,0.000000,0.00,0.00,0.00,0.29,0.00,0.00,0.00,0.1,0.00
2014-05,0.00,0.00,0.24,0.000000,0.185,0.000,0.000,0.000,0.000000,0.370,...,0.183333,0.58,0.30,0.00,0.00,0.43,0.00,0.16,0.0,0.69
2014-06,0.00,0.00,0.00,0.421422,0.000,0.275,0.000,0.000,0.712303,0.000,...,0.000000,0.00,0.00,0.50,0.00,0.00,0.44,0.00,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10,1.48,0.51,0.00,0.000000,0.000,0.000,0.000,0.320,0.000000,0.000,...,0.000000,0.00,0.00,0.00,0.50,0.00,0.00,0.00,0.0,0.00
2023-11,0.00,0.00,0.45,0.000000,0.420,0.000,0.125,0.000,0.000000,0.805,...,0.605000,1.06,0.90,0.81,0.00,1.10,0.00,0.00,0.2,0.95
2023-12,0.00,0.00,0.00,1.400000,0.000,0.400,0.000,0.000,1.750000,0.000,...,0.000000,0.00,0.00,0.00,0.00,0.00,1.22,0.19,0.0,0.00
2024-01,1.55,0.55,0.00,0.000000,0.000,0.000,0.000,0.320,0.000000,0.000,...,0.000000,0.00,0.00,0.00,0.50,0.00,0.00,0.00,0.2,0.00


In [9]:
# Save the pivoted table to a CSV file
output_path = '박상현이 말아주는 액면분할 정상화.csv'  # name/path of the file to write
pivoted_dividend_data.to_csv(output_path, index=True)  # keep the index (the row labels) in the file

In [10]:
def check_declining_trends(df):
    """Flag columns whose positive values fall at every step.

    Values that are not greater than 0 are ignored, so only the remaining
    (positive) entries of each column are compared with their predecessor.

    Args:
        df: DataFrame of numeric values, one series per column.

    Returns:
        A boolean Series indexed by column name. An entry is True only when
        the column has at least two positive values and each of them is
        strictly smaller than the positive value before it.
    """
    # Mask out everything that is not strictly positive (becomes NaN).
    positive_only = df[df > 0]

    def _is_strictly_declining(column):
        # diff() always yields NaN for the first element; comparing that NaN
        # with 0 is False and would make all() False for every column, so the
        # first entry is dropped before testing.
        steps = column.dropna().diff().iloc[1:]
        if len(steps) == 0:
            # Fewer than two positive values: there is no trend to speak of
            # (all() on an empty series would be vacuously True).
            return False
        return bool((steps < 0).all())

    return positive_only.apply(_is_strictly_declining)

# Evaluate every ticker column of pivoted_dividend_data for a declining trend.
declining_trends = check_declining_trends(pivoted_dividend_data)

# Show only the flagged tickers; the boolean Series doubles as its own mask.
declining_trends[declining_trends]


Series([], dtype: bool)