# 0: 下準備

## 0.1: ライブラリのインポート

In [1]:
import pandas as pd
import pandas_datareader.data as web
import numpy as np
import matplotlib.pyplot as plt
import datetime
import yfinance as yf
import statsmodels.stats.api as sms
from statsmodels.compat import lzip
from scipy.optimize import minimize
import cvxpy as cp
import pytz
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.seasonal import STL
import seaborn as sns
from sklearn.model_selection import TimeSeriesSplit
import statsmodels.tsa.api as smt
from copy import deepcopy
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import arma_order_select_ic
import warnings
warnings.filterwarnings("ignore")
import statsmodels.tools.eval_measures
import scipy 
from scipy import stats
import time
from attrdict import AttrDict

## 0.2: データのインポート/作成

In [1]:
def convert_to_tickers_format(raw_tickers):
    ''' Raw tickersを対象フォーマットに変換 '''
    return [ticker + '.T' for ticker in raw_tickers]


def get_ticker_to_name_mapping(tickers):
    ''' Tickerの企業名の辞書を作成 '''
    ticker_to_name = {}
    for ticker in tickers:
        try:
            company_info = yf.Ticker(ticker)
            company_name = company_info.info["shortName"]
            ticker_to_name[ticker] = company_name
        except:
            ticker_to_name[ticker] = None
    return ticker_to_name


all_raw_tickers = [
    "1925", "1928", "2413", "2502", "2503", "2802", "2914", "3382", "3402", "3407", "4063", "4188", "4452", "4502", "4503", "4507", "4519", "4523", "4528", "4543", "4568", "4578", "4661", "4689", "4901", "4911", "5020", "5108", "5401", "5713", "5802", "6098", "6178", "6273", "6301", "6326", "6367", "6501", "6502", "6503", "6586", "6594", "6645", "6702", "6752", "6758", "6861", "6869", "6902", "6920", "6954", "6971", "6981", "7011", "7201", "7203", "7267", "7269", "7270", "7309", "7733", "7741", "7751", "7832", "7974", "8001", "8002", "8031", "8035", "8053", "8058", "8113", "8267", "8306", "8308", "8309", "8316", "8411", "8591", "8604", "8630", "8697", "8725", "8750", "8766", "8801", "8802", "8830", "9020", "9021", "9022", "9101", "9202", "9432", "9433", "9434", "9735", "9843", "9983", "9984", "TOPIX100", '1475'
]

all_tickers = convert_to_tickers_format(all_raw_tickers)
dict_ticker_to_name = get_ticker_to_name_mapping(all_tickers)
print(dict_ticker_to_name)

{'1925.T': None, '1928.T': None, '2413.T': None, '2502.T': None, '2503.T': None, '2802.T': None, '2914.T': None, '3382.T': None, '3402.T': None, '3407.T': None, '4063.T': None, '4188.T': None, '4452.T': None, '4502.T': None, '4503.T': None, '4507.T': None, '4519.T': None, '4523.T': None, '4528.T': None, '4543.T': None, '4568.T': None, '4578.T': None, '4661.T': None, '4689.T': None, '4901.T': None, '4911.T': None, '5020.T': None, '5108.T': None, '5401.T': None, '5713.T': None, '5802.T': None, '6098.T': None, '6178.T': None, '6273.T': None, '6301.T': None, '6326.T': None, '6367.T': None, '6501.T': None, '6502.T': None, '6503.T': None, '6586.T': None, '6594.T': None, '6645.T': None, '6702.T': None, '6752.T': None, '6758.T': None, '6861.T': None, '6869.T': None, '6902.T': None, '6920.T': None, '6954.T': None, '6971.T': None, '6981.T': None, '7011.T': None, '7201.T': None, '7203.T': None, '7267.T': None, '7269.T': None, '7270.T': None, '7309.T': None, '7733.T': None, '7741.T': None, '7751.T

In [None]:
'''定数の設定'''
YEARS_BACK = 10  # 取得するデータの期間を10年とする
INTERVALS = ["1mo", "1wk", "1d"]  # 取得するデータの間隔

def fetch_and_save_data(tickers, interval):
    ''' 指定されたティッカーと間隔に基づいてデータを取得し、CSVとして保存する '''
    result_df = pd.DataFrame()
    end_date = pd.Timestamp.now()
    start_date = end_date - pd.DateOffset(years=YEARS_BACK)

    for ticker in tickers:
        try:
            data = yf.download(ticker, start=start_date, end=end_date, interval=interval)
            result_df[ticker] = data["Close"]
        except Exception as e:
            print(f"Error fetching data for {ticker}. Error: {e}")

    result_df.reset_index(inplace=True)
    csv_filename = f"/Users/klynoaguilar/Desktop/01_school/Zemi/23_10_27_weeek-03/topix100_{interval}_data.csv"
    result_df.to_csv(csv_filename, index=False)
    print(f"Data fetching and saving for {interval} completed!")

def main_making_data():
    for interval in INTERVALS:
        fetch_and_save_data(all_tickers, interval)

if __name__ == "__main__":
    main_making_data()

In [36]:
df_mo = pd.read_csv('./topix100_1mo_data.csv')
df_we = pd.read_csv('./topix100_1wk_data.csv')
df_da = pd.read_csv('./topix100_1d_data.csv')

In [68]:
ALL_DATA = {
  'tickers': {
    'tc_all_raw': all_raw_tickers,
    'tc_all_formatted': all_tickers,
    'tc_dict_ticker_to_company_name': dict_ticker_to_name
  },
  'dataframe': {
    'df_mo': df_mo,
    'df_we': df_we,
    'df_da': df_da
  }
}

# 上で定義したID辞書をAttrDictに渡す
ALL_DATA = AttrDict(ALL_DATA) 

## 0.4: データの整形

# 1: データ分析

## 1.1: データの前処理

In [69]:
# 各DataFrameでNaNを含む行を排除する関数
def remove_na_rows(df):
    before_rows = len(df)
    df.dropna(inplace=True)
    after_rows = len(df)
    removed_rows = before_rows - after_rows
    return removed_rows

# ALL_DATA内の各DataFrameに関してNaNを含む行を排除する
for key in ALL_DATA.dataframe:
    removed_rows = remove_na_rows(ALL_DATA.dataframe[key])
    print(f"{key} had {removed_rows} rows with NaN values removed.")

# この後、ALL_DATA.dataframe['df-monthly']などでアクセスすると、NaNを含む行が排除されたDataFrameが返されます。

df_mo had 0 rows with NaN values removed.
df_we had 0 rows with NaN values removed.
df_da had 0 rows with NaN values removed.


## 0.3: 関数の作成