In [None]:
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import pandas_ta as ta
import twstock
import yfinance as yf
from tqdm import tqdm


In [2]:
def compute_rsi(df, column='Close', window=14):
    """
    Compute the RSI (Relative Strength Index) of a price column.

    Average gains and losses are smoothed with an exponentially weighted
    mean using alpha = 1/window (the smoothing factor of Wilder's method),
    with pandas' default ``adjust`` setting.

    df: pandas DataFrame containing price data; it is modified in place
    column: price column used to compute RSI
    window: lookback period; also the minimum number of observations
            required before a value is produced

    Returns the same DataFrame with the result stored in column 'RSI14'
    (the column name is fixed and does not follow ``window``).
    """
    # Row-over-row price change; the first row has no predecessor -> NaN
    change = df[column].diff()

    # Upward moves keep their size, downward moves become positive losses
    ups = change.clip(lower=0)
    downs = -change.clip(upper=0)

    # Same smoothing settings for both averages
    smoothing = dict(alpha=1/window, min_periods=window)
    mean_up = ups.ewm(**smoothing).mean()
    mean_down = downs.ewm(**smoothing).mean()

    # Relative Strength, then mapped onto the 0-100 RSI scale
    strength = mean_up / mean_down
    df['RSI14'] = 100 - (100 / (1 + strength))
    return df

def compute_bollinger_bands(df, column='Close', window=20, num_std=2):
    """
    Compute Bollinger Bands and the price's relative position inside them.

    The following columns are added to ``df`` in place:
      BB_Middle   -> simple moving average of ``column`` over ``window`` rows
      BB_Upper    -> BB_Middle + num_std * rolling standard deviation
      BB_Lower    -> BB_Middle - num_std * rolling standard deviation
      BB_position -> (price - BB_Lower) / (BB_Upper - BB_Lower)

    df: pandas DataFrame containing price data
    column: price column used for the bands AND for BB_position
    window: rolling window for SMA and standard deviation
    num_std: number of standard deviations for upper/lower bands

    Returns the same DataFrame. The first ``window - 1`` rows are NaN
    because the rolling window is not yet full. The division for
    BB_position is not guarded against zero-width bands.
    """
    price = df[column]
    rolling = price.rolling(window=window)

    # Middle band: Simple Moving Average (SMA)
    df['BB_Middle'] = rolling.mean()

    # Band half-width: num_std rolling standard deviations
    band_offset = rolling.std() * num_std
    df['BB_Upper'] = df['BB_Middle'] + band_offset
    df['BB_Lower'] = df['BB_Middle'] - band_offset

    # Relative position of the price within the bands:
    #   0   -> price is at the lower band
    #   0.5 -> price is at the middle band
    #   1   -> price is at the upper band
    # Uses the same `column` the bands were built from (previously this was
    # hard-coded to 'Close', which was wrong for any other column).
    df['BB_position'] = (price - df['BB_Lower']) / (df['BB_Upper'] - df['BB_Lower'])
    return df

def compute_atr(df, window=14):
    """
    Compute ATR (Average True Range) as a simple rolling mean of True Range.

    df: pandas DataFrame containing High, Low, Close columns; it is
        modified in place
    window: number of rows averaged for each ATR value

    Returns the same DataFrame with the result stored in column 'ATR'.
    """
    high = df['High']
    low = df['Low']

    # Close of the previous row (NaN on the first row)
    close_before = df['Close'].shift(1)

    # The three True Range candidates, side by side
    candidates = pd.concat(
        [
            high - low,
            (high - close_before).abs(),
            (low - close_before).abs(),
        ],
        axis=1,
    )

    # True Range is the row-wise maximum; NaN candidates are skipped, so the
    # first row falls back to High - Low
    true_range = candidates.max(axis=1)

    # ATR: rolling mean of True Range
    df['ATR'] = true_range.rolling(window=window).mean()

    return df

In [3]:
# Download daily trading data for each Taiwan stock ID with twstock, derive
# technical indicators, and save one CSV per stock under ./data.

# List of stock IDs to process
stock_ids = ['2330']

# Start date for downloading data
start_year = 2010
start_month = 1

# Create the output directory up front: without it, the save at the end of a
# long download fails and the error is only reported as a one-line message.
Path('./data').mkdir(parents=True, exist_ok=True)

print("Start downloading...")

# Loop through each stock ID with progress bar
for sid in tqdm(stock_ids):
    try:
        # Use twstock API to get the Stock object
        stock = twstock.Stock(sid)

        # Fetch daily trading data from the given year/month until now
        target_price = stock.fetch_from(start_year, start_month)

        # Define column names corresponding to twstock output
        name_attribute = ['Date', 'Capacity', 'Turnover', 'Open', 'High', 'Low', 'Close', 'Change', 'Transaction']

        # Convert raw data into pandas DataFrame
        df = pd.DataFrame(columns=name_attribute, data=target_price)

        # Remove rows with zero trading volume and reset index
        # Some downloaded days have zero Capacity, so we remove these rows from the dataset.
        df = df[df['Capacity'] != 0].reset_index(drop=True)

        # Nothing to compute indicators from: report it clearly instead of
        # letting a later indicator call fail with an unrelated-looking error.
        if df.empty:
            print(f"Error processing {sid}: no trading data returned")
            continue

        # --- Calculate technical indicators ---

        # Daily return rate (percent)
        df['rate'] = df['Close'].pct_change() * 100

        # Binary movement label: 1 for positive return, 0 for non-positive
        # (the first row has no return and is therefore labelled 0)
        df['Movement'] = np.where(df['rate'] > 0, 1, 0)

        # Moving averages
        df['MA5'] = df['Close'].rolling(window=5).mean()      # 5-day moving average
        df['MA10'] = df['Close'].rolling(window=10).mean()    # 10-day moving average
        df['MA5_Capacity'] = df['Capacity'].rolling(window=5).mean() # 5-day average volume

        # Bias ratio (price deviation from moving averages, in percentage)
        df['BR5'] = (df['Close'] - df['MA5']) / df['MA5'] * 100
        df['BR10'] = (df['Close'] - df['MA10']) / df['MA10'] * 100

        # Daily price ranges
        df['High-Low'] = df['High'] - df['Low']       # High price - low price
        df['Open-Close'] = df['Open'] - df['Close']   # Open price - close price

        # EMA (Exponential moving averages) of closing price
        df['EMA5'] = df['Close'].ewm(span=5, adjust=False).mean()   # 5-day EMA
        df['EMA10'] = df['Close'].ewm(span=10, adjust=False).mean() # 10-day EMA

        # Standard deviation
        df['STD20'] = df['Close'].rolling(20).std()

        # RSI (Relative Strength Index)
        df = compute_rsi(df)

        # Bollinger Bands
        df = compute_bollinger_bands(df)

        # ATR (Average True Range)
        df = compute_atr(df)

        # Stochastic Oscillator (KD indicator)
        kd = ta.stoch(df['High'], df['Low'], df['Close'])
        df['K'] = kd['STOCHk_14_3_3']
        df['D'] = kd['STOCHd_14_3_3']

        # MACD (Moving Average Convergence Divergence)
        macd = ta.macd(df['Close'])
        df['MACD'] = macd['MACD_12_26_9']
        df['MACD_signal'] = macd['MACDs_12_26_9']
        df['MACD_hist'] = macd['MACDh_12_26_9']

        # ADX (Average Directional Index)
        adx = ta.adx(df['High'], df['Low'], df['Close'])
        df['ADX'] = adx['ADX_14']

        # save to csv file
        filename = f"./data/{sid}.csv"
        df.to_csv(filename, index=False)

    except Exception as e:
        # Best effort: one failing stock must not stop the remaining ones
        print(f"Error processing {sid}: {e}")

Start downloading...


100%|██████████| 1/1 [01:29<00:00, 89.03s/it]


In [None]:
# Download US stock / index data
#
# Download daily historical data for US stocks and indices and the Taiwan
# weighted index using yfinance, save them as CSV files in the ./data
# directory, then normalise each file written in this run (tidy the header,
# add a daily return column).

# 1. the list of ticker symbols to download
tickers = ['^DJI', 'TSM', '^IXIC', '^SPX', '^SOX', '^TWII']

# 2. the corresponding output file names
name = ['DJI', 'ADR', "NASDAQ", 'SPX', 'SOX', 'TW']

# Create the output directory up front so saving cannot fail on a fresh checkout
Path('./data').mkdir(parents=True, exist_ok=True)

print(f"download: {name}")

# Names whose CSV was actually written during this run. Only these are
# post-processed below: a name whose download failed either has no file at
# all, or has a file from an earlier run that was already normalised.
downloaded = []

# download each stock data and save as csv
for ticker, stock_name in zip(tickers, name):

    try:
        # Download daily historical data via yfinance
        df = yf.download(
            ticker,
            start="2009-12-01",
            end=datetime.now().strftime('%Y-%m-%d'),
            interval="1d",
            auto_adjust=True, # adjust for splits/dividends
            progress=False  # hide download progress bar
        )

        if not df.empty:
            df.to_csv(f'./data/{stock_name}.csv')
            downloaded.append(stock_name)
            print(f" -> Success! Saved as: {stock_name}.csv")

        else:
            print(f" -> Failed: No data for {ticker} (check ticker symbol)")

    except Exception as e:
        # Best effort: report the failure and continue with the next ticker
        print(f" -> Error: {e}")

print("\nfinish")

# modify the format
for i in downloaded:

    df = pd.read_csv(f'./data/{i}.csv')

    # The first two data rows and the 'Price' header cell are treated as
    # leftovers of a multi-level column header in the saved file (presumably
    # yfinance's (Price, Ticker) layout -- verify against the installed
    # version). Strip them only when that header is present, so a file with a
    # plain single-row header does not lose two real trading days.
    if 'Price' in df.columns:
        df = df.drop(index=[0, 1]).rename(columns={'Price': 'Date'})

    # Convert the Close column to numeric type
    df['Close'] = pd.to_numeric(df['Close'], errors='coerce')

    # calculate daily return rate (percent)
    df['rate'] = df['Close'].pct_change() * 100

    df.to_csv(f'./data/{i}.csv', index=False)


download: ['DJI', 'ADR', 'NASDAQ', 'SPX', 'SOX']
 -> Success! Saved as: DJI.csv
 -> Success! Saved as: ADR.csv
 -> Success! Saved as: NASDAQ.csv
 -> Success! Saved as: SPX.csv
 -> Success! Saved as: SOX.csv

finish
