In [1]:
from pathlib import Path

import pandas as pd
import yfinance as yf

In [3]:
# Read the ticker universe from disk and keep only the first five symbols.
stock_list = pd.read_csv('stock_list.csv')
hk_tickers = stock_list['Ticker'].head(5)
hk_tickers

0    0001.HK
1    0002.HK
2    0003.HK
3    0004.HK
4    0005.HK
Name: Ticker, dtype: object

In [4]:
# Calendar years to summarise. The download window is derived from this list
# so the two cannot drift apart when a year is added or removed.
years = [2021, 2022, 2023, 2024]
download_start = f"{min(years)}-01-01"
download_end = f"{max(years) + 1}-01-01"  # yfinance treats `end` as exclusive


def _empty_year_summary():
    """Return the placeholder entry used when a year has no usable prices."""
    return {'First Trading Day Close': None, 'Last Trading Day Close': None}


def _summarise_closes(prices, years):
    """Map each year to its first and last available closing price.

    Parameters
    ----------
    prices : pandas.DataFrame
        Price history indexed by date, containing a 'Close' column.
    years : iterable of int
        Calendar years to report.

    Returns
    -------
    dict
        ``{year: {'First Trading Day Close': float | None,
                  'Last Trading Day Close': float | None}}``
        with values rounded to 5 decimals, or ``None`` when the year has no
        non-missing close.
    """
    close = prices['Close']
    # `prices['Close']` is a one-column DataFrame when the columns form a
    # (field, ticker) MultiIndex and a plain Series otherwise; normalise it.
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    # Ignore rows without a close so a gap on a boundary day cannot leak NaN
    # into the summary.
    close = close.dropna()

    summary = {}
    for year in years:
        in_year = close[close.index.year == year]
        if in_year.empty:
            summary[year] = _empty_year_summary()
        else:
            summary[year] = {
                'First Trading Day Close': round(float(in_year.iloc[0]), 5),
                'Last Trading Day Close': round(float(in_year.iloc[-1]), 5),
            }
    return summary


# Per-ticker results: {ticker: {year: {first/last close}}}
results = {}

for ticker in hk_tickers:
    try:
        # auto_adjust is pinned explicitly: its default changed between
        # yfinance releases, so relying on it makes the numbers depend on the
        # installed version. With auto_adjust=True, 'Close' is the adjusted close.
        prices = yf.download(
            ticker,
            start=download_start,
            end=download_end,
            auto_adjust=True,
        )
        results[ticker] = _summarise_closes(prices, years)

    except Exception as e:
        # Best effort: report the failure and keep a placeholder row per year
        # so one bad ticker does not abort the whole run.
        print(f"Error fetching data for {ticker}: {e}")
        results[ticker] = {year: _empty_year_summary() for year in years}

# Flatten the nested results into one record per (ticker, year)
df_list = [
    {
        'Ticker': ticker,
        'Year': year,
        'First_Close': closes['First Trading Day Close'],
        'Last_Close': closes['Last Trading Day Close'],
    }
    for ticker, yearly in results.items()
    for year, closes in yearly.items()
]

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [5]:
# Turn the collected (ticker, year) records into a long-format table
df = pd.DataFrame(data=df_list)

# Rows keep the order in which the records were collected; if a sorted view is
# ever needed, use df.sort_values(by=['Ticker', 'Year']).reset_index(drop=True)

# Show the table
df

Unnamed: 0,Ticker,Year,First_Close,Last_Close
0,0001.HK,2021,45.0211,42.41904
1,0001.HK,2022,43.26236,41.55441
2,0001.HK,2023,41.95355,39.39933
3,0001.HK,2024,38.83447,41.55
4,0002.HK,2021,59.52163,66.83364
5,0002.HK,2022,67.68231,50.50368
6,0002.HK,2023,50.50368,60.28979
7,0002.HK,2024,59.7753,64.15617
8,0003.HK,2021,9.31115,10.43065
9,0003.HK,2022,10.55094,6.64592


In [6]:
# Reshape long -> wide: one row per ticker, one column per (measure, year) pair
wide = df.pivot(index='Ticker', columns='Year', values=['First_Close', 'Last_Close'])

# Collapse the two-level column labels into flat names such as "First_Close_2021"
wide.columns = ['_'.join((measure, str(yr))) for measure, yr in wide.columns]

# Move Ticker from the index back into an ordinary column
df_wide = wide.reset_index()

# Preview the first rows
df_wide.head(10)

Unnamed: 0,Ticker,First_Close_2021,First_Close_2022,First_Close_2023,First_Close_2024,Last_Close_2021,Last_Close_2022,Last_Close_2023,Last_Close_2024
0,0001.HK,45.0211,43.26236,41.95355,38.83447,42.41904,41.55441,39.39933,41.55
1,0002.HK,59.52163,67.68231,50.50368,59.7753,66.83364,50.50368,60.28979,64.15617
2,0003.HK,9.31115,10.55094,6.74444,5.60015,10.43065,6.64592,5.64737,6.19
3,0004.HK,18.39696,21.87647,21.98577,23.52696,22.24804,21.79583,24.45054,21.42258
4,0005.HK,31.62503,38.02259,41.25123,57.15973,38.06317,41.12417,57.20513,73.41429


In [7]:
# Within-year price change per ticker: last close minus first close,
# computed for every year listed in `years`.
difference_columns = {
    f'Close_Difference_{year}': df_wide[f'Last_Close_{year}'] - df_wide[f'First_Close_{year}']
    for year in years
}

# Ticker first, then one difference column per year in `years` order
df_diff = pd.DataFrame({'Ticker': df_wide['Ticker'], **difference_columns})

# Show the result
df_diff

Unnamed: 0,Ticker,Close_Difference_2021,Close_Difference_2022,Close_Difference_2023,Close_Difference_2024
0,0001.HK,-2.60206,-1.70795,-2.55422,2.71553
1,0002.HK,7.31201,-17.17863,9.78611,4.38087
2,0003.HK,1.1195,-3.90502,-1.09707,0.58985
3,0004.HK,3.85108,-0.08064,2.46477,-2.10438
4,0005.HK,6.43814,3.10158,15.9539,16.25456


In [9]:
# Attach the per-year differences to the wide price table, matching rows on Ticker
pd3 = df_wide.merge(df_diff, on='Ticker')
pd3

Unnamed: 0,Ticker,First_Close_2021,First_Close_2022,First_Close_2023,First_Close_2024,Last_Close_2021,Last_Close_2022,Last_Close_2023,Last_Close_2024,Close_Difference_2021,Close_Difference_2022,Close_Difference_2023,Close_Difference_2024
0,0001.HK,45.0211,43.26236,41.95355,38.83447,42.41904,41.55441,39.39933,41.55,-2.60206,-1.70795,-2.55422,2.71553
1,0002.HK,59.52163,67.68231,50.50368,59.7753,66.83364,50.50368,60.28979,64.15617,7.31201,-17.17863,9.78611,4.38087
2,0003.HK,9.31115,10.55094,6.74444,5.60015,10.43065,6.64592,5.64737,6.19,1.1195,-3.90502,-1.09707,0.58985
3,0004.HK,18.39696,21.87647,21.98577,23.52696,22.24804,21.79583,24.45054,21.42258,3.85108,-0.08064,2.46477,-2.10438
4,0005.HK,31.62503,38.02259,41.25123,57.15973,38.06317,41.12417,57.20513,73.41429,6.43814,3.10158,15.9539,16.25456


In [10]:
# Collect the distinct year suffixes from every column whose name contains "_"
year_suffixes = set()
for column in pd3.columns:
    if '_' in column:
        year_suffixes.add(int(column.split('_')[-1]))
years_in_df = sorted(year_suffixes)

# Group the columns by year: Ticker first, then for each year its
# First_Close, Last_Close and Close_Difference columns, in that order.
new_column_order = ['Ticker'] + [
    f'{prefix}_{year}'
    for year in years_in_df
    for prefix in ('First_Close', 'Last_Close', 'Close_Difference')
]

# Select the columns in the new order
pd3 = pd3[new_column_order]

# Show the result
pd3

Unnamed: 0,Ticker,First_Close_2021,Last_Close_2021,Close_Difference_2021,First_Close_2022,Last_Close_2022,Close_Difference_2022,First_Close_2023,Last_Close_2023,Close_Difference_2023,First_Close_2024,Last_Close_2024,Close_Difference_2024
0,0001.HK,45.0211,42.41904,-2.60206,43.26236,41.55441,-1.70795,41.95355,39.39933,-2.55422,38.83447,41.55,2.71553
1,0002.HK,59.52163,66.83364,7.31201,67.68231,50.50368,-17.17863,50.50368,60.28979,9.78611,59.7753,64.15617,4.38087
2,0003.HK,9.31115,10.43065,1.1195,10.55094,6.64592,-3.90502,6.74444,5.64737,-1.09707,5.60015,6.19,0.58985
3,0004.HK,18.39696,22.24804,3.85108,21.87647,21.79583,-0.08064,21.98577,24.45054,2.46477,23.52696,21.42258,-2.10438
4,0005.HK,31.62503,38.06317,6.43814,38.02259,41.12417,3.10158,41.25123,57.20513,15.9539,57.15973,73.41429,16.25456


In [11]:
# Export the combined table. The output folder is created first because
# DataFrame.to_csv does not create missing parent directories and would
# otherwise fail on a fresh checkout.
output_path = Path('test') / 'stock_diff.csv'
output_path.parent.mkdir(parents=True, exist_ok=True)
pd3.to_csv(output_path, index=False)

In [11]:
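# Total Return = ((Ending Price − Starting Price) + Dividends) / Starting Price × 100%
# where Dividends is the sum of all dividends paid during the year.
# NOTE: yfinance's auto-adjusted Close already reflects dividends, so use unadjusted prices here to avoid double counting.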

In [12]:
# P/E Ratio = Share Price (year-end closing price) / EPS

In [None]:
# Growth = (Ending Price − Starting Price) + Dividends