# Cel notatnika: rozpoznać źródła danych i zautomatyzować ich pobieranie

In [3]:
import pickle
import pandas as pd
import time
from tqdm import tqdm

# NOTE(review): pickle.load executes arbitrary code from the file; only use
# it on an AllInfo.pkl that was produced locally / by a trusted source.
with open('AllInfo.pkl', 'rb') as pickle_file:
    raw_info = pickle.load(pickle_file)

# The unpickled mapping is transposed so that each instrument becomes a row,
# the former index is exposed as the 'Symbol' column, and the volume/type
# columns are dropped.
all_info = (
    pd.DataFrame(raw_info).T
    .reset_index(names='Symbol')
    .drop(columns=['WolumenMinimalny', 'WolumenKrok', 'Typ'])
)

all_info.head()

Unnamed: 0,Symbol,Waluta,SpreadAbs,SpreadProc,Opis,TER
0,IU5C.DE,EUR,0.01,0.0009,"iShares, UCITS, ACC, EUR",0.15
1,SYBL.DE,EUR,0.097,0.0022,"SPDR, UCITS, DIST, EUR",0.15
2,U3O8.DE,EUR,0.034,0.0045,"Sprott, UCITS, ACC, EUR",0.85
3,UETW.DE,EUR,0.035,0.0011,"UBS, UCITS, ACC, EUR",0.1
4,XMME.L,USD,0.06,0.001,"Xtrackers, UCITS, ACC, USD",0.18


## `yfinance`

In [4]:
# Instalacja biblioteki
# pip install yfinance

import yfinance as yf

In [5]:
def get_history(
    symbol: str,
    start: str = '1900-01-01',
    end: str = '2025-08-23',
    interval: str = '1d',
) -> pd.DataFrame:
    """Download Open/Close/Volume history for one instrument via yfinance.

    Parameters
    ----------
    symbol : str
        Instrument symbol. The four PLN currency pairs listed in ``fx_pairs``
        get the ``=X`` suffix appended before the request; every other symbol
        is passed to yfinance unchanged.
    start, end : str
        Date range forwarded to ``Ticker.history``. The defaults reproduce the
        range that used to be hard-coded in the body.
    interval : str
        Bar size forwarded to ``Ticker.history`` (default: daily bars).

    Returns
    -------
    pd.DataFrame
        Frame with the columns ``<symbol>_Open``, ``<symbol>_Close`` and
        ``<symbol>_Volume`` (prefixed with ``symbol`` exactly as passed in,
        i.e. without the ``=X`` suffix), indexed as returned by yfinance.
    """
    fx_pairs = ('EURPLN', 'CHFPLN', 'USDPLN', 'GBPPLN')
    yahoo_symbol = f'{symbol}=X' if symbol in fx_pairs else symbol

    history = yf.Ticker(yahoo_symbol).history(start=start, end=end, interval=interval)

    # reindex (rather than .loc) so a response that lacks any of these columns
    # yields NaN/empty columns instead of raising KeyError.
    history = history.reindex(columns=['Open', 'Close', 'Volume'])
    history.columns = [f'{symbol}_{column}' for column in history.columns]

    return history

# Quick check: outer-join two instruments on their date index so every
# date present in either history is kept.
left_history = get_history('P500.DE')
right_history = get_history('4GLD.DE')
left_history.join(right_history, how='outer')

Unnamed: 0_level_0,P500.DE_Open,P500.DE_Close,P500.DE_Volume,4GLD.DE_Open,4GLD.DE_Close,4GLD.DE_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-11-13 00:00:00+01:00,,,,23.950001,24.090000,348757
2009-11-16 00:00:00+01:00,,,,24.250000,24.440001,522167
2009-11-17 00:00:00+01:00,,,,24.450001,24.650000,164044
2009-11-18 00:00:00+01:00,,,,24.650000,24.650000,293675
2009-11-19 00:00:00+01:00,,,,24.680000,24.559999,147696
...,...,...,...,...,...,...
2025-08-18 00:00:00+02:00,1094.849976,1096.099976,896.0,92.120003,91.870003,109813
2025-08-19 00:00:00+02:00,1096.150024,1094.349976,472.0,91.900002,91.665001,90761
2025-08-20 00:00:00+02:00,1092.250000,1086.949951,921.0,91.785004,92.135002,98643
2025-08-21 00:00:00+02:00,1093.050049,1092.550049,1121.0,92.275002,92.540001,106682


In [7]:
# Pause between consecutive requests, in seconds.
REQUEST_PAUSE_S = 10.0

symbols_list = list(all_info['Symbol'])
history_frames = []  # one frame per successfully downloaded symbol
failed_symbols = []  # symbols whose download raised; kept for a later retry

try:
    for i, symbol in tqdm(enumerate(symbols_list), total=len(symbols_list)):
        # Wait between requests only (not before the first, not after the last).
        if i > 0:
            time.sleep(REQUEST_PAUSE_S)

        try:
            history_frames.append(get_history(symbol))
        except Exception as exc:
            # A single bad symbol must not abort a long-running download.
            failed_symbols.append(symbol)
            tqdm.write(f'{symbol}: download failed ({exc!r})')
finally:
    # Built in `finally` so that an interrupted run (e.g. KeyboardInterrupt)
    # still leaves `data` holding everything downloaded so far.
    # One outer alignment over all frames replaces the previous pattern of
    # re-joining the ever-growing frame once per symbol.
    if history_frames:
        data = pd.concat(history_frames, axis=1, join='outer', sort=True)
    else:
        data = pd.DataFrame()

  0%|          | 0/1318 [00:10<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Preview the first rows of the combined price history.
data.head()

Unnamed: 0_level_0,IU5C.DE,IU5C.DE,IU5C.DE,SYBL.DE,SYBL.DE,SYBL.DE,U3O8.DE,U3O8.DE,U3O8.DE,UETW.DE,...,VERE.DE,XMBR.DE,XMBR.DE,XMBR.DE,WELK.DE,WELK.DE,WELK.DE,XNNV.DE,XNNV.DE,XNNV.DE
Unnamed: 0_level_1,Open,Close,Volume,Open,Close,Volume,Open,Close,Volume,Open,...,Volume,Open,Close,Volume,Open,Close,Volume,Open,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2000-01-03 05:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2000-01-04 05:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2000-01-05 05:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2000-01-06 05:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2000-01-07 05:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,


In [None]:
# Persist the combined history as a gzip-compressed parquet file.
# pandas needs a parquet engine (pyarrow or fastparquet) installed for this.
data.to_parquet('Data/HistoricalData.parquet.gzip', compression='gzip')

In [None]:
def get_info(symbol: str) -> dict:
    """Fetch a fixed selection of yfinance ``Ticker.info`` fields for one symbol.

    The four PLN currency pairs listed in ``fx_pairs`` are requested with the
    ``=X`` suffix; every other symbol is used as given. The function sleeps
    for one second before each request.

    Returns a dict with one entry per name in ``wanted_fields`` (in that
    order); a field missing from ``Ticker.info`` maps to ``None``.
    """
    wanted_fields = (
        # identity
        'symbol', 'shortName', 'longName', 'language', 'fundFamily',
        # quote
        'quoteType', 'quoteSourceName',
        # pricing / costs
        'currency', 'netExpenseRatio', 'priceHint',
        # dividends
        'trailingAnnualDividendRate', 'trailingAnnualDividendYield',
        # exchange
        'exchange', 'fullExchangeName', 'exchangeTimezoneName',
        # ESG flag
        'esgPopulated',
    )
    fx_pairs = ['EURPLN', 'CHFPLN', 'USDPLN', 'GBPPLN']
    yahoo_symbol = symbol + '=X' if symbol in fx_pairs else symbol

    time.sleep(1.0)

    info = yf.Ticker(yahoo_symbol).info
    return {field: info.get(field) for field in wanted_fields}

# Register `progress_apply` on pandas objects, then fetch one info dict per symbol.
tqdm.pandas()
info_records = all_info['Symbol'].progress_apply(get_info)

# One row per instrument; rows without a 'symbol' value are discarded.
yf_info = pd.DataFrame(info_records.tolist())
has_symbol = yf_info['symbol'].notna()
yf_info = yf_info.loc[has_symbol].reset_index(drop=True)

  9%|▉         | 125/1318 [03:19<35:07,  1.77s/it]HTTP Error 404: 
 28%|██▊       | 364/1318 [09:43<26:55,  1.69s/it]HTTP Error 404: 
 89%|████████▉ | 1170/1318 [32:00<03:54,  1.59s/it]HTTP Error 404: 
 96%|█████████▌| 1264/1318 [34:32<01:24,  1.56s/it]HTTP Error 404: 
100%|██████████| 1318/1318 [36:00<00:00,  1.64s/it]


ImportError: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.

In [None]:
# Persist the instrument metadata as a gzip-compressed parquet file.
# pandas needs a parquet engine (pyarrow or fastparquet) installed for this.
yf_info.to_parquet('Data/InstrumentsInfo.parquet.gzip', compression='gzip')

In [7]:
# Read the saved instrument metadata back to check the round trip.
pd.read_parquet('Data/InstrumentsInfo.parquet.gzip')

Unnamed: 0,symbol,shortName,longName,language,fundFamily,quoteType,quoteSourceName,currency,netExpenseRatio,priceHint,exchange,fullExchangeName,exchangeTimezoneName,esgPopulated
0,IU5C.DE,iShs V-S&P 500 Commun.S.UC.ETFR,iShares V PLC - iShares S&P 500 Communication ...,en-US,BlackRock Asset Management Ireland - ETF,ETF,Delayed Quote,EUR,0.15,2.0,GER,XETRA,Europe/Berlin,False
1,SYBL.DE,SPDR Bloom.15+Y.Gilt UETF R,SPDR Bloomberg 15+ Year Gilt UCITS ETF,en-US,State Street Global Advisors Europe Limited,ETF,Delayed Quote,EUR,0.15,2.0,GER,XETRA,Europe/Berlin,False
2,U3O8.DE,HANETF-Sprott Uran.Mnrs ETF R,Sprott Uranium Miners UCITS ETF Accumulating,en-US,HANetf Management Limited,ETF,Delayed Quote,EUR,,2.0,GER,XETRA,Europe/Berlin,False
3,UETW.DE,UBS(I)ETF-U.Cor.MSCI Wld U.ETFR,UBS Core MSCI World UCITS ETF USD acc,en-US,UBS Fund Management (Luxembourg) S.A.,ETF,Delayed Quote,EUR,0.06,2.0,GER,XETRA,Europe/Berlin,False
4,XMME.L,XTRACKERS (IE) PUBLIC LIMITED C,Xtrackers MSCI Emerging Markets UCITS ETF 1C,en-US,DWS Investment S.A. (ETF),ETF,Delayed Quote,USD,0.18,2.0,LSE,LSE,Europe/London,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1309,RBOT.L,ISHARES IV PLC ISHS AUTO & ROBO,iShares Automation & Robotics UCITS ETF,en-US,BlackRock Asset Management Ireland - ETF,ETF,Delayed Quote,USD,0.40,2.0,LSE,LSE,Europe/London,False
1310,VERE.DE,Vang.FTSE Dev.Eur.ex UK U.ETF R,Vanguard FTSE Developed Europe ex UK UCITS ETF...,en-US,Vanguard Group (Ireland) Limited,ETF,Delayed Quote,EUR,,2.0,GER,XETRA,Europe/Berlin,False
1311,XMBR.DE,Xtrackers MSCI Brazil I,Xtrackers MSCI Brazil UCITS ETF 1C,en-US,DWS Investment S.A. (ETF),ETF,Delayed Quote,EUR,0.25,2.0,GER,XETRA,Europe/Berlin,False
1312,WELK.DE,Amu.S&P Wld Finan.Screen.UETF R,Amundi ETF ICAV - Amundi S&P World Financials ...,en-US,Amundi Ireland Limited,ETF,Delayed Quote,EUR,0.18,2.0,GER,XETRA,Europe/Berlin,False


In [15]:
# Intraday sample for the SPY ticker: 5-minute bars over the last 5 days.
spy_etf = yf.Ticker("SPY")
hist_intraday = spy_etf.history(interval="5m", period="5d")

print("Dane intraday dla SPY z ostatnich 5 dni:")
hist_intraday.head()

Dane intraday dla SPY z ostatnich 5 dni:


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-08-18 09:30:00-04:00,642.859985,643.445007,642.799988,643.399902,2028539,0.0,0.0,0.0
2025-08-18 09:35:00-04:00,643.400024,643.585022,643.049988,643.309998,985418,0.0,0.0,0.0
2025-08-18 09:40:00-04:00,643.320007,643.419983,642.804993,643.200012,798013,0.0,0.0,0.0
2025-08-18 09:45:00-04:00,643.190002,643.799988,643.169983,643.549988,992450,0.0,0.0,0.0
2025-08-18 09:50:00-04:00,643.540405,643.559998,642.830017,642.98999,866464,0.0,0.0,0.0


In [16]:
def fmt_thousands(value, missing: str = 'Brak danych') -> str:
    """Format a number with thousands separators, or return ``missing``.

    The ``,`` format spec only works on numbers; applying it to a fallback
    string raises ValueError. Non-numeric input (including ``None``) is
    therefore mapped to ``missing`` instead of being formatted.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return missing
    return f'{value:,}'


# --- Example 1: CD Projekt shares ---
print("--- Pobieranie danych dla CD Projekt (CDR.WA) ---")
try:
    # Create the Ticker object and fetch its info dictionary.
    cd_projekt = yf.Ticker("CDR.WA")
    info_cdr = cd_projekt.info

    # Print selected fields; a missing field falls back to a placeholder.
    print(f"Pełna nazwa: {info_cdr.get('longName', 'Brak danych')}")
    print(f"Sektor: {info_cdr.get('sector', 'Brak danych')}")
    print(f"Strona WWW: {info_cdr.get('website', 'Brak danych')}")
    print(f"Kapitalizacja rynkowa: {fmt_thousands(info_cdr.get('marketCap'))}")
    print("\nOpis działalności:")
    print(info_cdr.get('longBusinessSummary', 'Brak opisu.'))

except Exception as e:
    print(f"Nie udało się pobrać danych dla CDR.WA: {e}")


# --- Example 2: S&P 500 ETF ---
print("\n\n--- Pobieranie danych dla ETF SPDR S&P 500 (SPY) ---")
try:
    # Create the Ticker object and fetch its info dictionary.
    spy_etf = yf.Ticker("SPY")
    info_spy = spy_etf.info

    # Print selected fields; a missing field falls back to a placeholder.
    print(f"Pełna nazwa: {info_spy.get('longName', 'Brak danych')}")
    print(f"Kategoria funduszu: {info_spy.get('category', 'Brak danych')}")
    print(f"Suma aktywów: {fmt_thousands(info_spy.get('totalAssets'))}")
    # `or 0` also covers the key being present with a None value, which
    # would otherwise fail on the multiplication.
    dividend_yield = info_spy.get('yield') or 0
    print(f"Stopa dywidendy (yield): {dividend_yield * 100:.2f}%")

except Exception as e:
    print(f"Nie udało się pobrać danych dla SPY: {e}")

--- Pobieranie danych dla CD Projekt (CDR.WA) ---
Pełna nazwa: CD Projekt S.A.
Sektor: Communication Services
Strona WWW: https://www.cdprojekt.com
Kapitalizacja rynkowa: 25,766,916,096

Opis działalności:
CD Projekt S.A., together its subsidiaries, engages in the development, publishing, and digital distribution of video games for personal computers and video game consoles in Poland. It operates in two segments, CD PROJEKT RED and GOG.com. The company's product portfolio comprises Cyberpunk 2077: Phantom Liberty; Cyberpunk 2077; The Witcher 3: Wild Hunt; The Witcher 2: Assassins of Kings; The Witcher; and GWENT: The Witcher Card Game. It also distributes videogames through GOG.com distribution platform and GOG GALAXY application, as well as online channels. The company also exports its products to rest of Europe, North America, South America, Asia, Australia, and Africa. CD Projekt S.A. was incorporated in 2001 and is headquartered in Warsaw, Poland.


--- Pobieranie danych dla ETF SP

In [18]:
# Reload the raw pickled instrument metadata.
# NOTE(review): this rebinds `all_info` to the unpickled object itself, not
# to the DataFrame built from it earlier in the notebook.
with open('AllInfo.pkl', mode='rb') as pickle_file:
    all_info = pickle.load(pickle_file)

In [6]:
# Inspect the complete info dictionary yfinance returns for a single symbol.
ticker = yf.Ticker("P500.DE")
ticker.info

{'companyOfficers': [],
 'executiveTeam': [],
 'maxAge': 86400,
 'priceHint': 2,
 'previousClose': 1108.45,
 'open': 1112.5,
 'dayLow': 1109.0,
 'dayHigh': 1115.85,
 'regularMarketPreviousClose': 1108.45,
 'regularMarketOpen': 1112.5,
 'regularMarketDayLow': 1109.0,
 'regularMarketDayHigh': 1115.85,
 'volume': 786,
 'regularMarketVolume': 786,
 'averageVolume': 1299,
 'averageVolume10days': 1119,
 'averageDailyVolume10Day': 1119,
 'bid': 1114.55,
 'ask': 1114.9,
 'bidSize': 1088,
 'askSize': 1088,
 'fiftyTwoWeekLow': 864.14,
 'fiftyTwoWeekHigh': 1162.75,
 'fiftyDayAverage': 1084.49,
 'twoHundredDayAverage': 1068.111,
 'trailingAnnualDividendRate': 0.0,
 'trailingAnnualDividendYield': 0.0,
 'navPrice': 1294.4316,
 'currency': 'EUR',
 'tradeable': False,
 'fundFamily': 'Invesco Investment Management Limited',
 'fundInceptionDate': 1274313600,
 'legalType': 'Exchange Traded Fund',
 'quoteType': 'ETF',
 'symbol': 'P500.DE',
 'language': 'en-US',
 'region': 'US',
 'typeDisp': 'ETF',
 'quote