# Data Collection Notebook

## Imports and Reading Raw ASX Data

In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
import yfinance as yf

# Load the ASX listing snapshot, keeping a single row per company name.
asx = (
    pd.read_csv("ASX_Listed_Companies_23-06-2021_10-32-50_AEST.csv")
    .drop_duplicates(subset="Company name")
)

# The first 200 rows after sorting by "Market Cap" in descending order;
# rows with a missing market cap are placed last.
asx_200 = asx.sort_values(
    by=["Market Cap"], ascending=False, na_position="last"
).iloc[:200]

# Map every ASX code in that selection to a yf.Ticker requested as "<code>.AX".
asx_200_codelist = asx_200["ASX code"].tolist()
ticker_dict = {code: yf.Ticker(code + ".AX") for code in asx_200_codelist}

## Retrieve Company Financials

### Note: the cell below took about 20 minutes to run last time (ASX 200 only)

In [24]:
def _ensure_parent_dir(csv_path):
    """Create the folder that will contain ``csv_path`` if it is missing."""
    Path(csv_path).parent.mkdir(parents=True, exist_ok=True)


def _save_optional(frame, csv_path, missing_message):
    """Write an optional dataset to CSV, or report why it could not be saved.

    Parameters
    ----------
    frame : object with a ``to_csv`` method, or None
        Dataset to save. ``None`` is treated as "nothing available".
        NOTE(review): presumably yfinance yields ``None`` when a company has
        no such data -- confirm against the installed yfinance version.
    csv_path : str
        Destination file; its parent folder is created when needed.
    missing_message : str
        Text printed when the dataset is missing or cannot be written.
    """
    if frame is None:
        print(missing_message)
        return
    try:
        _ensure_parent_dir(csv_path)
        frame.to_csv(csv_path)
    except Exception as exc:
        # Carry on with the remaining datasets, but show the real cause instead
        # of hiding it behind the "missing data" message. `Exception` (rather
        # than a bare `except`) lets KeyboardInterrupt stop this long loop.
        print(f"{missing_message} ({type(exc).__name__}: {exc})")


# Export the statements, holders and 5-year price history of every ticker to
# one CSV per company and dataset.
for code, ticker in ticker_dict.items():
    print(code)

    # Read every dataset from the ticker before writing anything to disk.
    income_statement = ticker.financials
    balance_sheet = ticker.balance_sheet
    quarterly_bs = ticker.quarterly_balance_sheet
    cash_flow_statement = ticker.cashflow
    sustainability = ticker.sustainability
    major_holders = ticker.major_holders

    # get historical market data
    hist = ticker.history(period="5y")

    # Core statements: a failure here still raises, exactly as before.
    required_exports = [
        (income_statement, f"IncomeStatements/{code}_IS.csv"),
        (balance_sheet, f"BalanceSheets/{code}_BS.csv"),
        (quarterly_bs, f"QuarterlyBS/{code}_QBS.csv"),
        (cash_flow_statement, f"CashFlowStatements/{code}_CFS.csv"),
    ]
    for frame, csv_path in required_exports:
        _ensure_parent_dir(csv_path)
        frame.to_csv(csv_path)

    # Optional datasets: report and continue when they are unavailable.
    _save_optional(
        sustainability,
        f"Sustainability/{code}_Sust.csv",
        f"{code} had no sustainability report",
    )
    _save_optional(
        major_holders,
        f"MajorHolders/{code}_Holders.csv",
        f"{code} had no major holders data",
    )
    _save_optional(
        hist,
        f"PriceData/{code}_5YrPriceData.csv",
        f"{code} had no price data",
    )

CBA
BHP
CSL
WBC
NAB
ANZ
FMG
WES
MQG
WOW
RMD
RMD had no sustainability report
RIO
TLS
TCL
GMG
APT
APT had no sustainability report
ALL
AMC
AMC had no sustainability report
WPL
REA
COL
COL had no sustainability report
NCM
JHX
XRO
XRO had no sustainability report
NWS
NWS had no sustainability report
SHL
URW
URW had no sustainability report
FPH
FPH had no sustainability report
QBE
BXB
COH
SYD
STO
REH
REH had no sustainability report
ASX
SCG
RHC
SUN
S32
IAG
MEZ
MEZ had no sustainability report
NST
NST had no sustainability report
MGR
DXS
TAH
TAH had no sustainability report
SEK
SEK had no sustainability report
SGP
TPG
TPG had no sustainability report
APA
KLA
KLA had no sustainability report
BSL
DMP
DMP had no sustainability report
JHG
JHG had no sustainability report
WTC
WTC had no sustainability report
CPU
AIA
AGG
AGG had no sustainability report
MFG
MFG had no sustainability report
GPT
AFI
AFI had no sustainability report
MIN
MIN had no sustainability report
QAN
QAN had no sustainability 

In [17]:
# One-off export of the ABP balance sheet to its CSV file.
# (Plain string literal: the path contains no placeholders, so no f-prefix is needed.)
ticker_dict["ABP"].balance_sheet.to_csv("BalanceSheets/ABP_BS.csv")

In [31]:
# `request_string` is not defined in any cell shown in this notebook, so it is
# rebuilt here to keep the cell runnable after a kernel restart.
# NOTE(review): assumed to be the space-separated "<code>.AX" symbols of the
# ASX 200 codes, which is consistent with the "200 of 200 completed" output
# below -- confirm.
request_string = " ".join(code + ".AX" for code in asx_200_codelist)

# One month of price data for every symbol, fetched in a single batched call.
data = yf.download(
    tickers=request_string,
    period="1mo",
)

[*********************100%***********************]  200 of 200 completed


In [32]:
# Display the frame returned by the yf.download call above as this cell's output.
data

Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,A2M.AX,ABC.AX,ABP.AX,AFI.AX,AGG.AX,AGL.AX,AIA.AX,ALD.AX,ALL.AX,ALQ.AX,...,WHC.AX,WOR.AX,WOW.AX,WPL.AX,WPR.AX,WTC.AX,XRO.AX,YAL.AX,Z1P.AX,ZIM.AX
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2021-05-24,5.42,3.36,2.97,7.48,6.68,8.26,6.77,29.02,40.552998,10.822001,...,7179710.0,1758586.0,1085836.0,1709288.0,1973764.0,294537,537868.0,15668.0,10511393.0,12649.0
2021-05-25,5.33,3.4,3.0,7.47,6.53,8.18,6.81,28.969999,40.981445,10.772586,...,7998536.0,3662561.0,1066790.0,1293833.0,1712861.0,426105,430526.0,3844.0,11972829.0,6093.0
2021-05-26,5.31,3.34,2.98,7.51,6.68,8.24,6.68,29.09,40.911697,12.156221,...,6839037.0,1852721.0,1229685.0,1483314.0,1568178.0,717022,487751.0,46776.0,5820716.0,1823.0
2021-05-27,5.33,3.31,2.95,7.51,6.45,8.19,6.7,29.35,41.379997,11.859728,...,11944729.0,2707019.0,2833946.0,3693989.0,2983396.0,670650,2260422.0,39620.0,7629563.0,5049.0
2021-05-28,5.54,3.33,3.01,7.5,6.18,8.27,6.82,29.17,41.91,11.968441,...,12130128.0,1049948.0,1756525.0,2142239.0,1554487.0,560872,388836.0,84168.0,5193972.0,3423.0
2021-05-31,5.52,3.32,3.01,6.03,6.28,8.12,6.85,28.549999,41.049999,12.235286,...,6569545.0,1183867.0,2414541.0,1949396.0,1438166.0,398950,346800.0,4669.0,4667111.0,5625.0
2021-06-01,5.66,3.23,3.01,7.56,6.5,8.03,6.91,28.76,40.950001,12.403298,...,11844511.0,1345133.0,1177217.0,1753817.0,1151015.0,505935,220026.0,20737.0,6343375.0,9252.0
2021-06-02,5.72,3.36,3.04,7.55,6.27,8.35,7.09,28.559999,41.310001,12.156221,...,16083259.0,4618681.0,1812624.0,4577642.0,1073264.0,693301,404260.0,51381.0,5537287.0,3742.0
2021-06-03,5.7,3.24,3.06,7.53,6.15,8.66,7.12,29.15,40.669998,12.324234,...,10690780.0,4080307.0,1659493.0,3912108.0,1285832.0,665068,405464.0,80394.0,7222569.0,3725.0
2021-06-04,5.7,3.2,3.05,7.65,6.1,8.82,7.14,29.629999,40.950001,12.344,...,8631853.0,2628915.0,2318067.0,2331526.0,1300742.0,478475,446762.0,77311.0,5327984.0,2632.0


In [3]:
# `wow` is not defined in any cell shown in this notebook, so it is created
# here to keep the cell runnable after a kernel restart.
# NOTE(review): assumed to be the ticker for ASX code WOW, requested with the
# same `code + ".AX"` convention used when building ticker_dict -- confirm.
wow = yf.Ticker("WOW.AX")

# Price history over the longest period available.
wow.history(period="max")

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1993-08-02,1.083447,1.083447,1.083447,1.083447,0,0.0,0.0
1993-08-03,1.083447,1.083447,1.083447,1.083447,0,0.0,0.0
1993-08-04,1.083447,1.083447,1.083447,1.083447,0,0.0,0.0
1993-08-05,1.083447,1.083447,1.083447,1.083447,0,0.0,0.0
1993-08-06,1.083447,1.083447,1.083447,1.083447,0,0.0,0.0
...,...,...,...,...,...,...,...
2021-06-17,43.779999,43.779999,43.150002,43.349998,4539024,0.0,0.0
2021-06-18,43.480000,43.580002,42.549999,42.669998,5834452,0.0,0.0
2021-06-21,42.549999,43.160000,42.240002,43.020000,4494154,0.0,0.0
2021-06-22,43.200001,43.740002,43.169998,43.349998,2503659,0.0,0.0
