In [44]:
# python libraries to import
import pandas as pd
import numpy as np

# local functions must sit in same directory as this file
import scrapeASX

# global settings
xDirectory = "C:/Users/Denise/Documents/DataScience/ASX300/data/raw_ASXIndex"
xYearStart = 2015
xYearEnd = 2021

In [2]:
# update all data
lstASXIndex = scrapeASX.lstASXIndex(2017,xYearEnd,xDirectory) #cannot start before 2017
lstCode = sorted(lstASXIndex['Code'].unique().tolist())

xYear = 2020
scrapeASX.scrapeASX_AllbyYear(xYear,xDirectory,lstCode)

In [65]:
# load all data into memory
xYear = 2020
dfASXIndex = pd.read_csv(xDirectory + "/ASXIndex.csv", encoding='utf-8')
dfASXAnnTitle = pd.read_csv(xDirectory + "/ASXAnnTitle_" + str(xYear) + ".csv", encoding='utf-8')
dfASXPrice = pd.read_csv(xDirectory + "/ASXPrice_" + str(xYear) + ".csv", encoding='utf-8')
dfASXDividends = pd.read_csv(xDirectory + "/ASXDividends_" + str(xYear) + ".csv", encoding='utf-8')
#dfASXSplits = pd.read_csv(xDirectory + "/ASXSplits_" + str(xYear) + ".csv", encoding='utf-8')
dfASXShortInt = pd.read_csv(xDirectory + "/ASXShortInt_" + str(xYear) + ".csv", encoding='utf-8')

In [30]:
lstDate = sorted(dfASXPrice['Date'].unique().tolist(),reverse=True)
xCurr = lstDate[0]
xPrev = lstDate[1]
xDaterange = lstDate[:2]
xcolPrice = ['Date','Code','Close']

In [82]:
# get current index constituents
currASXIndex = dfASXIndex[dfASXIndex['Date']==dfASXIndex['Date'].max()]

In [77]:
# Get top and bottom 20 movers
temp = dfASXPrice[dfASXPrice['Date'].isin(xDaterange)][xcolPrice]
temp = temp.pivot_table(index=['Code'],columns='Date',values='Close').reset_index()
temp.columns = ['Code','Prev','Curr']
temp['Change'] = np.round(100 * (temp['Curr']/temp['Prev'] - 1),2)
temp = temp.merge(currASXIndex[['Code','Sector']],how='left',on=['Code'])
temp = temp[temp['Change'].notna()]
temp = temp.sort_values('Change')
print(temp.iloc[-5:].sort_values(by=['Change'],ascending=False))
print(temp.iloc[:5])

    Code   Prev   Curr  Change       Sector
205  NCZ  0.110  0.145   31.82    Materials
230  PDN  0.050  0.061   22.00       Energy
186  MLX  0.069  0.078   13.04          NaN
314  VUK  1.280  1.440   12.50   Financials
307  URW  4.500  4.990   10.89  Real Estate
    Code   Prev   Curr  Change       Sector
208  NGI  2.310  1.700  -26.41   Financials
18   AMA  0.405  0.360  -11.11  Industrials
31   ARF  2.140  1.930   -9.81  Real Estate
252  RIC  0.805  0.730   -9.32          NaN
248  REG  1.390  1.275   -8.27  Health Care


In [81]:
# average move by sector (not weighted average)
print(temp['Change'].mean())
temp.groupby('Sector')['Change'].mean().sort_values()

-0.17297213622290977


Sector
Health Care                  -1.725652
Information Technology       -0.964000
Consumer Staples             -0.918500
Telecommunication Services   -0.830000
Utilities                    -0.410000
Industrials                  -0.296552
Financials                   -0.258108
Energy                       -0.120588
Materials                     0.122105
Real Estate                   0.701250
Consumer Discretionary        1.286452
Name: Change, dtype: float64

In [85]:
# get announcements - why doesnt it download most recent month?
# how to integrate cluster numbers here
dfASXAnnTitle[dfASXAnnTitle['Date']==dfASXAnnTitle['Date'].max()].head()

Unnamed: 0,Date,Time,Title,Link,Code
182,31/03/2020,1:25 PM,Update on General Meeting,/asx/statistics/displayAnnouncement.do?display...,AJM
183,31/03/2020,1:19 PM,Notice of General Meeting/Proxy Form,/asx/statistics/displayAnnouncement.do?display...,AJM
308,31/03/2020,3:20 PM,Becoming a substantial holder from CBA,/asx/statistics/displayAnnouncement.do?display...,AMA
344,31/03/2020,8:26 AM,Operating Update - Peak Shaft Ore Hoisting Rec...,/asx/statistics/displayAnnouncement.do?display...,AMI
471,31/03/2020,3:59 PM,Dividend/Distribution - ANZPF,/asx/statistics/displayAnnouncement.do?display...,ANZ


In [92]:
# get short interest top 10
currASXShortInt = dfASXShortInt[dfASXShortInt['Date']==dfASXShortInt['Date'].max()]
currASXShortInt.sort_values('Short Percent',ascending=False)[:10]

Unnamed: 0,Name,Code,Short Units,Total Units,Short Percent,Date
37155,GALAXY RESOURCES ORDINARY,GXY,69627791,409479338,17.003982,2020-04-02
37310,OOH!MEDIA LIMITED ORDINARY,OML,36121092,242385958,14.902304,2020-04-02
37408,SPEEDCAST INT LTD ORDINARY,SDA,31616362,239741258,13.187702,2020-04-02
37315,OROCOBRE LIMITED ORDINARY,ORE,34532080,261977578,13.181311,2020-04-02
37291,NEWS CORP.. A NON-VOTING CDI 1:1,NWSLV,2231372,18372533,12.145152,2020-04-02
37198,INGHAMS GROUP ORDINARY,ING,39326721,371679601,10.580812,2020-04-02
37216,JB HI-FI LIMITED ORDINARY,JBH,12096271,114883372,10.529175,2020-04-02
37287,NEW CENTURY RESOURCE ORDINARY,NCZ,64171944,637854575,10.060592,2020-04-02
37075,CLINUVEL PHARMACEUT. ORDINARY,CUV,4695369,49410338,9.502807,2020-04-02
37278,MYER HOLDINGS LTD ORDINARY,MYR,77510651,821278815,9.4378,2020-04-02


In [142]:
# get largest change in short interest over the week and price change since
xWeek = sorted(dfASXShortInt['Date'].unique().tolist(),reverse=True)[:6]
xWeekrange = [xWeek[0],xWeek[5]]
xcolShortInt = ['Date','Code','Short Percent']
temp2 = dfASXShortInt[dfASXShortInt['Date'].isin(xWeekrange)][xcolShortInt]
temp2['Code'] = temp2['Code'].str.strip()
temp2 = temp2.pivot_table(index=['Code'],columns='Date',values='Short Percent').reset_index()
temp2.columns = ['Code','Prev','Curr']
temp2['Change'] =  np.round(temp2['Curr'] - temp2['Prev'],2)
temp2 = temp2.merge(currASXIndex[['Code','Sector']],how='left',on=['Code'])
temp2 = temp2[temp2['Sector'].notna()]
temp2 = temp2.sort_values('Change')
print(temp2.iloc[-5:].sort_values(by=['Change'],ascending=False))
print(temp2.iloc[:5])

    Code      Prev       Curr  Change                  Sector
621  Z1P  5.613515   7.229060    1.62              Financials
382  MYR  7.956364   9.437800    1.48  Consumer Discretionary
310  JBH  9.407719  10.529175    1.12  Consumer Discretionary
96   BLD  3.028398   4.109247    1.08               Materials
37   AMA  3.320624   4.146070    0.83             Industrials
    Code      Prev      Curr  Change                  Sector
556  SYR  9.652596  7.173920   -2.48               Materials
136  CGC  6.894994  5.286505   -1.61        Consumer Staples
126  CCP  2.738475  1.423116   -1.32              Financials
154  CNI  1.716906  0.447001   -1.27              Financials
411  NXT  6.112283  4.965658   -1.15  Information Technology
