In [1]:
# Dependencies

import pandas as pd

In [2]:
# Set the path for CSV files

stocks_data_path = "resources\merged_industries.csv"
splits_data_path = "resources\splits_data.csv"
dividends_data_path = "resources\dividends_data.csv"

In [3]:
# Read the CSV file into a DataFrame

stocks_data_df = pd.read_csv(stocks_data_path)
stocks_data_df.head()  

Unnamed: 0,ASX Code,Security Group Code,Issuer Full Name,Product Description,Last Price($),Business Date,Year,Company Name,Sub-Industry
0,1AD,EQY,ADALTA LIMITED ...,ORDINARY,0.28,20170630,2017,Adalta Limited Ordinary,Biotechnology & Medical Research
1,1AG,EQY,ALTERRA LIMITED ...,ORDINARY,0.024,20170630,2017,Alterra Limited Ordinary,Environmental Services & Equipment
2,3DP,EQY,POINTERRA LIMITED ...,ORDINARY,0.025,20170629,2017,Pointerra Limited Ordinary,Software
3,3PL,EQY,3P LEARNING LIMITED.. ...,ORDINARY,1.045,20170630,2017,3P Learning Limited. Ordinary,Software
4,4DS,EQY,4DS MEMORY LIMITED ...,ORDINARY,0.037,20170630,2017,4DS Memory Limited Ordinary,Computer Hardware


In [4]:
# Read the CSV file into a DataFrame

splits_data_df = pd.read_csv(splits_data_path)
splits_data_df.head()


Unnamed: 0,ASX Code,Splits,Year
0,AAU,0.2,2020
1,ACB,0.1,2024
2,ACB,0.08,2020
3,ACR,0.33,2021
4,ACR,0.25,2015


In [5]:
# Grouping data by ASX code and year, then summing splits

sum_splits = splits_data_df.groupby(["ASX Code","Year"])["Splits"].sum().reset_index()
sum_splits.head()

Unnamed: 0,ASX Code,Year,Splits
0,AAU,2020,0.2
1,ABR,2018,0.17
2,ABR,2020,3.5
3,ACB,2020,0.08
4,ACB,2024,0.1


In [6]:
# Read the CSV file into a DataFrame

dividends_data_df = pd.read_csv(dividends_data_path)
dividends_data_df.head()

Unnamed: 0,ASX Code,Dividends,Date
0,AAA,0.12,30/04/2024
1,AAA,0.14,27/03/2024
2,AAA,0.13,28/02/2024
3,AAA,0.12,30/01/2024
4,AAA,0.13,28/12/2023


In [7]:
# Convert Date column to datetime type

dividends_data_df["Date"] = pd.to_datetime(dividends_data_df["Date"],dayfirst=True)

# Extract Year from the Date column
# Adjust year based on whether date is before or after June 30th

dividends_data_df['Year'] = dividends_data_df['Date'].apply(lambda x: x.year - 1 if x.month < 6 else x.year)
dividends_data_df.head()

Unnamed: 0,ASX Code,Dividends,Date,Year
0,AAA,0.12,2024-04-30,2023
1,AAA,0.14,2024-03-27,2023
2,AAA,0.13,2024-02-28,2023
3,AAA,0.12,2024-01-30,2023
4,AAA,0.13,2023-12-28,2023


In [8]:
# Dropping the date column

dividends_data_df = dividends_data_df.drop(columns=["Date"])

# Grouping data by ASX code and year, then summing dividend amounts

sum_dividends = dividends_data_df.groupby(["ASX Code","Year"])["Dividends"].sum().reset_index()
sum_dividends.head()

Unnamed: 0,ASX Code,Year,Dividends
0,AAA,2020,0.16
1,AAA,2021,0.1
2,AAA,2022,0.76
3,AAA,2023,1.44
4,AAL,2014,0.4


In [9]:
# Merge splits and dividends data based on ASX code and year

merged_splits_dividends_data = pd.merge(sum_splits, sum_dividends, on=["ASX Code", "Year"], how='outer')
merged_splits_dividends_data.head(20)

Unnamed: 0,ASX Code,Year,Splits,Dividends
0,AAU,2020,0.2,
1,ABR,2018,0.17,1.22
2,ABR,2020,3.5,1.6
3,ACB,2020,0.08,
4,ACB,2024,0.1,
5,ACR,2015,0.25,1.64
6,ACR,2021,0.33,
7,ADS,2021,1.25,0.84
8,ADV,2014,20.0,
9,AEI,2022,0.05,


In [10]:
# merge merged_splits_dividends_data with stocks_data_df based on ASX code and year

complete_stock_data = pd.merge(stocks_data_df, merged_splits_dividends_data, on=["ASX Code", "Year"], how='left')
complete_stock_data.head(20)

Unnamed: 0,ASX Code,Security Group Code,Issuer Full Name,Product Description,Last Price($),Business Date,Year,Company Name,Sub-Industry,Splits,Dividends
0,1AD,EQY,ADALTA LIMITED ...,ORDINARY,0.28,20170630,2017,Adalta Limited Ordinary,Biotechnology & Medical Research,,
1,1AG,EQY,ALTERRA LIMITED ...,ORDINARY,0.024,20170630,2017,Alterra Limited Ordinary,Environmental Services & Equipment,,
2,3DP,EQY,POINTERRA LIMITED ...,ORDINARY,0.025,20170629,2017,Pointerra Limited Ordinary,Software,,
3,3PL,EQY,3P LEARNING LIMITED.. ...,ORDINARY,1.045,20170630,2017,3P Learning Limited. Ordinary,Software,,
4,4DS,EQY,4DS MEMORY LIMITED ...,ORDINARY,0.037,20170630,2017,4DS Memory Limited Ordinary,Computer Hardware,,
5,88E,EQY,88 ENERGY LIMITED ...,ORDINARY,0.057,20170630,2017,88 Energy Limited Ordinary,Oil & Gas Exploration and Production,,
6,8CO,EQY,8COMMON LIMITED ...,ORDINARY,0.033,20170622,2017,8COMMON Limited Ordinary,Software,,
7,8IH,EQY,8I HOLDINGS LTD ...,CDI 1:1,0.45,20170629,2017,8I Holdings Ltd CDI 1:1,Holding Companies,,
8,A2M,EQY,THE A2 MILK COMPANY LIMITED ...,ORDINARY,3.76,20170630,2017,The A2 Milk Company Limited Ordinary,Food Processing,,
9,A3D,EQY,AURORA LABS LIMITED ...,ORDINARY,1.07,20170630,2017,Aurora Labs Limited Ordinary,Electronic Equipment & Parts,,


In [11]:
# Replace null with appropriate values for Splits and Dividends columns

complete_stock_data["Splits"].fillna(0,inplace=True)
complete_stock_data["Splits"] = complete_stock_data["Splits"] + 1
complete_stock_data["Dividends"].fillna(0,inplace=True)
complete_stock_data.head(20)

Unnamed: 0,ASX Code,Security Group Code,Issuer Full Name,Product Description,Last Price($),Business Date,Year,Company Name,Sub-Industry,Splits,Dividends
0,1AD,EQY,ADALTA LIMITED ...,ORDINARY,0.28,20170630,2017,Adalta Limited Ordinary,Biotechnology & Medical Research,1.0,0.0
1,1AG,EQY,ALTERRA LIMITED ...,ORDINARY,0.024,20170630,2017,Alterra Limited Ordinary,Environmental Services & Equipment,1.0,0.0
2,3DP,EQY,POINTERRA LIMITED ...,ORDINARY,0.025,20170629,2017,Pointerra Limited Ordinary,Software,1.0,0.0
3,3PL,EQY,3P LEARNING LIMITED.. ...,ORDINARY,1.045,20170630,2017,3P Learning Limited. Ordinary,Software,1.0,0.0
4,4DS,EQY,4DS MEMORY LIMITED ...,ORDINARY,0.037,20170630,2017,4DS Memory Limited Ordinary,Computer Hardware,1.0,0.0
5,88E,EQY,88 ENERGY LIMITED ...,ORDINARY,0.057,20170630,2017,88 Energy Limited Ordinary,Oil & Gas Exploration and Production,1.0,0.0
6,8CO,EQY,8COMMON LIMITED ...,ORDINARY,0.033,20170622,2017,8COMMON Limited Ordinary,Software,1.0,0.0
7,8IH,EQY,8I HOLDINGS LTD ...,CDI 1:1,0.45,20170629,2017,8I Holdings Ltd CDI 1:1,Holding Companies,1.0,0.0
8,A2M,EQY,THE A2 MILK COMPANY LIMITED ...,ORDINARY,3.76,20170630,2017,The A2 Milk Company Limited Ordinary,Food Processing,1.0,0.0
9,A3D,EQY,AURORA LABS LIMITED ...,ORDINARY,1.07,20170630,2017,Aurora Labs Limited Ordinary,Electronic Equipment & Parts,1.0,0.0


In [12]:
#Saving the final file as a CSV

complete_stock_data.to_csv('resources\complete_stock_data.csv', index=False)