In [61]:
# Dependencies and Setup
import pandas as pd


In [63]:
# Load dividends_data.csv and using only the 'ASX Code' and 'Dividend' columns
dividends_data = pd.read_csv('resources/dividends_data.csv')

# Convert 'Date' column to datetime format & extract year from 'Date' column
dividends_data['Date'] = pd.to_datetime(dividends_data['Date'], dayfirst=True)
dividends_data['Year'] = dividends_data['Date'].dt.year

# Sort the DataFrame by 'Year' and 'ASX Code' & group by 'Year' and sum up the totals for each year
dividends_data_sorted = dividends_data.sort_values(by=['Year', 'ASX Code'])
yearly_totals = dividends_data_sorted.groupby(['ASX Code', 'Year'])['Dividends'].sum().reset_index()

# Load splits_data.csv and using only the 'Year' and 'Splits' columns
splits_data = pd.read_csv('resources/splits_data.csv', usecols=['ASX Code', 'Splits', 'Year'])

# Load merged_industries.csv to merge other files into
combined_data = pd.read_csv('resources/merged_industries.csv')


In [66]:
# Merge dividends_data with combined_data based on 'ASX Code' and 'Year'
combined_data = pd.merge(combined_data, yearly_totals, left_on='ASX Code', right_on='ASX Code', how='left')

# Check for and sort duplicate index labels in splits_data
#if splits_data.index.duplicated().any():
    #splits_data = splits_data[~splits_data.index.duplicated(keep='first')]

    

In [69]:
# Merge splits_data with combined_data based on 'Year', specifying custom suffixes
combined_data = pd.merge(combined_data, splits_data, on=['Year', 'ASX Code'], how='left', suffixes=('', '_splits'))

# Drop duplicate columns
combined_data.drop(columns=['Splits_splits'], inplace=True)
combined_data.drop(columns=['Year_y'], inplace=True)
combined_data.drop(columns=['Splits_splits'], inplace=True)

# Drop duplicate rows
#combined_data_cleaned = combined_data.drop_duplicates()

combined_data.head(50)


Unnamed: 0,ASX Code,Security Group Code,Issuer Full Name,Product Description,Last Price($),Business Date,Year_x,Company Name,Sub-Industry,Dividends_x,Year,Dividends_y,Splits
0,1AD,EQY,ADALTA LIMITED ...,ORDINARY,0.28,20170630,2017,Adalta Limited Ordinary,Biotechnology & Medical Research,,,,
1,1AG,EQY,ALTERRA LIMITED ...,ORDINARY,0.024,20170630,2017,Alterra Limited Ordinary,Environmental Services & Equipment,,,,
2,1AL,EQY,ONEALL INTERNATIONAL LIMITED ...,ORDINARY,0.91,20170630,2017,,,,,,
3,1PG,EQY,1-PAGE LIMITED ...,ORDINARY,0.165,20170308,2017,,,,,,
4,1ST,EQY,1ST GROUP LIMITED ...,ORDINARY,0.026,20170628,2017,,,,,,
5,3DP,EQY,POINTERRA LIMITED ...,ORDINARY,0.025,20170629,2017,Pointerra Limited Ordinary,Software,,,,
6,3PL,EQY,3P LEARNING LIMITED.. ...,ORDINARY,1.045,20170630,2017,3P Learning Limited. Ordinary,Software,,,,
7,4CE,EQY,FORCE COMMODITIES LIMITED ...,ORDINARY,0.018,20170630,2017,,,,,,
8,4DS,EQY,4DS MEMORY LIMITED ...,ORDINARY,0.037,20170630,2017,4DS Memory Limited Ordinary,Computer Hardware,,,,
9,4WD,EQY,AUTOMOTIVE SOLUTIONS GROUP LTD ...,ORDINARY,0.35,20170630,2017,,,,,,


In [15]:
combined_data.to_csv('resources\dividends_splits_merged.csv', index=False)
