In [2]:
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import display
# Display the value of every bare expression statement in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [3]:
# Load the autoreload extension; mode 2 re-imports modules before running code,
# so edits to imported .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
!pip install -Uqq pandas investpy python-dateutil networkx matplotlib scikit-learn great-expectations seaborn

In [5]:
import os
import time
import pandas as pd
import matplotlib.pyplot as plt
import investpy
from pathlib import Path
from dateutil.relativedelta import relativedelta
from dataclasses import dataclass, field
from typing import List

In [6]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [7]:
# Global variables and constants

# Market whose stocks are fetched.
# Use investpy.get_stock_countries() to list supported countries.
COUNTRY = "malaysia"

# Dataset locations: one sub-folder per country under DATASET_FOLDER.
DATASET_FOLDER = "test_data"
TARGET_DATASET_FOLDER = f"{DATASET_FOLDER}/{COUNTRY}"
STOCKS_DATASET = f"{TARGET_DATASET_FOLDER}/stocks.csv"
STOCKS_INFO_DATASET = f"{TARGET_DATASET_FOLDER}/stocks_info.csv"
STOCKS_FINANCE_DATASET = f"{TARGET_DATASET_FOLDER}/stocks_finance.csv"
STOCKS_DIVIDENDS_DATASET = f"{TARGET_DATASET_FOLDER}/stocks_dividends.csv"

# NOTE(review): presumably screening thresholds for a later analysis step;
# none of these three is referenced in the cells visible here — confirm usage.
PAST_YEARS = [2018, 2019, 2020]
PE_RATIO = 9
EXPECTED_DIVIDENDS_YIELDS_PERCENTAGE = 8

In [8]:
def create_folder(folder):
    """Create ``folder`` (including missing parent directories) if it does not exist.

    Args:
        folder: Path of the directory to create.

    Raises:
        FileExistsError: If ``folder`` already exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could race
    # with another process creating the directory between the check and makedirs().
    os.makedirs(folder, exist_ok=True)

@dataclass
class DividendYield:
    """Dividend yield of a single year."""
    # Year the yield refers to.
    year: int = 0
    # Yield expressed as a percentage.
    percentage: float = 0.0
    
@dataclass
class Stock:
    """Record describing one stock: where it is listed, its identifiers and key ratios."""
    country: str = ""
    name: str = ""
    symbol: str = ""
    pe_ratio: float = 0.0
    # Each instance gets its own fresh list (default_factory), never a shared one.
    # NOTE(review): annotated as List[int] although a DividendYield dataclass exists
    # in this notebook and is otherwise unused — confirm the intended element type.
    yields: List[int] = field(default_factory=list)

# Create target folder
# (the per-country dataset directory that the CSV paths defined above point into)
create_folder(TARGET_DATASET_FOLDER)

In [9]:
# Get stocks for the country
df_stocks = investpy.get_stocks(country=COUNTRY)
# Print a summary of the listing: columns, non-null counts and dtypes.
df_stocks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 929 entries, 0 to 928
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   country    929 non-null    object
 1   name       929 non-null    object
 2   full_name  929 non-null    object
 3   isin       929 non-null    object
 4   currency   929 non-null    object
 5   symbol     929 non-null    object
dtypes: object(6)
memory usage: 43.7+ KB


In [10]:
# Preview the first 10 rows of the stock listing.
display(df_stocks.head(10))

Unnamed: 0,country,name,full_name,isin,currency,symbol
0,malaysia,Media Chinese Int,Media Chinese International Ltd,BMG5959D1048,MYR,MDCH
1,malaysia,AMMB,AMMB Holdings Bhd,MYL1015OO006,MYR,AMMB
2,malaysia,CIMB Group,CIMB Group Holdings Bhd,MYL1023OO000,MYR,CIMB
3,malaysia,RHB Bank,RHB Bank Bhd,MYL1066OO009,MYR,RHBC
4,malaysia,Hong Leong Financial,Hong Leong Financial Group Bhd,MYL1082OO006,MYR,HLCB
5,malaysia,Malayan Banking,Malayan Banking Bhd,MYL1155OO000,MYR,MBBM
6,malaysia,Malaysia Building,Malaysia Building Society Bhd,MYL1171OO007,MYR,MBSS
7,malaysia,Public Bank,Public Bank Bhd,MYL1295OO004,MYR,PUBM
8,malaysia,Berjaya Sports Toto,Berjaya Sports Toto Bhd,MYL1562OO007,MYR,BSTB
9,malaysia,DRB Hicom,DRB - Hicom Bhd,MYL1619OO005,MYR,DRBM


In [11]:
def save_csv(df, file_name):
    """Write ``df`` to ``file_name`` as CSV, with a header row and without the index."""
    df.to_csv(path_or_buf=file_name, index=False, header=True)

# Save stock list
# (written to STOCKS_DATASET inside the per-country dataset folder)
save_csv(df_stocks, STOCKS_DATASET)

In [30]:
def get_stock_info(symbol, country):
    """Fetch the investpy stock information for one symbol, or ``None`` on failure.

    Args:
        symbol: Stock symbol passed to ``investpy.get_stock_information``.
        country: Country name passed to ``investpy.get_stock_information``.

    Returns:
        Whatever ``investpy.get_stock_information`` returns, or ``None`` when
        the lookup raises an ``Exception``.
    """
    try:
        return investpy.get_stock_information(symbol, country)
    except Exception as error:
        # Best-effort lookup: report the failure and let the caller skip this stock.
        # Catching Exception instead of using a bare ``except`` lets
        # KeyboardInterrupt/SystemExit propagate, so a long download loop can
        # still be interrupted.
        print(f"Could not fetch stock information for {symbol} ({country}): {error}")
        return None
    
def get_stock_dividends(symbol, country):
    """Fetch the investpy dividend data for one symbol, or ``None`` on failure.

    Args:
        symbol: Stock symbol passed to ``investpy.get_stock_dividends``.
        country: Country name passed to ``investpy.get_stock_dividends``.

    Returns:
        Whatever ``investpy.get_stock_dividends`` returns, or ``None`` when
        the lookup raises an ``Exception``.
    """
    try:
        return investpy.get_stock_dividends(symbol, country)
    except Exception as error:
        # Best-effort lookup: report the failure and let the caller skip this stock.
        # Catching Exception instead of using a bare ``except`` lets
        # KeyboardInterrupt/SystemExit propagate, so a long download loop can
        # still be interrupted.
        print(f"Could not fetch dividends for {symbol} ({country}): {error}")
        return None
    
def read_csv(file):
    """Load ``file`` into a DataFrame; return ``None`` when it is not an existing regular file."""
    return pd.read_csv(file) if os.path.isfile(file) else None

def download_stocks_info(df):
    """Download stock information for every row of ``df`` and save it as CSV.

    Each row's ``symbol`` and ``country`` are passed to ``get_stock_info``.
    Results are accumulated and written to ``STOCKS_INFO_DATASET`` through
    ``save_csv`` after every 10th row and once more at the end, so a trailing
    partial batch is not lost. Rows whose lookup returned ``None`` are skipped.

    Args:
        df: DataFrame with ``symbol``, ``name`` and ``country`` columns.

    Returns:
        None. The result is the CSV file written to ``STOCKS_INFO_DATASET``.
    """
    # Collect the per-stock frames in a list and concatenate on demand:
    # DataFrame.append was removed in pandas 2.0.
    # Assumes get_stock_info returns a DataFrame on success — TODO confirm.
    frames = []
    total = len(df)
    count = 0
    for count, (_, row) in enumerate(df.iterrows(), start=1):
        print(f"{count}/{total}: {row.symbol}-{row['name']}")
        df_stock = get_stock_info(row.symbol, row.country)
        if df_stock is not None:
            frames.append(df_stock)
        if count % 10 == 0:
            # Checkpoint so an interrupted run keeps what was fetched so far.
            if frames:
                save_csv(pd.concat(frames), STOCKS_INFO_DATASET)
            # Pause between batches — presumably to throttle requests to the data source.
            time.sleep(3)
    # Save the rows fetched since the last checkpoint (the final partial batch).
    if frames and count % 10 != 0:
        save_csv(pd.concat(frames), STOCKS_INFO_DATASET)
        
def download_stocks_dividends(df, limit=10):
    """Download dividend data for the first ``limit`` rows of ``df`` and save it as CSV.

    Each row's ``symbol`` and ``country`` are passed to ``get_stock_dividends``;
    the returned frame is tagged with a ``symbol`` column. Results are written
    to ``STOCKS_DIVIDENDS_DATASET`` through ``save_csv`` after every 10th row
    and once more at the end, so a trailing partial batch is not lost. Rows
    whose lookup returned ``None`` are skipped.

    Args:
        df: DataFrame with ``symbol``, ``name`` and ``country`` columns.
        limit: Maximum number of rows to process. The default of 10 keeps the
            previous behaviour, which stopped unconditionally after the first
            batch of 10; pass ``None`` to process every row.
            NOTE(review): confirm whether the 10-row cap was intended or a
            debugging leftover.

    Returns:
        None. The result is the CSV file written to ``STOCKS_DIVIDENDS_DATASET``.
    """
    rows = df if limit is None else df.iloc[:limit]
    # Collect the per-stock frames in a list and concatenate on demand:
    # DataFrame.append was removed in pandas 2.0.
    # Assumes get_stock_dividends returns a DataFrame on success — TODO confirm.
    frames = []
    total = len(df)
    count = 0
    for count, (_, row) in enumerate(rows.iterrows(), start=1):
        print(f"{count}/{total}: {row.symbol}-{row['name']}")
        df_stock = get_stock_dividends(row.symbol, row.country)
        if df_stock is not None:
            # Tag the rows with their symbol so they stay identifiable once combined.
            df_stock['symbol'] = row.symbol
            frames.append(df_stock)
        if count % 10 == 0:
            # Checkpoint so an interrupted run keeps what was fetched so far.
            if frames:
                save_csv(pd.concat(frames), STOCKS_DIVIDENDS_DATASET)
            # Pause between batches — presumably to throttle requests to the data source.
            time.sleep(3)
    # Save the rows fetched since the last checkpoint (the final partial batch).
    if frames and count % 10 != 0:
        save_csv(pd.concat(frames), STOCKS_DIVIDENDS_DATASET)

In [28]:
# The stock-information download is currently disabled (left commented out).
#download_stocks_info(df_stocks)

# Fetch dividend data for the listed stocks; results are saved to STOCKS_DIVIDENDS_DATASET.
# Note: as defined in this notebook, the function stops after its first batch of 10 stocks.
download_stocks_dividends(df_stocks)

1/ 929: MDCH - Media Chinese Int
2/ 929: AMMB - AMMB
3/ 929: CIMB - CIMB Group
4/ 929: RHBC - RHB Bank
5/ 929: HLCB - Hong Leong Financial
6/ 929: MBBM - Malayan Banking
7/ 929: MBSS - Malaysia Building
8/ 929: PUBM - Public Bank
9/ 929: BSTB - Berjaya Sports Toto
10/ 929: DRBM - DRB Hicom
