# Get the Stock Code of the stock
- Stock codes are used because they provide a unique identifier for each stock
- download a pdf file "List_of_Companies_2022.06.24__Website_.pdf" 
from : https://www.bursamalaysia.com/sites/5d809dcf39fba22790cad230/assets/62b573f45b711a4597eb7706/List_of_Companies_2022.06.24__Website_.pdf 

### Import libaries 

In [1]:
import tabula
import pandas as pd

In [2]:
# Read the first page and specify the header
dfs = tabula.read_pdf("List_of_Companies_2022.06.24__Website_.pdf", pages='2-25', lattice=True)

# Loop through the list of DataFrames and remove the first two rows from each
dfs = [df.iloc[1:] for df in dfs]

# Concatenate the DataFrames into a single DataFrame
df = pd.concat(dfs, ignore_index=True)

In [3]:
df

Unnamed: 0.1,LISTING TEAM IN CHARGE,Unnamed: 0,Unnamed: 1,Unnamed: 2
0,1,7-ELEVEN MALAYSIA HOLDINGS BERHAD,5250,3
1,2,ABF MALAYSIA BOND INDEX FUND,0800EA,2
2,3,ABLE GLOBAL BERHAD,7167,3
3,4,ABLEGROUP BERHAD,7086,2
4,5,ABM FUJIYA BERHAD,5198,2
...,...,...,...,...
996,997,YX PRECIOUS METALS BHD,0250,4
997,998,ZECON BERHAD,7028,1
998,999,ZELAN BERHAD,2283,1
999,1000,ZEN TECH INTERNATIONAL BERHAD,0094,4


In [4]:
# delete columns that are not needed
del df['LISTING TEAM IN CHARGE']
del df['Unnamed: 2']

In [5]:
df

Unnamed: 0.1,Unnamed: 0,Unnamed: 1
0,7-ELEVEN MALAYSIA HOLDINGS BERHAD,5250
1,ABF MALAYSIA BOND INDEX FUND,0800EA
2,ABLE GLOBAL BERHAD,7167
3,ABLEGROUP BERHAD,7086
4,ABM FUJIYA BERHAD,5198
...,...,...
996,YX PRECIOUS METALS BHD,0250
997,ZECON BERHAD,7028
998,ZELAN BERHAD,2283
999,ZEN TECH INTERNATIONAL BERHAD,0094


In [6]:
# rename the column headers
df.rename(columns = {'Unnamed: 0': 'stock_name', 'Unnamed: 1': 'stock_code'}, inplace = True)

In [7]:
df

Unnamed: 0,stock_name,stock_code
0,7-ELEVEN MALAYSIA HOLDINGS BERHAD,5250
1,ABF MALAYSIA BOND INDEX FUND,0800EA
2,ABLE GLOBAL BERHAD,7167
3,ABLEGROUP BERHAD,7086
4,ABM FUJIYA BERHAD,5198
...,...,...
996,YX PRECIOUS METALS BHD,0250
997,ZECON BERHAD,7028
998,ZELAN BERHAD,2283
999,ZEN TECH INTERNATIONAL BERHAD,0094


### Remove duplicate stock code 

In [8]:
df[df['stock_code']=='5235SS']

Unnamed: 0,stock_name,stock_code
444,KLCC PROPERTY HOLDINGS BERHAD,5235SS
445,KLCC REAL ESTATE INVESTMENT TRUST,5235SS


In [9]:
df = df.drop_duplicates(subset='stock_code')

In [10]:
df[df['stock_code']=='5235SS']

Unnamed: 0,stock_name,stock_code
444,KLCC PROPERTY HOLDINGS BERHAD,5235SS


In [11]:
df

Unnamed: 0,stock_name,stock_code
0,7-ELEVEN MALAYSIA HOLDINGS BERHAD,5250
1,ABF MALAYSIA BOND INDEX FUND,0800EA
2,ABLE GLOBAL BERHAD,7167
3,ABLEGROUP BERHAD,7086
4,ABM FUJIYA BERHAD,5198
...,...,...
996,YX PRECIOUS METALS BHD,0250
997,ZECON BERHAD,7028
998,ZELAN BERHAD,2283
999,ZEN TECH INTERNATIONAL BERHAD,0094


### Remove all the delisted stock code

- delisted stock : ['03009', '0400GA', '7206', '6548', '2925', '0401GA', '9474', '0400GB', '7205', '8761']

In [12]:
delisted_stock=['03009', '0400GA', '7206', '6548', '2925', '0401GA', '9474', '0400GB', '7205', '8761']

In [13]:
# remove rows that contain any of the values in the list
df = df[~df['stock_code'].isin(delisted_stock)]
df

Unnamed: 0,stock_name,stock_code
0,7-ELEVEN MALAYSIA HOLDINGS BERHAD,5250
1,ABF MALAYSIA BOND INDEX FUND,0800EA
2,ABLE GLOBAL BERHAD,7167
3,ABLEGROUP BERHAD,7086
4,ABM FUJIYA BERHAD,5198
...,...,...
996,YX PRECIOUS METALS BHD,0250
997,ZECON BERHAD,7028
998,ZELAN BERHAD,2283
999,ZEN TECH INTERNATIONAL BERHAD,0094


# Connect to database
- load stock information(stock code , stock name) into database

### Import libraries

In [14]:
import psycopg2
import pandas as pd
from sqlalchemy import create_engine
from urllib.parse import quote 

### Connect to database
- connecting to a PostgreSQL database using the psycopg2 library

##### Steps
1. Create a database in PostgreSQL
2. The connection string follows the standard format for connecting to a PostgreSQL database using SQLAlchemy. It consists of the following elements:

    - 'postgresql://': the dialect and driver for SQLAlchemy to use for the connection.
    - 'postgres': the username to use for the connection.
    - '%s': a placeholder for the password to use for the connection.
    - '@localhost': the hostname of the database server to connect to.
    - '/dividend_investing_no2': the name of the database to connect to.
    - The password for the connection is passed to the quote function from the urllib library, which adds URL encoding to the string (e.g. replacing special characters with %-encoded equivalents). The result of the quote function is then used to replace the %s placeholder in the connection string.

In [15]:
conn_string = 'postgresql://postgres:%s@localhost/dividend_investing_no2'% quote('@SMWHot4')

In [16]:
db = create_engine(conn_string)
conn = db.connect()

###  write a pandas DataFrame (df) to a PostgreSQL database, and then retrieve and print the data stored in the database.

In [17]:
df.to_sql('stock_info', con=conn, if_exists='replace',
          index=False)
conn = psycopg2.connect(conn_string
                        )
conn.autocommit = True
cursor = conn.cursor()

sql1 = '''select * from stock_info;'''
cursor.execute(sql1)
for i in cursor.fetchall():
    print(i)

conn.commit()
conn.close()

('7-ELEVEN MALAYSIA HOLDINGS BERHAD', '5250')
('ABF MALAYSIA BOND INDEX FUND', '0800EA')
('ABLE GLOBAL BERHAD', '7167')
('ABLEGROUP BERHAD', '7086')
('ABM FUJIYA BERHAD', '5198')
('ACE INNOVATE ASIA BERHAD', '03028')
('ACME HOLDINGS BERHAD', '7131')
('ACO GROUP BERHAD', '0218')
('ADVANCE INFORMATION MARKETING BERHAD', '0122')
('ADVANCE SYNERGY BERHAD', '1481')
('ADVANCECON HOLDINGS BERHAD', '5281')
('ADVANCED PACKAGING TECHNOLOGY (M) BHD', '9148')
('ADVENTA BERHAD', '7191')
('AE MULTI HOLDINGS BERHAD', '7146')
('AEMULUS HOLDINGS BERHAD', '0181')
('AEON CO. (M) BHD', '6599')
('AEON CREDIT SERVICE (M) BERHAD', '5139')
('AFFIN BANK BERHAD', '5185')
('AGESON BERHAD', '7145')
('AHB HOLDINGS BERHAD', '7315')
('AHMAD ZAKI RESOURCES BERHAD', '7078')
('AIMFLEX BERHAD', '0209')
('AIRASIA X BERHAD', '5238')
('AJINOMOTO (MALAYSIA) BERHAD', '2658')
('AJIYA BERHAD', '7609')
('AL-`AQAR HEALTHCARE REIT', '5116')
('ALAM MARITIM RESOURCES BERHAD', '5115')
('ALCOM GROUP BERHAD', '2674')
('ALDRICH RESOURC

### Add '.KL ' to use it for yahoo finance

In [18]:
df['stock_code']=df['stock_code']+'.KL'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['stock_code']=df['stock_code']+'.KL'


In [19]:
df = df.reset_index()

In [20]:
df

Unnamed: 0,index,stock_name,stock_code
0,0,7-ELEVEN MALAYSIA HOLDINGS BERHAD,5250.KL
1,1,ABF MALAYSIA BOND INDEX FUND,0800EA.KL
2,2,ABLE GLOBAL BERHAD,7167.KL
3,3,ABLEGROUP BERHAD,7086.KL
4,4,ABM FUJIYA BERHAD,5198.KL
...,...,...,...
985,996,YX PRECIOUS METALS BHD,0250.KL
986,997,ZECON BERHAD,7028.KL
987,998,ZELAN BERHAD,2283.KL
988,999,ZEN TECH INTERNATIONAL BERHAD,0094.KL


In [21]:
del df['index']

In [22]:
df

Unnamed: 0,stock_name,stock_code
0,7-ELEVEN MALAYSIA HOLDINGS BERHAD,5250.KL
1,ABF MALAYSIA BOND INDEX FUND,0800EA.KL
2,ABLE GLOBAL BERHAD,7167.KL
3,ABLEGROUP BERHAD,7086.KL
4,ABM FUJIYA BERHAD,5198.KL
...,...,...
985,YX PRECIOUS METALS BHD,0250.KL
986,ZECON BERHAD,7028.KL
987,ZELAN BERHAD,2283.KL
988,ZEN TECH INTERNATIONAL BERHAD,0094.KL


In [23]:
# store the dataframe into a csv file for further uses
df.to_csv('stock_code.csv')