In [1]:
import logging
import os
from urllib.parse import quote_plus

import pandas as pd

# Configure the root logger at INFO level so the logging.info calls in this notebook are displayed.
logging.basicConfig(level=logging.INFO)

In [2]:
# Indonesian Wikipedia page "Daftar miliarder Forbes"; this is the page handed to scrape().
url = "https://id.wikipedia.org/wiki/Daftar_miliarder_Forbes"

In [3]:
def scrape(url):
    """Parse the HTML tables found at ``url``.

    The target URL is logged at INFO level before the page is handed to
    ``pandas.read_html`` (called with ``header=None``).

    Args:
        url: Address of the page to read.

    Returns:
        The value returned by ``pd.read_html`` for that page.
    """
    logging.info(f"scrapping website url : {url}")
    tables = pd.read_html(url, header=None)
    return tables

In [4]:
# scrape() returns the tables parsed from the page; index 1 selects the second one.
# NOTE(review): the position is hard-coded, so a layout change on the page would silently select a different table.
dfs = scrape(url)[1]

INFO:root:scrapping website url : https://id.wikipedia.org/wiki/Daftar_miliarder_Forbes


In [5]:
# Inspect the table exactly as scraped.
dfs

Unnamed: 0,No.,Nama,Kekayaan bersih (USD),Usia,Kebangsaan,Sumber kekayaan
0,,Jeff Bezos,$177 miliar,57,Amerika Serikat,Amazon
1,,Elon Musk,$151 miliar,49,Amerika Serikat,"Tesla, SpaceX"
2,,Bernard Arnault & keluarga,$150 miliar,72,Prancis,LVMH
3,,Bill Gates,$124 miliar,65,Amerika Serikat,Microsoft
4,,Mark Zuckerberg,$97 miliar,36,Amerika Serikat,Meta Platforms
5,,Warren Buffett,$96 miliar,90,Amerika Serikat,Berkshire Hathaway
6,,Larry Ellison,$93 miliar,76,Amerika Serikat,Oracle Corporation
7,,Larry Page,$91.5 miliar,48,Amerika Serikat,Alphabet Inc.
8,,Sergey Brin,$89 miliar,47,Amerika Serikat,Alphabet Inc.
9,,Mukesh Ambani,$84.5 miliar,63,India,Reliance Industries


In [30]:
# The scraped 'No.' column is empty, so fill it with a 1-based rank derived from row order.
# NOTE(review): this mutates dfs in place rather than producing a new frame.
dfs['No.'] = dfs.reset_index().index + 1

In [31]:
# Re-display the table to confirm that 'No.' is now populated.
dfs

Unnamed: 0,No.,Nama,Kekayaan bersih (USD),Usia,Kebangsaan,Sumber kekayaan
0,1,Jeff Bezos,$177 miliar,57,Amerika Serikat,Amazon
1,2,Elon Musk,$151 miliar,49,Amerika Serikat,"Tesla, SpaceX"
2,3,Bernard Arnault & keluarga,$150 miliar,72,Prancis,LVMH
3,4,Bill Gates,$124 miliar,65,Amerika Serikat,Microsoft
4,5,Mark Zuckerberg,$97 miliar,36,Amerika Serikat,Meta Platforms
5,6,Warren Buffett,$96 miliar,90,Amerika Serikat,Berkshire Hathaway
6,7,Larry Ellison,$93 miliar,76,Amerika Serikat,Oracle Corporation
7,8,Larry Page,$91.5 miliar,48,Amerika Serikat,Alphabet Inc.
8,9,Sergey Brin,$89 miliar,47,Amerika Serikat,Alphabet Inc.
9,10,Mukesh Ambani,$84.5 miliar,63,India,Reliance Industries


In [32]:
import re

In [33]:
def is_money_miliar(string_money):
    """Tell whether an amount string is expressed in "miliar".

    The check is case-insensitive. Trailing whitespace and trailing
    bracketed footnote markers (for example ``"$177 miliar[1]"``) are ignored,
    so they cannot hide the unit suffix and cause the amount to be treated as
    a non-"miliar" value.

    Args:
        string_money: Raw amount text, e.g. ``"$177 miliar"``.

    Returns:
        True if the text, after trimming, ends with ``"miliar"``; False otherwise.
    """
    # Drop any run of "[...]" markers (and the whitespace around them) at the end.
    trimmed = re.sub(r"(?:\s*\[[^\]]*\])+\s*$", "", string_money).strip()
    return trimmed.lower().endswith('miliar')

In [34]:
def transform_money_format(string_money):
    """Reduce an amount string to its bare numeric text.

    Steps, in order:
      1. lower-case the text, turn ``,`` into ``.`` and remove spaces;
      2. remove bracketed footnote markers together with their content, so
         ``"$177 miliar[1]"`` yields ``"177"`` rather than ``"1771"``;
      3. strip the currency sign and the letters that make up the unit words
         ``miliar`` / ``juta`` (plus stray ``?``, ``|``, ``[`` and ``]``).

    Args:
        string_money: Raw amount text, e.g. ``"$91,5 miliar"``.

    Returns:
        The remaining text as a string (e.g. ``"91.5"``); the caller converts
        it to a number.
    """
    half_clean_string = string_money.lower().replace(',', '.').replace(' ', '')
    # Footnotes must go as a whole: deleting only the brackets would leave the
    # footnote number glued onto the amount.
    without_footnotes = re.sub(r"\[[^\]]*\]", "", half_clean_string)
    # This is a character class on purpose: every listed character is removed
    # wherever it occurs, which is what the previous pattern effectively did.
    return re.sub(r"[?\[\]|$mjilartu]", "", without_footnotes)


In [35]:
def transform(df, tahun):
    """Rename the scraped columns and add a year and a numeric wealth column.

    Args:
        df: Frame holding the columns 'No.', 'Nama', 'Kekayaan bersih (USD)',
            'Usia', 'Kebangsaan' and 'Sumber kekayaan'.
        tahun: Value written to the 'tahun' column of every row.

    Returns:
        A frame with the columns nomor_urut, tahun, nama, usia, kebangsaan,
        kekayaan_bersih_juta_usd and sumber_kekayaan, in that order. The input
        frame is not modified (``rename`` returns a new frame).
    """
    logging.info("Transforming Dataframe....")

    def _amount_in_juta(raw_amount):
        # Amounts flagged as "miliar" are multiplied by 1000; all others are
        # taken as they are.
        in_miliar = is_money_miliar(raw_amount)
        amount = float(transform_money_format(raw_amount))
        return amount * 1000 if in_miliar else amount

    output_columns = [
        'nomor_urut',
        'tahun',
        'nama',
        'usia',
        'kebangsaan',
        'kekayaan_bersih_juta_usd',
        'sumber_kekayaan',
    ]

    result = df.rename(columns={
        'No.': 'nomor_urut',
        'Nama': 'nama',
        'Kekayaan bersih (USD)': 'kekayaan_bersih_usd',
        'Usia': 'usia',
        'Kebangsaan': 'kebangsaan',
        'Sumber kekayaan': 'sumber_kekayaan',
    })
    result['tahun'] = tahun
    result['kekayaan_bersih_juta_usd'] = result['kekayaan_bersih_usd'].apply(_amount_in_juta)

    return result[output_columns]

In [36]:
# Build the cleaned table from the scraped one, tagging every row with the year 2021.
df_2021 = transform(dfs, 2021)

INFO:root:Transforming Dataframe....


In [37]:
# Inspect the transformed table.
df_2021

Unnamed: 0,nomor_urut,tahun,nama,usia,kebangsaan,kekayaan_bersih_juta_usd,sumber_kekayaan
0,1,2021,Jeff Bezos,57,Amerika Serikat,177000.0,Amazon
1,2,2021,Elon Musk,49,Amerika Serikat,151000.0,"Tesla, SpaceX"
2,3,2021,Bernard Arnault & keluarga,72,Prancis,150000.0,LVMH
3,4,2021,Bill Gates,65,Amerika Serikat,124000.0,Microsoft
4,5,2021,Mark Zuckerberg,36,Amerika Serikat,97000.0,Meta Platforms
5,6,2021,Warren Buffett,90,Amerika Serikat,96000.0,Berkshire Hathaway
6,7,2021,Larry Ellison,76,Amerika Serikat,93000.0,Oracle Corporation
7,8,2021,Larry Page,48,Amerika Serikat,91500.0,Alphabet Inc.
8,9,2021,Sergey Brin,47,Amerika Serikat,89000.0,Alphabet Inc.
9,10,2021,Mukesh Ambani,63,India,84500.0,Reliance Industries


In [38]:
from sqlalchemy import create_engine

In [39]:
# Database connection settings. Every value can be overridden through an
# environment variable of the same name, so credentials no longer have to be
# edited into the notebook; the literals are only fallbacks that keep the
# previous behaviour when nothing is set.
# NOTE(review): the fallback password is still stored in this file. Rotate it
# and drop the default once the environment variables are in place.
DB_NAME = os.environ.get('DB_NAME', 'postgres')
DB_USER = os.environ.get('DB_USER', 'user1')
DB_PASSWORD = os.environ.get('DB_PASSWORD', 'user1')
DB_HOST = os.environ.get('DB_HOST', '104.197.148.144')
DB_PORT = os.environ.get('DB_PORT', '5432')
# User and password are percent-encoded so characters such as '@', ':' or '/'
# in a credential cannot corrupt the URL.
CONNECTION_STRING = (
    f'postgresql://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}'
    f'@{DB_HOST}:{DB_PORT}/{DB_NAME}'
)
TABLE_NAME = 'wisnu_orang_terkaya_forbes'

In [40]:
# NOTE(review): displaying this value stores the database password in the saved notebook output;
# consider removing this cell or masking the password before sharing the notebook.
CONNECTION_STRING

'postgresql://user1:user1@104.197.148.144:5432/postgres'

In [41]:
def write_to_postgres(df, db_name, table_name, connection_string):
    """Write ``df`` to a database table, replacing the table if it already exists.

    Args:
        df: DataFrame to store; its index is not written (``index=False``).
        db_name: Database name; used only in the log message.
        table_name: Name of the target table.
        connection_string: URL passed to ``create_engine``.
    """
    engine = create_engine(connection_string)
    try:
        logging.info(f'Writing dataframe to database : {db_name}, table:{table_name}.....')
        # if_exists='replace' means an existing table of this name is replaced.
        df.to_sql(name=table_name, con=engine, if_exists='replace', index=False)
    finally:
        # A fresh engine is created on every call; dispose of it so its pooled
        # connections are closed instead of lingering until garbage collection.
        engine.dispose()

In [43]:
# Store the 2021 table. write_to_postgres uses if_exists='replace', so an existing table named TABLE_NAME is overwritten.
write_to_postgres(df=df_2021, db_name=DB_NAME, table_name=TABLE_NAME, connection_string=CONNECTION_STRING)

INFO:root:Writing dataframe to database : postgres, table:wisnu_orang_terkaya_forbes.....


In [44]:
def read_from_postgres(db_name, table_name, connection_string):
    """Load a whole database table into a DataFrame.

    Args:
        db_name: Database name; used only in the log message.
        table_name: Name of the table to read.
        connection_string: URL passed to ``create_engine``.

    Returns:
        The DataFrame produced by ``pd.read_sql_table`` for ``table_name``.
    """
    engine = create_engine(connection_string)
    try:
        logging.info(f"Reading postgres database: '{db_name}', table: '{table_name}' ...")
        return pd.read_sql_table(table_name, con=engine)
    finally:
        # A fresh engine is created on every call; dispose of it so its pooled
        # connections are closed once the table has been read.
        engine.dispose()

In [45]:
# Read the table back from the database to check what was stored.
result_df = read_from_postgres(db_name=DB_NAME, table_name=TABLE_NAME, connection_string=CONNECTION_STRING)

INFO:root:Reading postgres database: 'postgres', table: 'wisnu_orang_terkaya_forbes' ...


In [46]:
# Print a heading followed by the table read back from the database, rendered as plain text via to_string().
print("Daftar Orang Terkaya Forbes:")
print(result_df.to_string())

Daftar Orang Terkaya Forbes:
   nomor_urut  tahun                        nama  usia       kebangsaan  kekayaan_bersih_juta_usd      sumber_kekayaan
0           1   2021                  Jeff Bezos    57  Amerika Serikat                  177000.0               Amazon
1           2   2021                   Elon Musk    49  Amerika Serikat                  151000.0        Tesla, SpaceX
2           3   2021  Bernard Arnault & keluarga    72          Prancis                  150000.0                 LVMH
3           4   2021                  Bill Gates    65  Amerika Serikat                  124000.0            Microsoft
4           5   2021             Mark Zuckerberg    36  Amerika Serikat                   97000.0       Meta Platforms
5           6   2021              Warren Buffett    90  Amerika Serikat                   96000.0   Berkshire Hathaway
6           7   2021               Larry Ellison    76  Amerika Serikat                   93000.0   Oracle Corporation
7           8   202