In [1]:
import os
from io import StringIO
from urllib.parse import quote, urlencode

import pandas as pd
import pyodbc
import requests
from bs4 import BeautifulSoup

In [2]:
# Base endpoint of the Muslim Pro prayer-times lookup; generateUrlReq appends the query parameters to it.
urlStr = "https://www.muslimpro.com/en/find"

In [3]:
def sanitizeParamValues(value):
    """Percent-encode a single query-string value.

    Every character other than letters, digits and "_.-~" is escaped, so
    reserved characters such as "&", "=", "#" or "%" can no longer slip into
    the URL and alter its structure. Spaces still become "%20" and commas
    "%2C", exactly as before.

    Args:
        value: The raw (not yet encoded) parameter value, as a string.

    Returns:
        str: The value, safe to place after "key=" in a query string.
    """
    # safe="" also escapes "/", which quote() would otherwise leave untouched.
    return quote(value, safe="")

def generateUrlReq(strUrl, yearMonth):
    """Build the request URL for one month of Kuala Lumpur prayer times.

    Args:
        strUrl: Base URL that the query string is appended to.
        yearMonth: Value sent as the "date" parameter; the notebook passes
            "YYYY-MM" strings such as "2024-01". Non-string values are
            converted with str() by urlencode.

    Returns:
        str: strUrl followed by "?" and the percent-encoded query string.
    """
    dictParams = {
        # required
        "country_code": "MY",
        "country_name": "Malaysia",
        "city_name": "Kuala Lumpur",
        "coordinates": "3.1499222,101.6944619",
        # optional
        "convention": "precalc",
        "date": yearMonth
    }

    # urlencode joins the key=value pairs with "&" and escapes every reserved
    # character. quote_via=quote keeps spaces as "%20"; the default quote_plus
    # would emit "+" instead and change the URLs this function has always produced.
    return strUrl + "?" + urlencode(dictParams, quote_via=quote)

def extractTimesTable(sitePlainText):
    """Locate the prayer-times table inside a page's HTML.

    Args:
        sitePlainText: HTML markup of the page, as text.

    Returns:
        The first <table> element whose class is "prayer-times", or None
        when the page contains no such table.
    """
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so the same page could be
    # parsed differently from one environment to the next.
    soup = BeautifulSoup(sitePlainText, "html.parser")
    return soup.find("table", class_="prayer-times")

def parseStringIO(literalHtml):
    """Return an in-memory text buffer holding the string form of literalHtml.

    Args:
        literalHtml: Any object; it is rendered with str() before buffering.

    Returns:
        io.StringIO: A file-like object positioned at the start of the text.
    """
    htmlText = str(literalHtml)
    return StringIO(htmlText)

def getDataFrameFromReq(strUrl, yearMonthInput, timeout=30):
    """Download one month's page and return its prayer-times table as a DataFrame.

    Args:
        strUrl: Base URL handed to generateUrlReq.
        yearMonthInput: Month selector handed to generateUrlReq (the notebook
            passes strings such as "2024-01").
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook indefinitely.

    Returns:
        pandas.DataFrame: The first table pandas.read_html parses from the
        extracted prayer-times element.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        ValueError: If the page contains no prayer-times table.
    """
    urlReq = generateUrlReq(strUrl, yearMonthInput)

    response = requests.get(urlReq, timeout=timeout)
    # Fail on 4xx/5xx here rather than trying to scrape an error page.
    response.raise_for_status()

    extractedTable = extractTimesTable(response.text)
    if extractedTable is None:
        # Otherwise None would be stringified to "None" and read_html would
        # fail with a message that says nothing about the real cause.
        raise ValueError(f"No prayer-times table found at {urlReq}")

    return pd.read_html(parseStringIO(extractedTable))[0]

def parseDFforDB(parsedDF, year=2024):
    """Convert the scraped prayer-times table into typed, database-ready columns.

    Args:
        parsedDF: DataFrame with string columns "Unnamed: 0" (day labels in
            "%a %d %b" form, e.g. "Mon 01 Jan") and "Fajr", "Dhuhr", "Asr",
            "Maghrib", "Isha'a" (times in "%H:%M" form). Other columns are
            ignored.
        year: Calendar year appended to every day label, because the labels
            themselves carry no year. Defaults to 2024.

    Returns:
        pandas.DataFrame: Columns Date, Fajr, Zuhr, Asr, Maghrib, Isha holding
        datetime.date / datetime.time objects. When parsedDF has no rows, an
        empty DataFrame without columns is returned.
    """
    if len(parsedDF) == 0:
        return pd.DataFrame([])

    timeCols = ["Fajr", "Dhuhr", "Asr", "Maghrib", "Isha'a"]

    # Work on an explicit copy so the column replacements below neither raise
    # SettingWithCopyWarning nor depend on the dtype of the scraped columns.
    df = parsedDF[["Unnamed: 0"] + timeCols].copy()

    df["Unnamed: 0"] = pd.to_datetime(
        df["Unnamed: 0"] + f" {year}", format="%a %d %b %Y"
    ).dt.date
    for col in timeCols:
        df[col] = pd.to_datetime(df[col], format="%H:%M").dt.time

    return df.rename(columns={"Unnamed: 0": "Date", "Dhuhr": "Zuhr", "Isha'a": "Isha"})

def insertDFtoDB(dfToInsert, connStr=None):
    """Insert parsed prayer times into [dbo].[t_time_prayer_source_3].

    Best-effort by design: the outcome is reported with print() and ordinary
    exceptions are not propagated to the caller. All rows are committed
    together; if anything fails before the commit, nothing is committed.

    Args:
        dfToInsert: DataFrame with columns Date, Fajr, Zuhr, Asr, Maghrib
            and Isha (the shape produced by parseDFforDB).
        connStr: Optional full ODBC connection string. When omitted, the
            string is built for the PrayerTimesDW database on INBOOK_X1 with
            credentials taken from the environment: PRAYER_TIMES_DB_UID
            (defaults to "sa") and PRAYER_TIMES_DB_PWD (required), so that no
            password lives in the notebook.
    """
    insertSql = (
        "INSERT INTO [dbo].[t_time_prayer_source_3] "
        "([time_date],[time_fajr],[time_zuhr],[time_asr],[time_maghrib],[time_isha]) "
        "VALUES (?,?,?,?,?,?)"
    )

    conn = None
    try:
        if connStr is None:
            pwd = os.environ.get("PRAYER_TIMES_DB_PWD")
            if not pwd:
                raise RuntimeError("environment variable PRAYER_TIMES_DB_PWD is not set")
            uid = os.environ.get("PRAYER_TIMES_DB_UID", "sa")
            connStr = (
                "DRIVER={ODBC Driver 17 for SQL Server};SERVER=INBOOK_X1;"
                f"DATABASE=PrayerTimesDW;UID={uid};PWD={pwd};"
            )

        conn = pyodbc.connect(connStr)
        cursor = conn.cursor()

        for _, row in dfToInsert.iterrows():
            cursor.execute(
                insertSql,
                row["Date"], row["Fajr"], row["Zuhr"], row["Asr"], row["Maghrib"], row["Isha"],
            )

        conn.commit()
        print("SUCCESS! Done INSERT INTO database")
    except Exception as exc:
        # Keep the original failure line, but also say why it failed.
        print("INSERT INTO database FAILED!")
        print(f"Reason: {exc!r}")
    finally:
        # Always release the connection, including when an insert fails midway.
        if conn is not None:
            conn.close()

In [4]:
# Fetch the January 2024 page, then convert the scraped table into date/time columns ready for loading.
muslimProDataJan2024 = getDataFrameFromReq(urlStr, "2024-01")
dfJan2024 = parseDFforDB(muslimProDataJan2024)
# Bare last expression: shows the parsed frame as the cell output.
dfJan2024

Unnamed: 0,Date,Fajr,Zuhr,Asr,Maghrib,Isha
0,2024-01-01,06:06:00,13:19:00,16:42:00,19:16:00,20:31:00
1,2024-01-02,06:07:00,13:20:00,16:42:00,19:17:00,20:31:00
2,2024-01-03,06:07:00,13:20:00,16:43:00,19:17:00,20:32:00
3,2024-01-04,06:08:00,13:21:00,16:43:00,19:18:00,20:32:00
4,2024-01-05,06:08:00,13:21:00,16:44:00,19:18:00,20:33:00
5,2024-01-06,06:09:00,13:22:00,16:44:00,19:19:00,20:33:00
6,2024-01-07,06:09:00,13:22:00,16:44:00,19:19:00,20:34:00
7,2024-01-08,06:10:00,13:22:00,16:45:00,19:20:00,20:34:00
8,2024-01-09,06:10:00,13:23:00,16:45:00,19:20:00,20:34:00
9,2024-01-10,06:10:00,13:23:00,16:46:00,19:21:00,20:35:00


In [5]:
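# Left disabled on purpose: the load is a plain INSERT, so re-running this cell
# may add the same rows again (or fail if the table enforces uniqueness).
# Uncomment to load the January 2024 rows; the output shown below is presumably
# from an earlier run made before the call was commented out.
# insertDFtoDB(dfJan2024)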

SUCCESS! Done INSERT INTO database
