## Scrapping of outlet information for the 4 major supermarkets in Singapore

###### 1) Giant Hypermarket
###### 2) Cold Storage 
###### 3) Sheng Siong
###### 4) NTUC Fairprice

In [1]:
import requests
import re
from bs4 import BeautifulSoup
import json 
import numpy as np
import pandas as pd
from tqdm import tqdm

postalCodes = []

### 1) Scrapping of Giant Hypermarket Store Locator

In [2]:
supermarket_name_1 = "Giant Hypermart"
page = requests.get("https://giant.sg/store-locator")
soup = BeautifulSoup(page.content, 'html.parser')
scriptTags = soup.find_all('script', type="text/javascript")

scriptContent = []
for script in scriptTags:
    tagContent = script.get_text()
    if "function initMap()" in tagContent:
        scriptContent = tagContent        
        break;    

In [3]:
soup2 = BeautifulSoup(scriptContent, 'html.parser')
divTags = soup2.find_all('div', {"class": "map-info"})
for item in divTags:
    address = item.find('p', {'class': 'value'})
    postal_code = address.get_text().split(" ")[-1]

    if (len(postal_code) != 6):
        postal_code = postal_code.strip('[" ", "(",")","S"]')
    postalCodes.append({"supermarket" : supermarket_name_1,"postal_code" : postal_code})

### 2) Scrapping of Cold Storage Store Locator

In [4]:
supermarket_name_2 = "Cold Storage"
page = requests.get("https://coldstorage.com.sg/store-locator")
soup = BeautifulSoup(page.content, 'html.parser')
scriptTags = soup.find_all('script', type="text/javascript")
scriptContent = []
for script in scriptTags:
    tagContent = script.get_text()
    if "function initMap()" in tagContent:
        scriptContent = tagContent        
        break;    

In [5]:
soup2 = BeautifulSoup(scriptContent, 'html.parser')
divTags = soup2.find_all('div', {"class": "map-info"})
for item in divTags:
    address = item.find('p', {'class': 'value'})
    postal_code = address.get_text().split(" ")[-1]

    if (len(postal_code) != 6):
        postal_code = postal_code.strip('[" ", "(",")","S"]')
    postalCodes.append({"supermarket" : supermarket_name_2,"postal_code" : postal_code})

### 3) Scrapping of Sheng Siong Store Locator

In [6]:
supermarket_name_3 = "Sheng Siong"
page = requests.get("http://www.shengsiong.com.sg/pages/store-locator.html")
soup = BeautifulSoup(page.content, 'html.parser')
columns = soup.find_all('td')

In [7]:
for column in columns:
    outlet = column.find('div', {"class" : "map-location"})
    if (outlet):
        tags = outlet.find('a')     
        postal_code = tags.next_sibling.next_sibling.next_sibling.next_sibling.next_sibling.next_sibling.split(" ")[-1]
        postal_code = postal_code.strip('[" ", "(",")"]')
        if (len(postal_code) == 6):
            postalCodes.append({"supermarket" : supermarket_name_3,"postal_code" : postal_code})

### 4) Scrapping of NTUC Store Locator

In [8]:
supermarket_name_4 = "NTUC"
url = 'https://www.fairprice.com.sg/store-locator'
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
headers = {'User-Agent': user_agent}
page = requests.get(url,headers=headers)
soup = BeautifulSoup(page.content, 'html.parser')

In [9]:
string = soup.find("script", {'id' : '__NEXT_DATA__'}).get_text()
res = json.loads(string) 
outlets = res["props"]["pageProps"]["data"]["fpstores"]

In [10]:
for outlet in outlets:
    postal_code = outlet['postalCode'] 
    postal_code = postal_code.strip('[" ", "(",")","S"]')
    if (len(postal_code) == 6):
        postalCodes.append({"supermarket" : outlet['storeType'],"postal_code" : postal_code})

### Using OneMap API to retrieve location details

In [11]:
def getPostalInfo(postal_code):
    request = requests.get(
            'https://developers.onemap.sg/commonapi/search?searchVal='+ str(postal_code) + '&returnGeom=Y&getAddrDetails=Y'
        )
    result = request.json()
    return result;

In [12]:
output_df = pd.DataFrame()
tq = tqdm(postalCodes)
for outlet in postalCodes:
    tq.set_description(f"Retrieving {outlet} information")
    results = getPostalInfo(outlet["postal_code"])['results']
    if (results):
        rowOutput = {'shop' : outlet['supermarket'], 'location' : results[0]['ROAD_NAME'].title(), 'latitude' : results[0]["LATITUDE"],'longitude':results[0]["LONGITUDE"]}
        output_df = output_df.append(rowOutput, ignore_index=True)
        
output_df.to_csv("retailer.csv")

Retrieving {'supermarket': 'FairPrice', 'postal_code': '310192'} information:   0%|          | 0/317 [07:00<?, ?it/s]        