# Web Scrape Practice

In [2]:
from datetime import datetime as dt
from pathlib import Path

import pandas as pd
import requests as reg
from bs4 import BeautifulSoup

import Zach_Library as Zach

## API Practice 2 - Scrape ibon Store Information

### Example 1: Scrape one city

#### Prepare API and parameter

In [1]:
# Target of the store-lookup request, and the form fields that go with it.
api_url = 'https://www.ibon.com.tw/retail_inquiry_ajax.aspx'
form_data = dict(
    strTargetField='COUNTY',  # field being searched on
    strKeyWords='基隆市',  # value to search for (Keelung City)
)

{'strTargetField': 'COUNTY', 'strKeyWords': '基隆市'}

In [None]:
# POST the form to the lookup endpoint. The timeout keeps a stalled connection
# from hanging the kernel, and raise_for_status() surfaces HTTP errors here
# instead of letting an error page be parsed as if it were store data.
resp = reg.post(api_url, data=form_data, timeout=30)
resp.raise_for_status()

# Parse the returned HTML (the 'lxml' parser must be installed).
soup = BeautifulSoup(resp.text, 'lxml')
soup

#### Traverse the entire table to get the raw data

> Method 1

In [6]:
# Walk the table row by row with explicit loops, echoing every cell
# (tab-separated, one table row per printed line) while collecting the same
# stripped text into DataSet.
DataSet = []

trs = soup.find('table').find_all('tr')
for tr in trs:
    # A fresh list per row, so no shared accumulator has to be reset afterwards.
    Row = []
    for td in tr.find_all('td'):
        cell_text = td.text.strip()
        Row.append(cell_text)
        print(cell_text, end='\t')
    DataSet.append(Row)
    print()

DataSet

> Method 2

In [None]:
# Same extraction as a nested list comprehension: one inner list of stripped
# cell texts per table row.
# The rows are looked up again here so this cell only needs `soup`, not
# variables left behind by the Method 1 cell.
trs = soup.find('table').find_all('tr')
DataSet = [[td.text.strip() for td in tr.find_all('td')] for tr in trs]
DataSet


#### Export data as CSV

In [26]:
# The first scraped row holds the column titles; every later row is a record.
store_rows = DataSet[1:]
header_row = DataSet[0]
df = pd.DataFrame(data=store_rows, columns=header_row)
df

Unnamed: 0,店號,店名,地址
0,112879,碇內,基隆市暖暖區源遠路158號160號
1,117140,德信,基隆市信義區東信路50號52號1樓
2,117896,深澳坑,基隆市信義區深澳坑路2-6號2-7號
3,118419,聖心,基隆市中山區西定路38號40號
4,123525,滿福,基隆市信義區深澳坑路166之30號1樓
...,...,...,...
83,970738,慶龍,基隆市仁愛區南榮路187號1樓
84,977142,源遠,基隆市暖暖區源遠路294號296號298號1樓
85,981233,極品,基隆市仁愛區仁三路19號21號1樓2樓
86,989086,仁五,基隆市仁愛區仁五路63號1樓


In [27]:
# Timestamp (to the second) used in the file name, so exports made at
# different times end up in different files.
today = dt.now().strftime("%Y%m%d_%H%M%S")

# to_csv does not create missing folders, so make sure ./Output exists first.
output_dir = Path('./Output')
output_dir.mkdir(parents=True, exist_ok=True)

# 'utf-8-sig' prepends a BOM to the file (commonly needed for Excel to show
# the Chinese text correctly when opening a CSV).
df.to_csv(output_dir / f'ibon_cityquery_{today}.csv', encoding='utf-8-sig')

### Function: Get ibon store information for a specific city

In [58]:
def List_GetIbonCity(strCityName):
    """Fetch the ibon store table for one city/county.

    Posts ``strCityName`` as the keyword of a ``COUNTY`` search to the ibon
    store-lookup endpoint and converts the first ``<table>`` of the response
    into a list of rows.

    Args:
        strCityName: City/county name to search for, e.g. ``'基隆市'``.

    Returns:
        list[list[str]]: One inner list of stripped ``<td>`` texts per
        ``<tr>`` of the table. An empty list is returned when the request
        fails or the response contains no table, so callers always receive a
        list (the failure itself is reported with a printed ``Error:`` line).
    """
    api_url = 'https://www.ibon.com.tw/retail_inquiry_ajax.aspx'
    form_data = {
        'strTargetField': 'COUNTY',
        'strKeyWords': strCityName
    }

    try:
        # The timeout keeps a stalled connection from hanging the notebook;
        # raise_for_status() routes HTTP error responses to the handler below
        # instead of parsing an error page as store data.
        resp = reg.post(api_url, data=form_data, timeout=30)
        resp.raise_for_status()

        soup = BeautifulSoup(resp.text, 'lxml')
        table = soup.find('table')
        if table is None:
            # Raise a descriptive error rather than the AttributeError that
            # calling find_all on None would produce.
            raise ValueError(f'no <table> in the response for {strCityName!r}')

        return [[td.text.strip() for td in tr.find_all('td')]
                for tr in table.find_all('tr')]

    except Exception as e:
        # Deliberately best-effort: report the problem and fall back to an
        # empty result so one failing city does not abort a multi-city run.
        print(f'Error:{e}')
        return []

### Example 2 - Get ibon store information for all cities

#### Test Zone

In [66]:


# Probe a region for which the lookup finds no stores, to see what the
# endpoint sends back in that case.
# Reuses List_GetIbonCity rather than repeating its request/parse steps here.
DataSet = List_GetIbonCity('南海諸島')
DataSet


[['店號', '店名', '地址'], ['無符合條件的門市資料，請重新定義條件']]

#### Get City List

In [45]:
url = 'https://www.ibon.com.tw/retail_inquiry.aspx#gsc.tab=0'

# Zach.getSoup is a project helper; presumably it downloads the page and
# returns a BeautifulSoup object — confirm in Zach_Library.
soup = Zach.getSoup(url)

# The city names are the <option> entries of the <select id="Class1"> element.
# It is looked up once and reused (the earlier discarded find_all is dropped).
city_select = soup.find('select', id='Class1')
List_City = [option.text for option in city_select.find_all('option')]
List_City

['台北市',
 '新北市',
 '基隆市',
 '宜蘭縣',
 '桃園市',
 '新竹市',
 '新竹縣',
 '苗栗縣',
 '台中市',
 '彰化縣',
 '南投縣',
 '雲林縣',
 '嘉義市',
 '嘉義縣',
 '台南市',
 '高雄市',
 '屏東縣',
 '花蓮縣',
 '台東縣',
 '澎湖縣',
 '金門縣',
 '連江縣',
 '南海諸島']

#### Traverse the city list and call List_GetIbonCity for each city; each returned dataset is written as its own sheet in a single Excel file

In [70]:
# ExcelWriter does not create missing folders, so make sure ./Output exists.
output_dir = Path('./Output')
output_dir.mkdir(parents=True, exist_ok=True)

# One workbook for the whole run, one sheet per city.
with pd.ExcelWriter(output_dir / f'IbonStoryList_{today}.xlsx') as w:

    for city in List_City:
        print(city)
        temp_Dataset = List_GetIbonCity(city)

        # The lookup yields nothing usable when it fails; skip the city
        # instead of crashing on the indexing below.
        if not temp_Dataset:
            continue

        header, rows = temp_Dataset[0], temp_Dataset[1:]

        # A city without stores comes back as a single one-cell message row.
        # Keeping only rows shaped like the header skips such cities without
        # relying on their position in List_City.
        rows = [row for row in rows if len(row) == len(header)]
        if not rows:
            continue

        df = pd.DataFrame(rows, columns=header)
        df.to_excel(w, sheet_name=f'Ibon 門市_{city}', index=False)



台北市
新北市
基隆市
宜蘭縣
桃園市
新竹市
新竹縣
苗栗縣
台中市
彰化縣
南投縣
雲林縣
嘉義市
嘉義縣
台南市
高雄市
屏東縣
花蓮縣
台東縣
澎湖縣
金門縣
連江縣


# Path to root library

In [71]:
# Displaying the module object shows the file path requests was imported from.
import requests
requests 

<module 'requests' from 'c:\\Users\\USER\\anaconda3\\Lib\\site-packages\\requests\\__init__.py'>