# update GOCI-II crawler
The [GK2B_crawler.ipynb](https://github.com/chhyyi/aiffelthon/blob/main/GK2B_crawler.ipynb) notebook just looks for files on the web page provided by OPeNDAP. I planned to update it to use either the OPeNDAP API or the open API.   I tried the [open API](https://www.nosc.go.kr/openapi/actionOpenApiIssue.do) first, but it only returned links to '.do' files!  
In the case of OPeNDAP, it seems I would need to be issued an ID and password...  
Maybe it was just a waste of time...


In [2]:
import urllib
import requests
import json
import pprint
import pandas as pd
import os
from pathlib import Path


In [29]:
# Service key handed to GOCI2Downloader below; it is left blank in this notebook.
auth_key = '' # issue key at this page https://www.nosc.go.kr/openapi/actionOpenApiIssue.do 


In [138]:
class GOCI2Downloader():
    """
    Build requests against the open-API of the GOCI-II satellite,
    according to the document,
    https://www.nosc.go.kr/boardContents/actionBoardContentsCons0010.do

    parameters:
        auth_key = service key sent as the ServiceKey query parameter.
        start_date, end_date = search period, converted with str() and sent
            as startDate / endDate (e.g. 20220901).
        slot = Division of region of GOCI-II. 13 means entire region.
        data_format = value sent as ResultType. to_dataframe() parses the
            response as JSON, so it only works with "json".
        local_path = local directory in which downloaded files are saved.
    """
    def __init__(self, auth_key, start_date, end_date, slot='13', data_format="json", local_path="./download/"):
        self.date0 = str(start_date)
        self.date1 = str(end_date)
        self.auth_key = str(auth_key)
        self.slot = str(slot)
        self.format = str(data_format)
        self.local_path = local_path #local directory to save file

    def url(self):
        """Return the search URL for the configured key, period, slot and format."""
        link=f'http://nosc.go.kr/openapi/GK2BMedia/search.do?ServiceKey={self.auth_key}&startDate={self.date0}&endDate={self.date1}&slot={self.slot}&ResultType={self.format}'
        return link

    def to_dataframe(self):
        """
        Query the search URL and return the 'data' records as a DataFrame.

        The response body is parsed as JSON; the per-record 'resultCode'
        counts are printed before the 'data' entries are expanded into
        columns.
        """
        r = requests.get(self.url())
        j = r.json()
        df = pd.DataFrame.from_dict(j)
        print('request result codes count(200 - okay, 400 failure):\n',df['resultCode'].value_counts())
        df = pd.DataFrame(df['data'].values.tolist())
        return df

    def download_file(self, url, local_path):
        """
        Download `url` into the directory `local_path` and return the file name.

        from Roman Podlinov, https://stackoverflow.com/a/16696317

        The file name is the last '/'-separated component of `url`.
        Raises requests.HTTPError (via raise_for_status) on an error status.
        """
        local_filename = url.split('/')[-1]
        path = os.path.join(local_path, local_filename)
        # NOTE the stream=True parameter below
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            with open(path, 'wb') as f:
                # Write chunk by chunk; reading r.content would load the whole
                # body into memory and defeat stream=True.
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        return local_filename

    @staticmethod
    def _with_extension(file_path, file_format):
        """Swap a trailing '.do' of `file_path` for '.' + file_format; other paths are returned unchanged."""
        if file_path.endswith('.do'):
            return file_path[:-len('.do')] + '.' + file_format
        return file_path

    def download_files(self, file_format='nc.nc4'):
        """
        Download the file of every record whose product is 'RI'.

        Each record's 'filePath' ends in '.do'; that suffix is replaced by
        '.' + file_format to form the download URL. Files are written to
        self.local_path, which is created if it does not exist.

        NOTE(review): it is not confirmed that the server actually serves
        files under the rewritten names; download_file raises on the first
        URL that returns an error status.
        """
        df = self.to_dataframe()
        df = df.loc[df['product']=='RI']
        # Build the list locally; it was previously undefined in this method.
        urls = [self._with_extension(p, file_format) for p in df['filePath']]
        # Create exactly the directory download_file() writes into.
        Path(self.local_path).mkdir(parents=True, exist_ok=True)
        for url in urls:
            self.download_file(url, self.local_path)
            print(f"attempt to download {url} finished.")
            
    

In [None]:
# Build a downloader for the period 20220901 to 20220905 with the default slot, format and local path.
downloader = GOCI2Downloader(auth_key, 20220901, 20220905)
# Show the search URL that this downloader queries.
req = downloader.url()
print(req)

In [146]:
# Query the open API and load the returned records into a DataFrame.
df=downloader.to_dataframe()
# Bare expression: display the DataFrame as the cell output.
df

request result codes count(200 - okay, 400 failure):
 200    3589
Name: resultCode, dtype: int64


Unnamed: 0,sateName,sensor,sateLevel,area,product,slot,filePath,fileName,obsTimeUTC,obsTimeKST
0,GK2B,GOCI-II,L1B,한반도,,13,http://www.khoa.go.kr/nosc/data/satellite/GK2B...,GK2_GOCI2_L1B_20220831_231500_RLA_1,2022-08-31 23:15:00,2022-09-01 08:15:00
1,GK2B,GOCI-II,L1B,한반도,,13,http://www.khoa.go.kr/nosc/data/satellite/GK2B...,GK2_GOCI2_L1B_20220901_001500_RLA_1,2022-09-01 00:15:00,2022-09-01 09:15:00
2,GK2B,GOCI-II,L1B,한반도,,13,http://www.khoa.go.kr/nosc/data/satellite/GK2B...,GK2_GOCI2_L1B_20220901_011500_RLA_1,2022-09-01 01:15:00,2022-09-01 10:15:00
3,GK2B,GOCI-II,L1B,한반도,,13,http://www.khoa.go.kr/nosc/data/satellite/GK2B...,GK2_GOCI2_L1B_20220901_021500_RLA_1,2022-09-01 02:15:00,2022-09-01 11:15:00
4,GK2B,GOCI-II,L1B,한반도,,13,http://www.khoa.go.kr/nosc/data/satellite/GK2B...,GK2_GOCI2_L1B_20220901_031500_RLA_1,2022-09-01 03:15:00,2022-09-01 12:15:00
...,...,...,...,...,...,...,...,...,...,...
3584,GK2B,GOCI-II,L2,한반도,Kd_660,13,http://www.khoa.go.kr/nosc/data/satellite/GK2B...,GK2B_GOCI2_L2_20220904_081530_LA_Kd_660,2022-09-04 08:15:30,2022-09-04 17:15:30
3585,GK2B,GOCI-II,L2,한반도,IOP_bb555,13,http://www.khoa.go.kr/nosc/data/satellite/GK2B...,GK2B_GOCI2_L2_20220904_081530_LA_IOP_bb555,2022-09-04 08:15:30,2022-09-04 17:15:30
3586,GK2B,GOCI-II,L2,한반도,IOP_bb412,13,http://www.khoa.go.kr/nosc/data/satellite/GK2B...,GK2B_GOCI2_L2_20220904_081530_LA_IOP_bb412,2022-09-04 08:15:30,2022-09-04 17:15:30
3587,GK2B,GOCI-II,L2,한반도,IOP_a745,13,http://www.khoa.go.kr/nosc/data/satellite/GK2B...,GK2B_GOCI2_L2_20220904_081530_LA_IOP_a745,2022-09-04 08:15:30,2022-09-04 17:15:30


In [None]:
# Run the downloader's bulk download step.
downloader.download_files()

In [147]:
# Print the filePath values of the rows whose product is 'RI'.
print(df.loc[df['product']=='RI']['filePath'].values)

['http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2022/08/31/GK2B_GOCI2_L2_20220831_231530_LA_RI.do'
 'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2022/09/01/GK2B_GOCI2_L2_20220901_001530_LA_RI.do'
 'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2022/09/01/GK2B_GOCI2_L2_20220901_011530_LA_RI.do'
 'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2022/09/01/GK2B_GOCI2_L2_20220901_021530_LA_RI.do'
 'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2022/09/01/GK2B_GOCI2_L2_20220901_031530_LA_RI.do'
 'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2022/09/01/GK2B_GOCI2_L2_20220901_041530_LA_RI.do'
 'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2022/09/01/GK2B_GOCI2_L2_20220901_051530_LA_RI.do'
 'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2022/09/01/GK2B_GOCI2_L2_20220901_061630_LA_RI.do'
 'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2022/09/01/GK2B_GOCI2_L2_20220901_071530_LA

# Troubleshooting: missing nc (netCDF) files!

In [101]:
# Derive candidate URLs by replacing '.do' with '.nc' in every filePath (all rows, not only 'RI').
urls = df['filePath'].apply(lambda x: x.replace('.do','.nc'))

In [103]:
# Bare expression: display the derived URLs as an array.
urls.values

array(['http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2021/12/31/GK2_GOCI2_L1B_20211231_231500_RLA_1.nc',
       'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2022/01/02/GK2_GOCI2_L1B_20220102_071500_RLA_1.nc',
       'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2022/01/02/GK2_GOCI2_L1B_20220102_041500_RLA_1.nc',
       ...,
       'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2021/12/31/GK2B_GOCI2_L2_20211231_231530_LA_SRL_SFC443.nc',
       'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2021/12/31/GK2B_GOCI2_L2_20211231_231530_LA_SRL_SFC709.nc',
       'http://www.khoa.go.kr/nosc/data/satellite/GK2B/GOCI-II/L1B/2021/12/31/GK2B_GOCI2_L2_20211231_231530_LA_IOP_a745.nc'],
      dtype=object)