# GPM 데이터 다운로드
Python 내장 라이브러리를 사용한 NASA GESDISC GPM 데이터 다운로드 예제

In [28]:
import os
import urllib.request
import http.cookiejar
from datetime import datetime, timedelta

In [29]:
def download_gpm_file(url, output_folder, username=None, password=None):
    """
    Download one GPM file from NASA GES DISC using urllib with cookie handling.

    The response is streamed in chunks to ``<filename>.part`` inside
    ``output_folder`` and renamed to its final name only after the transfer
    finishes, so a failed download never leaves a truncated file under the
    final name.

    Args:
        url (str): URL of the GPM file. The text after the last '/' is used
            as the local file name.
        output_folder (str): Folder to save the downloaded file. It is
            created when it does not exist.
        username (str, optional): Earthdata Login user name. Defaults to the
            ``EARTHDATA_USERNAME`` environment variable.
        password (str, optional): Earthdata Login password. Defaults to the
            ``EARTHDATA_PASSWORD`` environment variable.

    Raises:
        ValueError: If no credentials were passed and the environment
            variables are not set.
        Exception: Any error raised while opening the URL or writing the
            file is reported on stdout and then re-raised.
    """
    # Credentials must never be committed with the code: take them from the
    # caller or from the environment instead.
    if username is None:
        username = os.environ.get("EARTHDATA_USERNAME")
    if password is None:
        password = os.environ.get("EARTHDATA_PASSWORD")
    if not username or not password:
        raise ValueError(
            "Earthdata credentials are missing: pass username/password or set "
            "the EARTHDATA_USERNAME and EARTHDATA_PASSWORD environment variables"
        )

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_folder, exist_ok=True)

    filename = url.split('/')[-1]
    output_path = os.path.join(output_folder, filename)
    partial_path = output_path + ".part"
    chunk_size = 1024 * 1024  # stream 1 MiB at a time instead of buffering the file

    try:
        # Register the credentials for the Earthdata Login host.
        password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, "https://urs.earthdata.nasa.gov", username, password)

        # The cookie jar keeps the cookies set during the login redirects so
        # the final request to the data host is sent with them.
        cookie_jar = http.cookiejar.CookieJar()
        cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar)
        password_handler = urllib.request.HTTPBasicAuthHandler(password_mgr)

        # Build opener with both handlers
        opener = urllib.request.build_opener(cookie_handler, password_handler)

        # Kept from the original implementation: also makes this opener the
        # process-wide default used by urllib.request.urlopen().
        urllib.request.install_opener(opener)

        # Download the file
        print(f"Downloading {filename}...")
        request = urllib.request.Request(url)
        with opener.open(request) as response, open(partial_path, 'wb') as out_file:
            for chunk in iter(lambda: response.read(chunk_size), b""):
                out_file.write(chunk)

        # Publish the file under its final name only once it is complete.
        os.replace(partial_path, output_path)
        print(f"Downloaded {filename} successfully")

    except Exception as e:
        # Remove the incomplete temporary file; a cleanup failure must not
        # hide the original error.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        print(f"Error downloading file: {str(e)}")
        raise

In [30]:
def generate_gpm_urls(start, end):
    """
    Build the URLs of every half-hourly GPM file between two dates.

    Args:
        start (str): First day, in 'YYYYMMDD' format (inclusive).
        end (str): Last day, in 'YYYYMMDD' format (inclusive).

    Returns:
        list: 48 URLs per day in chronological order; empty when ``start``
        is later than ``end``.
    """
    root = "https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHHE.07"

    first_day = datetime.strptime(start, "%Y%m%d")
    last_day = datetime.strptime(end, "%Y%m%d")

    links = []
    for offset in range((last_day - first_day).days + 1):
        day = first_day + timedelta(days=offset)

        # Files are grouped on the server by year and day-of-year.
        folder = f"{root}/{day:%Y}/{day:%j}"
        stamp = f"{day:%Y%m%d}"

        # One file per 30-minute window; the 4-digit field in the file name
        # is the window's starting minute of the day (0000, 0030, ..., 1410).
        for minute_of_day in range(0, 24 * 60, 30):
            hh, mm = divmod(minute_of_day, 60)
            begin = f"{hh:02d}{mm:02d}00"
            # Each window closes 29 min 59 s after it opens.
            finish = f"{hh:02d}{mm + 29:02d}59"
            granule = (
                f"3B-HHR-E.MS.MRG.3IMERG.{stamp}-S{begin}-E{finish}"
                f".{minute_of_day:04d}.V07B.HDF5"
            )
            links.append(f"{folder}/{granule}")

    return links

In [31]:
def download_gpm_files(start_date, end_date, output_folder):
    """
    Download every GPM file in a date range, one after another.

    A failure on one file is reported on stdout and the loop moves on to the
    next file, so a single bad download does not abort the whole batch.

    Args:
        start_date (str): Start date in 'YYYYMMDD' format
        end_date (str): End date in 'YYYYMMDD' format
        output_folder (str): Folder to save the downloaded files
    """
    targets = generate_gpm_urls(start_date, end_date)
    count = len(targets)

    print(f"Found {count} files to download")

    for position, target in enumerate(targets, start=1):
        try:
            print(f"\nDownloading file {position}/{count}")
            download_gpm_file(target, output_folder)
        except Exception as err:
            # Best effort: report the failure and keep going.
            print(f"Error downloading {target}: {err}")

In [32]:
# Example usage: fetch all 48 half-hourly files for a single day.
start_date = "20240117"  # 17 January 2024
end_date = "20240117"    # 17 January 2024 (same day, so one day of data)
output_folder = "../data/GPM"

download_gpm_files(
    start_date=start_date,
    end_date=end_date,
    output_folder=output_folder,
)

Found 48 files to download

Downloading file 1/48
Downloading 3B-HHR-E.MS.MRG.3IMERG.20240117-S000000-E002959.0000.V07B.HDF5...
Downloaded 3B-HHR-E.MS.MRG.3IMERG.20240117-S000000-E002959.0000.V07B.HDF5 successfully

Downloading file 2/48
Downloading 3B-HHR-E.MS.MRG.3IMERG.20240117-S003000-E005959.0030.V07B.HDF5...
Downloaded 3B-HHR-E.MS.MRG.3IMERG.20240117-S003000-E005959.0030.V07B.HDF5 successfully

Downloading file 3/48
Downloading 3B-HHR-E.MS.MRG.3IMERG.20240117-S010000-E012959.0060.V07B.HDF5...
Downloaded 3B-HHR-E.MS.MRG.3IMERG.20240117-S010000-E012959.0060.V07B.HDF5 successfully

Downloading file 4/48
Downloading 3B-HHR-E.MS.MRG.3IMERG.20240117-S013000-E015959.0090.V07B.HDF5...
Downloaded 3B-HHR-E.MS.MRG.3IMERG.20240117-S013000-E015959.0090.V07B.HDF5 successfully

Downloading file 5/48
Downloading 3B-HHR-E.MS.MRG.3IMERG.20240117-S020000-E022959.0120.V07B.HDF5...
Downloaded 3B-HHR-E.MS.MRG.3IMERG.20240117-S020000-E022959.0120.V07B.HDF5 successfully

Downloading file 6/48
Downloadin