Skip to content

Commit

Permalink
Prompt user if HIP/TYC files already downloaded
Browse files Browse the repository at this point in the history
  • Loading branch information
ajtribick committed Jun 5, 2021
1 parent d55c61e commit 69cea95
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 38 deletions.
57 changes: 29 additions & 28 deletions celestia_gaia/download_data.py
Expand Up @@ -27,7 +27,7 @@
from astroquery.gaia import Gaia
from astroquery.xmatch import XMatch

from .directories import GAIA_DR2_DIR, GAIA_EDR3_DIR, VIZIER_DIR, XMATCH_DIR
from .directories import GAIA_EDR3_DIR, VIZIER_DIR, XMATCH_DIR
from .ranges import MultiRange


Expand Down Expand Up @@ -237,19 +237,9 @@ def _tyc_query(start: int, end: int) -> str:
"""


def download_gaia_hip(chunk_size: int = 5000, force: bool = False) -> None:
def download_gaia_hip(ranges: MultiRange, chunk_size: int = 5000) -> None:
"""Download HIP data from the Gaia archive."""
required_ranges = MultiRange(1, _HIP_MAX)
if not force:
pattern = re.compile(r'gaiaedr3-hip2-([0-9]+)-([0-9]+)\.votable')
for existing in GAIA_EDR3_DIR.glob('gaiaedr3-hip2-*.votable'):
match = pattern.match(existing.name)
if match:
groups = match.groups()
required_ranges.remove(int(groups[0]), int(groups[1]))
required_ranges.chunk(chunk_size)

for subrange in required_ranges.ranges:
for subrange in ranges.chunk_ranges(chunk_size):
hip_file = GAIA_EDR3_DIR/f'gaiaedr3-hip2-{subrange.begin:06}-{subrange.end:06}.votable'

query = _hip_query(subrange.begin, subrange.end)
Expand Down Expand Up @@ -281,19 +271,9 @@ def download_gaia_hip(chunk_size: int = 5000, force: bool = False) -> None:
Gaia.remove_jobs([job.jobid])


def download_gaia_tyc(chunk_size: int = 20, force: bool = False) -> None:
def download_gaia_tyc(ranges: MultiRange, chunk_size: int = 20) -> None:
"""Download TYC/TDSC data from the Gaia archive."""
required_ranges = MultiRange(1, _TYC_MAX)
if not force:
pattern = re.compile(r'gaiaedr3-tyctdsc-([0-9]+)-([0-9]+)\.votable')
for existing in GAIA_EDR3_DIR.glob('gaiaedr3-tyctdsc-*.votable'):
match = pattern.match(existing.name)
if match:
groups = match.groups()
required_ranges.remove(int(groups[0]), int(groups[1]))
required_ranges.chunk(chunk_size)

for subrange in required_ranges.ranges:
for subrange in ranges.chunk_ranges(chunk_size):
hip_file = (
GAIA_EDR3_DIR/f'gaiaedr3-tyctdsc-part{subrange.begin:04}-{subrange.end:04}.votable'
)
Expand Down Expand Up @@ -327,13 +307,34 @@ def download_gaia_tyc(chunk_size: int = 20, force: bool = False) -> None:
Gaia.remove_jobs([job.jobid])


# Matches the trailing "-<begin>-<end>" of a download file stem. The TYC/TDSC
# files are written as "...-part<begin>-<end>", so an optional "part" prefix
# is accepted before the first number; without it those files never match
# and are always treated as missing.
_RANGE_PATTERN = re.compile(r'-(?:part)?([0-9]+)-([0-9]+)$')


def _getranges(start: int, end: int, path: Path, pattern: str) -> MultiRange:
    """Return the ranges that still need downloading.

    Starts from ``MultiRange(start, end)`` and, for every file in ``path``
    matching the glob ``pattern`` whose stem ends in a ``-<begin>-<end>``
    (or ``-part<begin>-<end>``) suffix, removes that numeric range from it.
    Files whose stem does not end in such a suffix are ignored.
    """
    required_ranges = MultiRange(start, end)
    for existing in path.glob(pattern):
        match = _RANGE_PATTERN.search(existing.stem)
        if match:
            range_begin, range_end = match.groups()
            required_ranges.remove(int(range_begin), int(range_end))
    return required_ranges


def download_gaia() -> None:
"""Download data from the Gaia archive."""
GAIA_DR2_DIR.mkdir(parents=True, exist_ok=True)
GAIA_EDR3_DIR.mkdir(parents=True, exist_ok=True)

download_gaia_hip()
download_gaia_tyc()
hip_ranges = _getranges(1, _HIP_MAX, GAIA_EDR3_DIR, 'gaiaedr3-hip2-*.votable')
if not hip_ranges:
if _yesno('Hipparcos cross-match data already downloaded, replace?'):
hip_ranges = MultiRange(1, _HIP_MAX)
download_gaia_hip(hip_ranges)

tyc_ranges = _getranges(1, _TYC_MAX, GAIA_EDR3_DIR, 'gaiaedr3-tyctdsc-*.votable')
if not tyc_ranges:
if _yesno('Tycho cross-match data already downloaded, replace?'):
tyc_ranges = MultiRange(1, _TYC_MAX)
download_gaia_tyc(tyc_ranges)


# --- SAO XMATCH DOWNLOAD ---
Expand Down
18 changes: 8 additions & 10 deletions celestia_gaia/ranges.py
Expand Up @@ -19,7 +19,7 @@

from __future__ import annotations

from typing import List
from typing import Iterator, List


class Range:
Expand Down Expand Up @@ -80,6 +80,9 @@ class MultiRange:
def __init__(self, begin: int, end: int) -> None:
self.ranges = [Range(begin, end)]

def __bool__(self) -> bool:
return len(self.ranges) > 0

def remove(self, *args) -> None:
"""Removes a range from the set of ranges."""
if len(args) == 1:
Expand All @@ -98,13 +101,8 @@ def remove(self, *args) -> None:

self.ranges = new_ranges

def chunk(self, chunk_size) -> None:
"""Splits the constituent ranges into chunks of at most chunk_size."""
new_ranges = []
def chunk_ranges(self, chunk_size: int) -> Iterator[Range]:
"""Returns the constituent ranges with a maximum chunk size."""
for subrange in self.ranges:
new_ranges += subrange.chunks(chunk_size)
self.ranges = new_ranges

def is_empty(self) -> bool:
"""Checks whether this MultiRange is empty."""
return len(self.ranges) > 0
for chunk in subrange.chunks(chunk_size):
yield chunk

0 comments on commit 69cea95

Please sign in to comment.