# Generate CAR files for each Landsat scene

## Imports

In [13]:
import json
import os
import subprocess
import tarfile
from pathlib import Path

import pandas as pd
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so that
# os.getenv() can see them (presumably where POW_PATH is defined - confirm).
load_dotenv()

True

## Setup source and destination directories

In [2]:
# Landsat scene archives (*.tar) are read from the input directory.
source_tar_dir = Path("../../data") / "input"
source_tars = [*source_tar_dir.glob("*.tar")]

# Each archive is unpacked into its own sub-directory of the output directory.
extracted_tar_dir = Path("../../data") / "output"

# Generated CAR files are written to this directory.
car_file_dir = Path("../../data") / "car_files"

# Scratch directory passed to the CLI's --tmpdir option (kept as a plain string).
temp_dir = "../../data/temp/"

## Load ENV variables

In [15]:
# Command used to invoke the CLI when generating CAR files, read from the
# POW_PATH environment variable; None when the variable is not set.
pow_path = os.environ.get("POW_PATH")

## Extract .tar files (Landsat Scenes) to a new directory

In [20]:
# Unpack every source archive into its own directory named after the archive
# (file name without the .tar suffix) below `extracted_tar_dir`.
for tar in source_tars:
    destination_extracted_tar_file = f"{extracted_tar_dir}/{tar.stem}"
    print(f"Extracting TAR file to {extracted_tar_dir}")
    # Use a separate name for the open archive so the loop variable `tar`
    # (a Path) is not rebound to the TarFile handle.
    # NOTE(review): extractall() trusts the member paths stored in the archive;
    # on Python versions that support extraction filters, consider
    # filter="data" if the archives are not fully trusted.
    with tarfile.open(str(tar)) as archive:
        archive.extractall(path=destination_extracted_tar_file)

Extracting TAR file to ../../data/output
Extracting TAR file to ../../data/output


## Generate CAR files

In [None]:
# car_file_dir.mkdir(parents=True, exist_ok=True)  # uncomment to create the CAR directory automatically

In [3]:
# Every directory directly inside the extraction folder is one extracted scene.
extracted_tars = list(filter(Path.is_dir, extracted_tar_dir.glob("*")))
# Last expression of the cell: shows how many scene directories were found.
len(extracted_tars)

2

In [None]:
# Run the CLI once per extracted scene directory to build a CAR file, and record
# the metadata it reports (payload CID, piece size, piece CID):
#   * a master CSV listing every CAR created so far in this run, and
#   * one JSON reference file per scene holding the full report.
# NOTE(review): scenes whose CAR file already exists are skipped and are not
# added to `df`, so the master CSV only lists CARs created in the current run.
car_columns = ["name", "payload_cid", "piece_size", "piece_cid", "file"]
car_records = []
df = pd.DataFrame(columns=car_columns)
for target in extracted_tars:
    print(target)
    car_target = Path(f"{car_file_dir}/{target.stem}.car")

    if car_target.exists():
        print(f"Skipping {car_target.name}")
        continue

    # Don't aggregate
    # result = subprocess.run([str(pow_path), "offline", "prepare", "--json", str(target), str(car_target)], shell=True, capture_output=True)

    # Aggregate
    # NOTE(review): the command is passed to the shell as a single string, so a
    # path containing spaces or shell metacharacters would be misinterpreted.
    completed = subprocess.run(
        f"{pow_path} offline prepare --json --aggregate {target!s} {car_target!s} --tmpdir {temp_dir}",
        shell=True,
        capture_output=True,
    )
    # limited tmp folder on GEOG cluster; add tmpdir to redirect tmp files

    # Shown by the final print as-is when the output below cannot be parsed.
    result = completed
    try:
        # The JSON report is read from the process's stderr stream.
        result = json.loads(completed.stderr.decode("utf-8"))
        car_records.append(
            {
                "name": str(car_target.stem),
                "payload_cid": result["payload_cid"],
                "piece_size": result["piece_size"],
                "piece_cid": result["piece_cid"],
                "file": str(car_target.name),
            }
        )
        # DataFrame.append() was removed in pandas 2.0, so the frame is rebuilt
        # from the accumulated records instead.
        df = pd.DataFrame(car_records, columns=car_columns)
        # Rewritten after every scene so progress survives an interrupted run.
        df.to_csv(
            f"{car_file_dir.parent!s}/{car_file_dir.stem!s}_car_master.csv"
        )  # CHANGE AS REQUIRED
        # Path.open is an instance method: build the Path first, then open it.
        reference_path = Path(
            f"{car_file_dir.parent!s}/{car_file_dir.stem!s}_{target.stem!s}_car_reference.json"
        )
        with reference_path.open("w", encoding="utf-8") as fw:
            json.dump(result, fw, ensure_ascii=False, indent=4)
    except Exception as e:
        # Deliberately best-effort: report the failure and continue with the
        # next scene instead of aborting the whole run.
        print(f"error - {target}  - {e}")

    print(result)