In [7]:
from processing import get_A_CH
from processing import get_P_CH

fits_file = r"D:\Data\EUV\193\2017\aia.lev1_5_euv_12s.2017-01-30T120006Z.193.image_lev1_5.fits"

In [8]:
A_CH = get_A_CH(fits_file)
print(A_CH)

(<Time object: scale='utc' format='isot' value=2017-01-30T12:00:04.846>, 0.4057118970264139)


In [9]:
print(A_CH[0], A_CH[1])

2017-01-30T12:00:04.846 0.4057118970264139


In [10]:
P_CH_30 = get_P_CH(fits_file)
P_CH_90 = get_P_CH(fits_file, lon=7.5, lat=90)
print(P_CH_30)
print(P_CH_90)

(<Time object: scale='utc' format='isot' value=2017-01-30T12:00:04.846>, 18033.83353508105)
(<Time object: scale='utc' format='isot' value=2017-01-30T12:00:04.846>, 27649.09364136569)


In [11]:
fits_file = r"D:\Data\EUV\193\2017\aia.lev1_5_euv_12s.2017-01-28T120006Z.193.image_lev1_5.fits"
A_CH = get_A_CH(fits_file)
P_CH_30 = get_P_CH(fits_file)
P_CH_90 = get_P_CH(fits_file, lon=7.5, lat=90)
print(A_CH)
print(P_CH_30)
print(P_CH_90)

(<Time object: scale='utc' format='isot' value=2017-01-28T12:00:04.843>, 0.027339411222224892)
(<Time object: scale='utc' format='isot' value=2017-01-28T12:00:04.843>, 9995.355801037378)
(<Time object: scale='utc' format='isot' value=2017-01-28T12:00:04.843>, 12736.156412926932)


In [21]:
import numpy as np
import pandas as pd

import argparse
from pathlib import Path
from tqdm import tqdm

from sunpy.time import parse_time
from datetime import datetime, timedelta
from multiprocessing import Pool

from processing import get_A_CH
from processing import get_P_CH

import warnings

In [27]:
channel = '193'
start = "2012-01-01"
end = "2013-02-21"
cadence = 12
base_dir = "D:\\Data\\EUV"
save_dir = "D:\\Data\\EUV"
cores = 4

In [28]:
base_dir = Path(base_dir)
save_dir = Path(save_dir)
start_dt = parse_time(start).to_datetime()
end_dt = parse_time(end).to_datetime()
channels = [chan.strip() for chan in channel.split(',')]   # e.g., [193,211]
years = range(start_dt.year, end_dt.year + 1)

In [29]:
def get_parameter(file):
    if file.exists():
        try:
            #a_ch = get_A_CH(file)
            p_ch30 = get_P_CH(file, lon=10, lat=30)
            p_ch90 = get_P_CH(file, lon=10, lat=90)
        except Exception:
            _, p_ch30, p_ch90 = np.nan, np.nan, np.nan
    else:
        _, p_ch30, p_ch90 = np.nan, np.nan, np.nan
    return np.nan, p_ch30, p_ch90

get_parameter(Path("D:/Data/EUV/193/2012/aia.lev1_5_euv_12s.2012-01-01T000009Z.193.image_lev1_5.fits"))

(nan,
 (<Time object: scale='utc' format='isot' value=2012-01-01T00:00:07.838>,
  4206.427192764431),
 (<Time object: scale='utc' format='isot' value=2012-01-01T00:00:07.838>,
  9053.077741189918))

In [30]:
for chan in channels:
    for year in years:
        source_dir = base_dir / str(chan) / str(year)
        destination_dir = save_dir
        destination_dir.mkdir(parents=True, exist_ok=True)
            
        save_file = destination_dir / "CH_Indics.csv"
         # --- resume logic: read existing times ---
        if save_file.exists():
            df = pd.read_csv(save_file)
            df.columns = df.columns.str.strip()
            if 'datetime' not in df.columns:
                alt = [c for c in df.columns if c.strip().lower() == 'datetime']
                if alt:
                    df.rename(columns={alt[0]: 'datetime'}, inplace=True)
                else:
                    raise KeyError(
                        f"there are no 'datetime' column. "
                        f"column in the files: {df.columns.tolist()}"
                    )
            df['datetime'] = pd.to_datetime(
                df['datetime'],
                format="%Y-%m-%d_%H",
                errors='coerce'
            )
            processed = set(df['datetime'].dt.to_pydatetime())
        else:
            save_file.write_text("datetime,A_CH,P_CH30,P_CH90\n")
            processed = set()

        year_start = max(start_dt, datetime(year, 1, 1, 0, 0))
        year_end   = min(end_dt,   datetime(year, 12, 31, 23, 59, 59))
        current = year_start
        dt_list = []
        while current <= year_end:
            dt_list.append(current)
            current += timedelta(hours=cadence)
            
        to_do = [dt for dt in dt_list if dt not in processed]
        
        def process_dt(dt):
            prefix = dt.strftime('%Y-%m-%dT%H')
            fname = f"aia.lev1_5_euv_12s.{prefix}*Z.{chan}.image_lev1_5.fits"
            matches = list(source_dir.glob(fname))
            if matches:
                fpath = matches[0]
            else:
                # dummy path to trigger nan in get_parameter
                fpath = source_dir / 'file_not_found.fits'
            a_ch, p_ch30, p_ch90 = get_parameter(fpath)
            return dt, a_ch, p_ch30, p_ch90
        
        

  processed = set(df['datetime'].dt.to_pydatetime())
  processed = set(df['datetime'].dt.to_pydatetime())


In [35]:
import os


In [36]:
save_file = save_dir / str(chan) / f"CH_Indics_{chan}.csv"
fmt = '%Y-%m-%dT%H:%M:%S'
def get_last_processed(save_file: Path, fmt: str = '%Y-%m-%dT%H:%M:%S'):
    """
    Read the last line of the CSV and parse its datetime.
    Returns None if file does not exist or parse fails.
    """
    if not save_file.exists() or save_file.stat().st_size == 0:
        return None
    with open(save_file, 'rb') as f:
        try:
            f.seek(-2, os.SEEK_END)
            while f.read(1) != b'\n':
                f.seek(-2, os.SEEK_CUR)
        except OSError:
            f.seek(0)
        last_line = f.readline().decode().strip()
    if not last_line:
        return None
    last_dt_str = last_line.split(',')[0]
    try:
        return datetime.strptime(last_dt_str, fmt)
    except ValueError:
        return None
    
get_last_processed(save_file, fmt)

datetime.datetime(2012, 1, 20, 12, 0)