# 01_03_make_fits_summary

## 필요한 모듈

이 프로젝트를 위해서는 아래의 모듈이 필요하다. 

> numpy, pandas, matplotlib, astropy, version_information

### 모듈 설치

1. 콘솔 창에서 모듈을 설치할 때는 아래와 같은 형식으로 입력하면 된다.

>pip install module_name==version

>conda install module_name==version

2. 주피터 노트북(코랩 포함)에 설치 할 때는 아래의 셀을 실행해서 실행되지 않은 모듈을 설치할 수 있다. (pip 기준) 만약 아나콘다 환경을 사용한다면 7행을 콘다 설치 명령어에 맞게 수정하면 된다.

In [18]:
#import sys
%pip install matplotlib==3.2 numpy==1.23

Note: you may need to restart the kernel to use updated packages.


### 모듈 버전 확인

아래 셀을 실행하면 이 노트북을 실행한 파이썬 및 관련 모듈의 버전을 확인할 수 있다.

In [19]:
import importlib, sys, subprocess
packages = "numpy, pandas, matplotlib, pyhdf, netCDF4, basemap, basemap-data, basemap-data-hires, version_information" # required modules
pkgs = packages.split(", ")
for pkg in pkgs :
    if not importlib.util.find_spec(pkg):
        #print(f"**** module {pkg} is not installed")
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg, '-q'])
    else: 
        print(f"**** module {pkg} is installed")

%load_ext version_information
import time
now = time.strftime("%Y-%m-%d %H:%M:%S (%Z = GMT%z)")
print(f"This notebook was generated at {now} ")

vv = %version_information {packages}
for i, pkg in enumerate(vv.packages):
    print(f"{i} {pkg[0]:10s} {pkg[1]:s}")

**** module numpy is installed
**** module pandas is installed
**** module matplotlib is installed
**** module pyhdf is installed
**** module netCDF4 is installed
**** module version_information is installed
The version_information extension is already loaded. To reload it, use:
  %reload_ext version_information
This notebook was generated at 2023-06-08 22:56:25 (KST = GMT+0900) 
0 Python     3.8.16 64bit [GCC 11.2.0]
1 IPython    8.12.0
2 OS         Linux 5.15.0 73 generic x86_64 with glibc2.17
3 numpy      1.23.0
4 pandas     2.0.2
5 matplotlib 3.2.0
6 pyhdf      0.10.5
7 netCDF4    1.6.4
8 basemap    1.3.7
9 basemap-data 1.3.2
10 basemap-data-hires 1.3.2
11 version_information 1.0.4


### import modules

In [20]:
import os
from glob import glob
from pathlib import Path
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
import numpy as np
import os
import _MODIS_AOD_utilities
import _Python_utilities
from datetime import datetime
import _Python_utilities

plt.rcParams.update({'figure.max_open_warning': 0})

In [None]:
#######################################################
# for log file
log_dir = "logs/"
log_file = "{}{}.log".format(log_dir, os.path.basename(__file__)[:-3])
err_log_file = "{}{}_err.log".format(log_dir, os.path.basename(__file__)[:-3])
print ("log_file: {}".format(log_file))
print ("err_log_file: {}".format(err_log_file))
if not os.path.exists('{0}'.format(log_dir)):
    os.makedirs('{0}'.format(log_dir))

In [21]:
BASEDIR = Path("/mnt/Rdata/MODIS/Aerosol")
DOINGDIR = BASEDIR/ "MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 3km"
DOINGDIR = BASEDIR/"MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km"

YEARDIRs = sorted(_Python_utilities.getFullnameListOfsubDirs(DOINGDIR))
print ("YEARDIRs: ", format(YEARDIRs))
print ("len(YEARDIRs): ", format(len(YEARDIRs)))

DOINGDIRs:  ['/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km/2002', '/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km/2003', '/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km/2004', '/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km/2005', '/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km/2006', '/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km/2007', '/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km/2008', '/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km/2009', '/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km/2010', '/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km/2011', '/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km/2012', '/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6.1 - Aerosol 5-Min L2 Swath 10km/2013', '/mnt/Rdata/MODIS/Aerosol/MODIS Aqua C6

In [13]:
for YEARDIR in YEARDIRs[:1] :
    YEARDIR = Path(YEARDIRs)
    DAYDIRs = _Python_utilities.getFullnameListOfallFiles(str(YEARDIRs))
    
    for DAYDIR in DAYDIRs[:1] :
        YEARDIR


    ccd_fpath = Path(f"{DOINGDIR/DOINGDIR.parts[-1]}.csv")
    print("ccd_fpath", ccd_fpath)
    DOINGSUBDIRs = sorted(_Python_utilities.getFullnameListOfallsubDirs(str(DOINGDIR)))
    t = os.path.getmtime(ccd_fpath)
    ccd_fpath_dt = datetime.fromtimestamp(t)
    #print("ccd_fpath_dt: ", ccd_fpath_dt)
    if ccd_fpath.exists() \
        and ccd_fpath_dt < datetime.now() + timedelta(weeks=-2) :
        print(f"{str(ccd_fpath)} is already exist and is not old...")

NameError: name '__file__' is not defined

In [None]:
# t = os.path.getmtime(ccd_fpath)
# ccd_fpath_dt = datetime.fromtimestamp(t)
# print(ccd_fpath_dt)

In [None]:
# ccd_fpath_dt > datetime.now() + timedelta(weeks=-12)

In [None]:
# ccd_fpath.exists() \
        #and ccd_fpath_dt < datetime.now() + timedelta(weeks=-13)

In [None]:
for DOINGDIR in DOINGDIRs[:1] :
    DOINGDIR = Path(DOINGDIR)
    ccd_fpath = Path(f"{DOINGDIR/DOINGDIR.parts[-1]}.csv")
    print("ccd_fpath", ccd_fpath)
    DOINGSUBDIRs = sorted(_Python_utilities.getFullnameListOfallsubDirs(str(DOINGDIR)))
    t = os.path.getmtime(ccd_fpath)
    ccd_fpath_dt = datetime.fromtimestamp(t)
    #print("ccd_fpath_dt: ", ccd_fpath_dt)
    if ccd_fpath.exists() \
        and ccd_fpath_dt < datetime.now() + timedelta(weeks=-2) :
        print(f"{str(ccd_fpath)} is already exist and is not old...")
            
    else : 
        summary_all = pd.DataFrame()

        for DOINGSUBDIR in DOINGSUBDIRs[:1] :
            DOINGSUBDIR = Path(DOINGSUBDIR)
            print("DOINGSUBDIR", DOINGSUBDIR)
            fits_in_dir = sorted(list(DOINGSUBDIR.glob('*.fit*')))
            print("fits_in_dir", fits_in_dir)
            print("len(fits_in_dir)", len(fits_in_dir))
            if len(fits_in_dir) == 0 :
                print(f"There is no fits fils in {DOINGSUBDIR}")
                pass
            else : 
                save_fpath2 = DOINGSUBDIR / f"{DOINGSUBDIR.parts[-1]}.csv"
                save_fpath = DOINGSUBDIR / f"summary_{DOINGSUBDIR.parts[-1]}.csv"
                print (f"Starting...\n{DOINGSUBDIR.name}")
                if save_fpath2.exists():
                    os.remove(str(save_fpath2))
                    print (f"{str(save_fpath2)} is deleted...")
                
                t = os.path.getmtime(save_fpath)
                save_fpath_dt = datetime.fromtimestamp(t)
                #print("save_fpath_dt: ", save_fpath_dt)
                if save_fpath.exists() \
                    and save_fpath_dt < datetime.now() + timedelta(weeks=-2) :
                    print(f"{str(save_fpath)} is already exist and is not old...")
                    summary = pd.read_csv(str(save_fpath))
                
                else : 
                    summary = yfu.make_summary(DOINGSUBDIR/"*.fit*",
                                output = save_fpath,
                                verbose = False
                                )
                    print(f"{save_fpath} is created...")
                summary_all = pd.concat([summary_all, summary], axis = 0)
        
        summary_all.reset_index(inplace=True)
        summary_all.to_csv(str(ccd_fpath))
        print(f"{ccd_fpath} is created...")


































    
        

In [None]:
#%%
for DOINGDIR in DOINGDIRs[:2] :
    DOINGDIR = Path(DOINGDIR)
    DOINGSUBDIRs = sorted(_Python_utilities.getFullnameListOfallsubDirs(str(DOINGDIR)))
    ccd_fpath = Path(f"{DOINGDIR/DOINGDIR.parts[-1]}.csv")
    print("ccd_fpath", ccd_fpath)

    if ccd_fpath.exists() or False:
        print(f"{str(ccd_fpath)} is already exist...")

    else : 
        summary_all = pd.DataFrame()

        for DOINGSUBDIR in DOINGSUBDIRs[:10] :
            DOINGSUBDIR = Path(DOINGSUBDIR)
            print("DOINGSUBDIR", DOINGSUBDIR)
            fits_in_dir = sorted(list(DOINGSUBDIR.glob('*.fit*')))
            print("fits_in_dir", fits_in_dir)
            print("len(fits_in_dir)", len(fits_in_dir))
            if len(fits_in_dir) == 0 :
                print(f"There is no fits fils in {DOINGSUBDIR}")
                pass
            else : 
                save_fpath2 = DOINGSUBDIR / f"{DOINGSUBDIR.parts[-1]}.csv"
                save_fpath = DOINGSUBDIR / f"summary_{DOINGSUBDIR.parts[-1]}.csv"
                print (f"Starting...\n{DOINGSUBDIR.name}")
                if save_fpath2.exists():
                    os.remove(str(save_fpath2))
                    print (f"{str(save_fpath2)} is deleted...")
                
                if save_fpath.exists():
                    print(f"{str(save_fpath)} is already exist...")
                    summary = pd.read_csv(str(save_fpath))
                
                else : 
                    summary = yfu.make_summary(DOINGSUBDIR/"*.fit*",
                                output = save_fpath,
                                verbose = False
                                )
                    print(f"{save_fpath} is created...")
                summary_all = pd.concat([summary_all, summary], axis = 0)
        
        summary_all.reset_index(inplace=True)
        summary_all.to_csv(str(ccd_fpath))
        print(f"{ccd_fpath} is created...")


In [None]:
if len(fits_in_dir) == 0 :
    print(f"There is no fits fils in {DOINGSUBDIR}")
    pass
else : 
    print("ccd_fpath", ccd_fpath)


In [None]:
if ccd_fpath.exists():
    print(f"{BASEDIR/ccd_dir}.csv is already exist...")
else : 
    summary_all = pd.DataFrame()
    

In [None]:
summary_all = pd.DataFrame()
save_fpath2 = fpath/f"{fpath.parts[-1]}.csv"
save_fpath = fpath/f"summary_{fpath.parts[-1]}.csv"
print (f"Starting...\n{fpath.name}")
if save_fpath2.exists():
    os.remove(str(save_fpath2))
    print (f"{str(save_fpath2)} is deleted...")

if save_fpath.exists():
    print(f"{str(save_fpath)} is already exist...")
    summary = pd.read_csv(str(save_fpath))

else : 
    summary = yfu.make_summary(fpath/"*.fit*",
                output = save_fpath,
                verbose = False
                )
    print(f"{save_fpath} is created...")
summary_all = pd.concat([summary_all, summary], axis = 0)
summary_all.to_csv(f"{BASEDIR/ccd_dir}.csv")
summary_all.reset_index(inplace=True)
print(f"{BASEDIR/ccd_dir}.csv is created...")