# combine master files

## 필요한 모듈

이 프로젝트를 위해서는 아래의 모듈이 필요하다. 

> pandas, BeautifulSoup4, version_information

### 모듈 설치

1. 콘솔 창에서 모듈을 설치할 때는 아래와 같은 형식으로 입력하면 된다.

>pip install module_name==version

>conda install module_name==version

2. 주피터 노트북(코랩 포함)에 설치 할 때는 아래의 셀을 실행해서 실행되지 않은 모듈을 설치할 수 있다. (pip 기준) 만약 아나콘다 환경을 사용한다면 7행을 콘다 설치 명령어에 맞게 수정하면 된다.

In [1]:
# Install a pip package in the current Jupyter kernel
import importlib, sys, subprocess

packages = "pandas, BeautifulSoup4, version_information" # required modules
pkgs = packages.split(", ")
for pkg in pkgs :
    if not importlib.util.find_spec(pkg):
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg, '-q'])
        print(f"**** {pkg} module is now installed.")
    else: 
        print(f"******** {pkg} module is already installed.")
%load_ext version_information
import time
now = time.strftime("%Y-%m-%d %H:%M:%S (%Z = GMT%z)")
print(f"This notebook was generated at {now} ")

vv = %version_information {packages}
for i, pkg in enumerate(vv.packages):
    print(f"{i} {pkg[0]:10s} {pkg[1]:s}")

******** pandas module is already installed.
**** BeautifulSoup4 module is now installed.
******** version_information module is already installed.
This notebook was generated at 2024-03-21 08:49:14 (대한민국 표준시 = GMT+0900) 
0 Python     3.9.18 64bit [MSC v.1916 64 bit (AMD64)]
1 IPython    8.15.0
2 OS         Windows 10 10.0.22631 SP0
3 pandas     2.1.4
4 BeautifulSoup4 4.12.2
5 version_information 1.0.4


### 모듈 버전 확인

아래 셀을 실행하면 이 노트북을 실행한 파이썬 및 관련 모듈의 버전을 확인할 수 있다.

### import modules

In [3]:
from glob import glob
from datetime import datetime
import pandas as pd
import os
import _SDO_utilities

In [4]:
filelist_dir_name = './SOHO_filelists/'
save_dir_name = './SOHO_filelists/'

In [5]:
#make Pandas DataFrame from file
df = pd.read_csv('./SOHO_filelists/SOHO_filelist.txt')
print("df: {}".format(df))
df

df:              #this file is created by guitar79@naver.com
0      https://soho.nascom.nasa.gov/data/synoptic/sun...
1      https://soho.nascom.nasa.gov/data/synoptic/sun...
2      https://soho.nascom.nasa.gov/data/synoptic/sun...
3      https://soho.nascom.nasa.gov/data/synoptic/sun...
4      https://soho.nascom.nasa.gov/data/synoptic/sun...
...                                                  ...
12810  https://soho.nascom.nasa.gov/data/synoptic/sun...
12811  https://soho.nascom.nasa.gov/data/synoptic/sun...
12812  https://soho.nascom.nasa.gov/data/synoptic/sun...
12813  https://soho.nascom.nasa.gov/data/synoptic/sun...
12814  https://soho.nascom.nasa.gov/data/synoptic/sun...

[12815 rows x 1 columns]


Unnamed: 0,#this file is created by guitar79@naver.com
0,https://soho.nascom.nasa.gov/data/synoptic/sun...
1,https://soho.nascom.nasa.gov/data/synoptic/sun...
2,https://soho.nascom.nasa.gov/data/synoptic/sun...
3,https://soho.nascom.nasa.gov/data/synoptic/sun...
4,https://soho.nascom.nasa.gov/data/synoptic/sun...
...,...
12810,https://soho.nascom.nasa.gov/data/synoptic/sun...
12811,https://soho.nascom.nasa.gov/data/synoptic/sun...
12812,https://soho.nascom.nasa.gov/data/synoptic/sun...
12813,https://soho.nascom.nasa.gov/data/synoptic/sun...


In [6]:
df

Unnamed: 0,#this file is created by guitar79@naver.com
0,https://soho.nascom.nasa.gov/data/synoptic/sun...
1,https://soho.nascom.nasa.gov/data/synoptic/sun...
2,https://soho.nascom.nasa.gov/data/synoptic/sun...
3,https://soho.nascom.nasa.gov/data/synoptic/sun...
4,https://soho.nascom.nasa.gov/data/synoptic/sun...
...,...
12810,https://soho.nascom.nasa.gov/data/synoptic/sun...
12811,https://soho.nascom.nasa.gov/data/synoptic/sun...
12812,https://soho.nascom.nasa.gov/data/synoptic/sun...
12813,https://soho.nascom.nasa.gov/data/synoptic/sun...


In [7]:
df_1024 = df[df['#this file is created by guitar79@naver.com'].str.contains("sunspots_1024")]
df_1024

Unnamed: 0,#this file is created by guitar79@naver.com
7,https://soho.nascom.nasa.gov/data/synoptic/sun...
8,https://soho.nascom.nasa.gov/data/synoptic/sun...
9,https://soho.nascom.nasa.gov/data/synoptic/sun...
10,https://soho.nascom.nasa.gov/data/synoptic/sun...
13,https://soho.nascom.nasa.gov/data/synoptic/sun...
...,...
6409,https://soho.nascom.nasa.gov/data/synoptic/sun...
6410,https://soho.nascom.nasa.gov/data/synoptic/sun...
6411,https://soho.nascom.nasa.gov/data/synoptic/sun...
6412,https://soho.nascom.nasa.gov/data/synoptic/sun...


In [10]:
#df['wget_sh'] = 'str ' + df['#this file is created by guitar79@naver.com'].astype(str)
wget_sh = ""    
for index, value in df_1024['#this file is created by guitar79@naver.com'].items():
    fullname_el = value.split("/")
    filename = fullname_el[-1]
    new_foldername = save_dir_name
    if not os.path.exists(new_foldername):
        os.makedirs(new_foldername)
        print ('{} is created...'.format(new_foldername))
    
    #[ -f "../browse/2021/01/01/20210101_000000_4096_HMIIF.jpg" ] && echo "../browse/2021/01/01/20210101_000000_4096_HMIIF.jpg exists." || wget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P ../browse/2021/01/01/ https://sdo.gsfc.nasa.gov/assets/img/browse/2021/01/01/20210101_000000_4096_HMIIF.jpg
    wget_sh += '[ -f "{0}{1}" ] && echo "{0}{1} exists." || '.format(new_foldername, filename)
    wget_sh += "wget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P "
    wget_sh += "{} {}\n".format(new_foldername, value)
wget_sh

'[ -f "./SOHO_filelists/mdi_sunspots_1024.jpg" ] && echo "./SOHO_filelists/mdi_sunspots_1024.jpg exists." || wget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P ./SOHO_filelists/ https://soho.nascom.nasa.gov/data/synoptic/sunspots_earth/mdi_sunspots_1024.jpg\n[ -f "./SOHO_filelists/problem.sunspots_1024_20161130.jpg" ] && echo "./SOHO_filelists/problem.sunspots_1024_20161130.jpg exists." || wget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P ./SOHO_filelists/ https://soho.nascom.nasa.gov/data/synoptic/sunspots_earth/problem.sunspots_1024_20161130.jpg\n[ -f "./SOHO_filelists/problem.sunspots_1024_20161201.jpg" ] && echo "./SOHO_filelists/problem.sunspots_1024_20161201.jpg exists." || wget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P ./SOHO_filelists/ https://soho.nascom.nasa.gov/data/synoptic/sunspots_earth/problem.sunspots_1024_20161201.jpg\n[ -f "./SOHO_filelists/problem.sunspots_1024_20161202.jpg" ] && echo "./SOHO_filelists/problem.sunspots_1024_20161

In [8]:
#df['wget_sh'] = 'str ' + df['#this file is created by guitar79@naver.com'].astype(str)
wget_sh = ""    
for index, value in df_1024['#this file is created by guitar79@naver.com'].items():
    fullname_el = value.split("/")
    filename = fullname_el[-1]
    new_foldername = "{}{}/".format(save_dir_name, filename[-12:-8])
#    if not os.path.exists(new_foldername):
#        os.makedirs(new_foldername)
#        print ('{} is created...'.format(new_foldername))
    
    #[ -f "../browse/2021/01/01/20210101_000000_4096_HMIIF.jpg" ] && echo "../browse/2021/01/01/20210101_000000_4096_HMIIF.jpg exists." || wget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P ../browse/2021/01/01/ https://sdo.gsfc.nasa.gov/assets/img/browse/2021/01/01/20210101_000000_4096_HMIIF.jpg
    wget_sh += "wget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P "
    wget_sh += "{} {}\n".format(new_foldername, value)
wget_sh

./SOHO_filelists/ots_/ is created...
./SOHO_filelists/2016/ is created...
./SOHO_filelists/1957/ is created...
./SOHO_filelists/1999/ is created...
./SOHO_filelists/2005/ is created...
./SOHO_filelists/2006/ is created...
./SOHO_filelists/2007/ is created...
./SOHO_filelists/2008/ is created...
./SOHO_filelists/2009/ is created...
./SOHO_filelists/2010/ is created...
./SOHO_filelists/2011/ is created...
./SOHO_filelists/2012/ is created...
./SOHO_filelists/2013/ is created...
./SOHO_filelists/2014/ is created...
./SOHO_filelists/2015/ is created...
./SOHO_filelists/2017/ is created...
./SOHO_filelists/2018/ is created...
./SOHO_filelists/2019/ is created...
./SOHO_filelists/2020/ is created...
./SOHO_filelists/2021/ is created...
./SOHO_filelists/2022/ is created...
./SOHO_filelists/2023/ is created...
./SOHO_filelists/2024/ is created...


'wget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P ./SOHO_filelists/ots_/ https://soho.nascom.nasa.gov/data/synoptic/sunspots_earth/mdi_sunspots_1024.jpg\nwget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P ./SOHO_filelists/2016/ https://soho.nascom.nasa.gov/data/synoptic/sunspots_earth/problem.sunspots_1024_20161130.jpg\nwget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P ./SOHO_filelists/2016/ https://soho.nascom.nasa.gov/data/synoptic/sunspots_earth/problem.sunspots_1024_20161201.jpg\nwget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P ./SOHO_filelists/2016/ https://soho.nascom.nasa.gov/data/synoptic/sunspots_earth/problem.sunspots_1024_20161202.jpg\nwget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P ./SOHO_filelists/1957/ https://soho.nascom.nasa.gov/data/synoptic/sunspots_earth/sunspots_1024_19571231.jpg\nwget -T 300 -t 1 -r -nd -np -l 1 -N --no-if-modified-since -P ./SOHO_filelists/1999/ https://soho.nascom.nasa.gov/data/synopt

In [9]:
with open("{0}SOHO_filelists_wget1.bat".format(save_dir_name), "w") as text_file:
    text_file.write(wget_sh)