# New approach for O$_3$ analysis over Japan

In [1]:
import datetime
import glob
import os
import shutil
import urllib.error
import urllib.request
import zipfile

import numpy as np
import pandas as pd

## Parameters

In [2]:
# Sample archive URL (prefecture 01, year 2016). The download loop builds its own
# URLs with the same pattern; this constant is not referenced in the visible cells.
m_url = 'https://www.nies.go.jp/igreen/tj/2016/j01_2016.zip'
# Directory that receives the downloaded zip files; archives are extracted into
# its 'uzip' subfolder. Used as a plain string prefix, so keep the trailing slash.
d_path = '../inp_data/2009-2017/'
# [first, last] year to process; both ends are included by the loops.
years = [2009, 2017]
# Highest prefecture code; the loops run over codes 1..pref_max.
pref_max = 47

## Downloading and Unzipping files of raw data
Downloading the atmospheric environment measurement station data (時間値データ（2009～2017年度）) from https://www.nies.go.jp/igreen/tm_down.html

In [13]:
%%time
for yr in range(years[0], years[1]+1, 1):
    s_yr = str(yr)
    for pr in range(1, pref_max+1):
        s_pr = str(pr).zfill(2)
        f_name = 'j' + s_pr + '_' + s_yr + '.zip'
        url = 'https://www.nies.go.jp/igreen/tj/' + s_yr + '/' + f_name
        f_name = d_path + f_name
        print(f'Downlading: {url} and placing: {f_name}')

        if os.path.exists(f_name):
            print(f'File exist')
        else:
            urllib.request.urlretrieve(url, f_name)
    print(f'Downlading done for: {yr}')

Downlading: https://www.nies.go.jp/igreen/tj/2012/j01_2012.zip and placing: ../inp_data/2009-2017/j01_2012.zip
File exist
Downlading: https://www.nies.go.jp/igreen/tj/2012/j02_2012.zip and placing: ../inp_data/2009-2017/j02_2012.zip
File exist
Downlading: https://www.nies.go.jp/igreen/tj/2012/j03_2012.zip and placing: ../inp_data/2009-2017/j03_2012.zip
File exist
Downlading: https://www.nies.go.jp/igreen/tj/2012/j04_2012.zip and placing: ../inp_data/2009-2017/j04_2012.zip
File exist
Downlading: https://www.nies.go.jp/igreen/tj/2012/j05_2012.zip and placing: ../inp_data/2009-2017/j05_2012.zip
File exist
Downlading: https://www.nies.go.jp/igreen/tj/2012/j06_2012.zip and placing: ../inp_data/2009-2017/j06_2012.zip
File exist
Downlading: https://www.nies.go.jp/igreen/tj/2012/j07_2012.zip and placing: ../inp_data/2009-2017/j07_2012.zip
File exist
Downlading: https://www.nies.go.jp/igreen/tj/2012/j08_2012.zip and placing: ../inp_data/2009-2017/j08_2012.zip
File exist
Downlading: https://www.

## Unzipping the downloaded files into the `uzip` directory

In [44]:
%%time
years = [2009,2017]
for yr in range(years[0], years[1]+1, 1):
    s_yr = str(yr)
    for pr in range(20, pref_max+1, 100):
        s_pr = str(pr).zfill(2)
        f_name = 'j' + s_pr + '_' + s_yr + '.zip'
        f_name = d_path + f_name

        print(f'Unzipping: {f_name}')
        try:
            with zipfile.ZipFile(f_name,"r") as zip_ref:
                zip_ref.extractall(d_path + 'uzip')
        except:
            print('No file')

Unzipping: ../inp_data/2009-2017/j20_2009.zip
Unzipping: ../inp_data/2009-2017/j20_2010.zip
Unzipping: ../inp_data/2009-2017/j20_2011.zip
Unzipping: ../inp_data/2009-2017/j20_2012.zip
Unzipping: ../inp_data/2009-2017/j20_2013.zip
Unzipping: ../inp_data/2009-2017/j20_2014.zip
Unzipping: ../inp_data/2009-2017/j20_2015.zip
Unzipping: ../inp_data/2009-2017/j20_2016.zip
Unzipping: ../inp_data/2009-2017/j20_2017.zip
Wall time: 746 ms


## List files

In [45]:
# Machine-specific absolute path to one extracted prefecture folder.
# NOTE(review): the unusual characters after "01" look like a non-ASCII folder name
# decoded with the wrong code page during extraction — confirm. The literal must
# match the name on disk, so it is left exactly as is.
pth = "D:/OneDrive - chiba-u.jp/Students/Ozone/inp_data/2009-2017/uzip/01ûkèCô╣/"
# Optional manual check: list what the folder contains.
# for filename in os.listdir(pth):
#     print(filename)

## Rename folders

In [48]:
def _merge_into(src_dir, dst_dir):
    """Move the contents of `src_dir` into `dst_dir`, then delete `src_dir`.

    Sub-folders present on both sides are merged recursively. A plain
    `shutil.move` would instead place the source folder *inside* the existing
    destination folder (e.g. `20/2012/2012`).
    """
    os.makedirs(dst_dir, exist_ok=True)
    for entry in os.listdir(src_dir):
        src = os.path.join(src_dir, entry)
        dst = os.path.join(dst_dir, entry)
        if os.path.isdir(src) and os.path.isdir(dst):
            _merge_into(src, dst)
        else:
            shutil.move(src, dst)
    os.rmdir(src_dir)


# Shorten every extracted prefecture folder to its first two characters, which is
# the layout the later cells read from. The base folder is derived from `d_path`
# so it is the same place the archives were extracted to.
basedir = os.path.join(d_path, 'uzip')
for fn in sorted(os.listdir(basedir)):
    old_d = os.path.join(basedir, fn)
    # Skip plain files and names that are already two characters or shorter.
    if not os.path.isdir(old_d) or len(fn) <= 2:
        continue
    new_d = os.path.join(basedir, fn[:2])
    print(fn, '->', fn[:2])
    if os.path.exists(new_d):
        # Target exists from an earlier run: merge into it rather than rename.
        _merge_into(old_d, new_d)
    else:
        os.rename(old_d, new_d)
            

## Collect df for one year

In [28]:
%%time
u_path = d_path + 'uzip/'
frames = []
cnt = 0
for yr in range(years[0], years[0]+1, 1):
    s_yr = str(yr)
    for pr in range(1, pref_max+1):
        s_pr = str(pr).zfill(2)
        f_name = 'j' + s_pr + s_yr + '_' + '06.txt'
        f_name = u_path + s_pr + '/' + s_yr + '/' + f_name

        print(f_name)

        # --- read
        df_r = pd.read_csv(f_name, encoding='cp932')
#         print('Original df len:', len(df_r))
#         print(df_r.head())
        cnt += len(df_r)
        frames.append(df_r)
df_s = pd.concat(frames)
print('Full df len:', len(df_s), cnt)

../inp_data/2009-2017/uzip/01/2012/j012012_06.txt
../inp_data/2009-2017/uzip/02/2012/j022012_06.txt
../inp_data/2009-2017/uzip/03/2012/j032012_06.txt
../inp_data/2009-2017/uzip/04/2012/j042012_06.txt
../inp_data/2009-2017/uzip/05/2012/j052012_06.txt
../inp_data/2009-2017/uzip/06/2012/j062012_06.txt
../inp_data/2009-2017/uzip/07/2012/j072012_06.txt
../inp_data/2009-2017/uzip/08/2012/j082012_06.txt
../inp_data/2009-2017/uzip/09/2012/j092012_06.txt
../inp_data/2009-2017/uzip/10/2012/j102012_06.txt
../inp_data/2009-2017/uzip/11/2012/j112012_06.txt
../inp_data/2009-2017/uzip/12/2012/j122012_06.txt
../inp_data/2009-2017/uzip/13/2012/j132012_06.txt
../inp_data/2009-2017/uzip/14/2012/j142012_06.txt
../inp_data/2009-2017/uzip/15/2012/j152012_06.txt
../inp_data/2009-2017/uzip/16/2012/j162012_06.txt
../inp_data/2009-2017/uzip/17/2012/j172012_06.txt
../inp_data/2009-2017/uzip/18/2012/j182012_06.txt
../inp_data/2009-2017/uzip/19/2012/j192012_06.txt
../inp_data/2009-2017/uzip/20/2012/j202012_06.txt


FileNotFoundError: [Errno 2] File b'../inp_data/2009-2017/uzip/20/2012/j202012_06.txt' does not exist: b'../inp_data/2009-2017/uzip/20/2012/j202012_06.txt'

## Drop columns

In [None]:
# Remove the two code columns. Reassigning (instead of inplace=True) together with
# errors='ignore' keeps the cell re-runnable: a second run is a no-op rather than
# a KeyError for columns that are already gone.
df_s = df_s.drop(columns=['測定項目コード', '測定単位コード'], errors='ignore')

In [None]:
# Leave the frame as the cell's last expression so the notebook renders it as a
# table instead of plain text.
df_s.head()

## Cut wrong days and add index

In [None]:
%%time
# Work on a copy so the concatenated frame `df_s` is left untouched.
df = df_s.copy()
# Build a timestamp from the year/month/day columns. errors='coerce' turns
# combinations that are not valid calendar dates into NaT instead of raising.
# NOTE(review): 測定年度 reads as "measurement fiscal year". If the source data
# follow an April–March fiscal year, the January–March rows would need year + 1 —
# confirm against the data description.
df['data'] = pd.to_datetime(dict(year=df.測定年度, month=df.測定月, day=df.測定日), errors='coerce')