In [1]:
import numpy as np
import os
import json
import pandas as pd
import glob
from tqdm import tqdm

In [2]:
# Collect the paths of all JSON files belonging to one stage (directory).
def get_filename(root_dir):
    """Return the paths of every ``*.json`` file directly inside ``root_dir``.

    Parameters
    ----------
    root_dir : str
        Directory to search. It is concatenated with ``'/*.json'`` to form
        the glob pattern, so the search is not recursive.

    Returns
    -------
    list of str
        Matching paths (each prefixed with ``root_dir``), sorted so that the
        processing order is the same on every run. An empty list is returned
        when nothing matches or the directory does not exist.
    """
    # glob.glob gives no ordering guarantee; sort for reproducible output.
    return sorted(glob.glob(root_dir + '/*.json'))

In [3]:
def _with_t_separator(timestamp):
    """Return ``timestamp`` with the character at index 10 replaced by 'T'.

    Raises IndexError when ``timestamp`` has fewer than 11 characters.
    """
    chars = list(timestamp)
    chars[10] = 'T'
    return "".join(chars)


def Station2CSV(p, wave_root='/mnt/nas6/CWBSN'):
    """Flatten the per-station waveform records of one event into a DataFrame.

    Every value of ``p`` that is a dict containing ``'numberOfData'`` is
    treated as a station record; its waveforms are stored under the string
    keys ``'0'``, ``'1'``, ... up to ``numberOfData - 1``. One output row is
    produced per waveform whose file exists on disk.

    Parameters
    ----------
    p : dict
        Parsed event JSON. ``p['event']`` is copied (as ``str``) into every
        row; all other top-level values that are not station records are
        ignored.
    wave_root : str, optional
        Prefix that is string-concatenated with each waveform's ``waveFile``
        value to test whether the file exists. Defaults to the path that was
        previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per kept waveform, indexed by ``'<station key>_<n>'``. The
        review columns ``machine_check`` (0) and ``user_check`` / ``label`` /
        ``comment`` (empty strings) are initialised on every row. An empty
        DataFrame is returned when no row is produced.

    Notes
    -----
    Processing is best-effort: if a station record is malformed, the rows
    already built for that station are kept, the remainder of the station is
    skipped, and a warning is issued instead of raising.
    """
    import warnings

    frames = []

    for k, station in p.items():
        # Event-level entries (anything that is not a dict carrying
        # 'numberOfData') are not station records; skip them explicitly
        # rather than relying on an exception.
        if not isinstance(station, dict) or 'numberOfData' not in station:
            continue

        try:
            # Number of waveform records stored for this station.
            n_data = station['numberOfData']

            for w in range(n_data):
                wave = station[str(w)]
                tmp = {
                    'event': str(p['event']),  # primary key of the event
                    'distance': station['distance'],
                    'p_weight': station['p_weight'],
                    's_weight': station['s_weight'],
                    'network': wave['network'],
                    'location': wave['location'],
                    'sampling_rate': wave['sampling_rate'],
                    'instrument': wave['instrument'],
                    # Drop the final character of the start time.
                    'starttime': wave['starttime'][:-1],
                    'p_arrival_time': _with_t_separator(wave['p_arrival_time']),
                    's_arrival_time': _with_t_separator(wave['s_arrival_time']),
                    'pga': wave['pga'],
                    'pgv': wave['pgv'],
                    'intensity': wave['intensity'],
                }

                # Keep the row only if the waveform file exists. Plain
                # concatenation is intentional: os.path.join would discard
                # wave_root if waveFile starts with '/'.
                if not os.path.exists(wave_root + wave['waveFile']):
                    continue
                tmp['waveFile'] = wave['waveFile']

                # Review columns, initialised to "unchecked".
                tmp['machine_check'] = 0
                tmp['user_check'] = ''
                tmp['label'] = ''
                tmp['comment'] = ''

                frames.append(pd.DataFrame(tmp, index=[k + '_' + str(w)]))

        except Exception as exc:
            # Best-effort: keep rows gathered so far, skip the rest of this
            # station, but make the problem visible.
            warnings.warn(
                'Station2CSV: skipping remainder of station %r in event %r: %r'
                % (k, p.get('event'), exc)
            )

    # Concatenate once; growing a DataFrame inside the loop is quadratic.
    return pd.concat(frames) if frames else pd.DataFrame()

In [7]:
# Convert every event JSON of the selected years into two CSV files per year:
# <year>/csv/event.csv (one row per event) and <year>/csv/station.csv (one row
# per waveform, built by Station2CSV).
# Years already finished:
#all_year = ['2012', '2013', '2014', '2015', '2016', '2017', '2019', '2021']
all_year = ['2018']
for year in all_year:
    root_path = '/mnt/nas6/CWBSN/' + year
    save_path = '/mnt/nas6/CWBSN/' + year + '/csv'
    files = get_filename(root_path)

    if not os.path.exists(save_path):
        print('creating directiory: %s' %(save_path))
        # makedirs also creates missing parents and tolerates a directory
        # that appears between the check above and this call.
        os.makedirs(save_path, exist_ok=True)

    # Collect per-file frames and concatenate once per year; concatenating
    # inside the loop copies the accumulated frame on every iteration.
    event_frames = []
    sta_frames = []

    for file in tqdm(files):
        try:
            # `with` closes the handle even when json.load raises.
            with open(os.path.join(root_path, file), 'r') as f:
                p = json.load(f)

            # Replace the character at index 10 of the origin time with 'T'.
            otime = list(p['ori_time'])
            otime[10]='T'
            otime = "".join(otime)
            p['ori_time'] = otime
            df = pd.json_normalize(p)

            # Event row: every column up to and including 'lon', plus
            # 'quality' and 'event'.
            event_frames.append(
                pd.concat([df.loc[:, :'lon'], df['quality'], df['event']], axis=1)
            )

            station_rows = Station2CSV(p)
            if not station_rows.empty:
                sta_frames.append(station_rows)
        except Exception as e:
            # Report the failing file and the reason, then continue with the
            # next one. Unlike a bare `except:`, this lets KeyboardInterrupt
            # stop the run.
            print('%s: %r' % (file, e))

    event_df = pd.concat(event_frames) if event_frames else pd.DataFrame()
    sta_df = pd.concat(sta_frames) if sta_frames else pd.DataFrame()

    event_df.to_csv(os.path.join(save_path, 'event.csv'), index=False)
    sta_df.to_csv(os.path.join(save_path, 'station.csv'))

creating directiory: /mnt/nas6/CWBSN/2018/csv


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 2440/2440 [19:41<00:00,  2.06it/s]


In [6]:
# Display the event table built by the conversion loop.
# NOTE(review): this cell's execution count (6) is lower than the loop's (7),
# and the rows shown are dated 2020 although the loop processes '2018' -- the
# output below looks stale; re-run this cell after the loop to confirm.
event_df

Unnamed: 0,ori_time,depth,mag,lat,lon,quality,event
0,2020-12-10T18:15:08.560000,62.74,5.75,24.53,121.97,B,20121018150
0,2020-08-28T13:03:32.329999,19.86,4.16,22.44,120.96,B,20082813030
0,2020-09-20T23:54:48.219999,9.44,4.72,22.43,121.58,C,20092023540
0,2020-01-15T12:42:38.600000,3.28,1.95,25.18,121.62,A,20011512412
0,2020-02-26T05:35:34.630000,11.94,2.47,24.05,121.10,A,20022605342
...,...,...,...,...,...,...,...
0,2020-11-02T07:21:23.769999,10.35,3.21,23.29,120.40,B,20110207200
0,2020-09-28T22:17:33.399999,13.28,4.02,22.36,121.00,B,20092822170
0,2020-04-03T20:11:22.859999,12.33,3.09,23.20,120.14,B,20040320110
0,2020-02-15T20:14:49.399999,9.68,2.65,23.83,121.54,B,20021520132
