In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D

# for loading/processing the images  
from tensorflow.keras.preprocessing.image import load_img 
from tensorflow.keras.preprocessing.image import img_to_array 
from tensorflow.keras.applications.vgg16 import preprocess_input 

# models 
from tensorflow.keras.applications.vgg16 import VGG16 
from tensorflow.keras.models import Model

# clustering and dimension reduction
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# for everything else
import os
import numpy as np
import matplotlib.pyplot as plt
from random import randint
import pandas as pd
import pickle
import PIL
import PIL.Image
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import csv
import math
import datetime
from PIL import Image as im

In [2]:
# parsivel raw data processing

# Column labels for the raw Parsivel export, in file order: the 17 scalar
# fields, then the spectrum cells labelled '1'..'1024', and finally the
# closing '</SPECTRUM>' tag, which occupies a column of its own.
names = np.array(
    [
        'Date',
        'Time',
        'Intensity of precipitation (mm/h)',
        'Precipitation since start (mm)',
        'Weather code SYNOP WaWa',
        'Weather code METAR/SPECI',
        'Weather code NWS',
        'Radar reflectivity (dBz)',
        'MOR Visibility (m)',
        'Signal amplitude of Laserband',
        'Number of detected particles',
        'Temperature in sensor (ºC)',
        'Heating current (A)',
        'Sensor voltage (V)',
        'Optics status',
        'Kinetic Energy',
        'Snow intensity (mm/h)',
    ]
    + [str(cell_no) for cell_no in range(1, 1025)]
    + ['</SPECTRUM>']
)

# NOTE(review): the original note here said to add ".csv" to the raw Parsivel
# file names, but the paths used below carry no extension -- confirm which is
# intended.
def _load_parsivel_file(path, column_names, skiprows=None):
    """Read one raw Parsivel log and reduce it to 1-minute resolution.

    Parameters
    ----------
    path : str
        File to read. It is parsed as ';'-separated, latin-1 encoded text.
    column_names : sequence of str
        Labels handed to ``pd.read_csv(names=...)``. Must contain 'Date' and
        'Time' (merged into the index), the spectrum label '1', and a trailing
        '</SPECTRUM>' column; element 2 is treated as the intensity column.
    skiprows : int or None, optional
        Forwarded to ``pd.read_csv``; ``None`` (the default) skips nothing.

    Returns
    -------
    (df, dsd) : tuple of DataFrame
        ``df`` is the per-minute mean of the frame, ``dsd`` the per-minute sum
        (as int) of the spectrum columns. Both are binned with right-closed,
        right-labelled 1-minute bins and have their index moved 8 hours
        earlier.
    """
    # NOTE(review): the nested-list `parse_dates` form and
    # `infer_datetime_format` are deprecated in recent pandas releases; they
    # are kept as-is so the parsing result does not change.
    df = pd.read_csv(path, sep=';', names=column_names, parse_dates=[['Date', 'Time']],
                     index_col=['Date_Time'], dayfirst=True, infer_datetime_format=True,
                     na_values=[''], skiprows=skiprows, encoding='latin-1')

    # Repeated header lines inside the file show up as rows labelled
    # 'Date Time'. A file without any raises KeyError, which is fine; any
    # other error is a real problem and must not be hidden.
    try:
        df = df.drop(index='Date Time')
    except KeyError:
        pass

    # The closing tag lands in a column of its own and carries no data.
    df = df.drop(['</SPECTRUM>'], axis=1)

    df.index = pd.to_datetime(df.index, format='%d.%m.%Y %H:%M:%S').tz_localize('Asia/Taipei')

    # Assign with df[col] rather than df.loc[:, col]: the .loc form writes
    # into the existing column and can leave an object column as object, so
    # the dtype conversion would not take effect.
    intensity_col = column_names[2]  # Intensity of precipitation (mm/h)
    df[intensity_col] = df[intensity_col].astype(float)

    # The first spectrum cell is prefixed with the 10-character opening tag
    # '<SPECTRUM>'. Strip it; rows holding the all-zero marker become NaN and
    # are zero-filled below. `where` avoids index alignment, so duplicate
    # timestamps cannot break this step.
    first_cell = df['1']
    df['1'] = first_cell.str[10:].where(first_cell != '<SPECTRUM>ZERO</SPECTRUM>')

    # Every file goes through the same fill path.
    df = df.fillna(0)
    df['1'] = pd.to_numeric(df['1'])

    # Spectrum block: column positions 15..1038 (1024 columns), expected to be
    # the labels '1'..'1024' once Date/Time are in the index. Blank cells
    # count as zero particles.
    spectrum_start, spectrum_stop = 15, 1039
    spectrum = (
        df.iloc[:, spectrum_start:spectrum_stop]
        .replace(r'^\s*$', np.nan, regex=True)
        .fillna(0)
        .astype(int)
    )
    dsd = spectrum.resample('1T', label='right', closed='right').sum().astype(int)

    # Per-minute mean of the frame (intensity and the other fields).
    df = df.resample('1T', label='right', closed='right').mean()

    # Move both indexes 8 hours earlier.
    # NOTE(review): the index keeps its Asia/Taipei tz label after the shift,
    # so the wall-clock values presumably read as UTC -- confirm this is
    # intended rather than tz_convert.
    return df.shift(-8, freq='H'), dsd.shift(-8, freq='H')


# Display option for later cells; it only needs to be set once.
pd.set_option('display.max_columns', None)

df_list = []
dsd_list = []

# The first log is read with its first 17 lines skipped; the others are read
# from the top.
df, dsd = _load_parsivel_file('2021-06-08-17-32', names, skiprows=17)
df_list.append(df)
dsd_list.append(dsd)

file_names = ['2021-08-16-20-20','2021-10-23-10-46','2022-01-04-10-15','2022-07-06-11-14', '2022-10-10-02-24']

for file in file_names:
    df, dsd = _load_parsivel_file(file, names)
    df_list.append(df)
    dsd_list.append(dsd)

# Stack all logs; columns missing from any single file are zero-filled.
combined_df = pd.concat(df_list).fillna(0)
combined_dsd = pd.concat(dsd_list).fillna(0).astype(int)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
# Keep only the rows in which at least one spectrum cell is non-zero.
dataset_dsd = combined_dsd.loc[combined_dsd.ne(0).any(axis=1)]

# Global display option, kept because later cells may rely on it.
# NOTE(review): with max_rows unset, displaying a whole frame prints every
# row -- prefer .head()/.describe() when inspecting.
pd.set_option('display.max_rows', None)

# Save the filtered spectra. Only a cell's last expression is displayed and
# to_csv returns None, so this cell shows no output.
dataset_dsd.to_csv('dataset_dsd.csv')