In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def fog_season(data_frame):
    month_list = range(1, 13)
    fog_count = []

    for m in month_list:
        fog_count.append(
            data_frame[(data_frame['wx'].str.contains("Fog")) & (data_frame.obs_time.dt.month == m)].shape[0])

    station_name = data_frame.iloc[0].station_id
    plt.bar(month_list, fog_count)
    # plt.xticks(m)
    plt.title('{} Airport - Fog reports per month'.format(station_name))
    plt.savefig('{}_fog_seasonality.png'.format(station_name))
    plt.clf()

In [3]:
def rearrange(data_frame):
    
    # replacing 'Fog', 'Partial Fog' etc occurrences for 1's, other for 0's
    wx_types = list(data_frame.presentwx.unique())
    fog_types = []

    for i in wx_types:
        if 'FG' in i:
            fog_types.append(i)
            wx_types.remove(i)

    data_frame.presentwx = data_frame.presentwx.replace(fog_types, 1)
    data_frame.presentwx = data_frame.presentwx.replace(wx_types, 0)
    print('1')
    # removing consecutive observations of fog
    del_indexes = []
    for index, row in data_frame[0:-1].iterrows():
        if row.presentwx == 1 and data_frame.iloc[index + 1].presentwx == 1:
            del_indexes.append(index)

    data_frame = data_frame.drop(data_frame.index[del_indexes])
    print('2')
    # placing features from X hours before in current line
    lead_hours = 6

    tmpf = (lead_hours+1)*['M']
    dwpf = (lead_hours+1)*['M']
    relh = (lead_hours+1)*['M']
    drct = (lead_hours+1)*['M']
    sknt = (lead_hours+1)*['M']
    alti = (lead_hours+1)*['M']

    for index, row in data_frame[lead_hours:-1].iterrows():
        valid_time = row['valid'] - pd.Timedelta(hours=lead_hours, minutes=row['valid'].minute)
        lead_row = data_frame.loc[data_frame['valid'] == valid_time]

        try:
            tmpf.append(lead_row['tmpf'].values[0])
        except:
            tmpf.append('M')

        try:
            dwpf.append(lead_row['dwpf'].values[0])
        except:
            dwpf.append('M')

        try:
            relh.append(lead_row['relh'].values[0])
        except:
            relh.append('M')

        try:
            drct.append(lead_row['drct'].values[0])
        except:
            drct.append('M')

        try:
            sknt.append(lead_row['sknt'].values[0])
        except:
            sknt.append('M')

        try:
            alti.append(lead_row['alti'].values[0])
        except:
            alti.append('M')

    data_frame['tmpf_{}h'.format(lead_hours)] = np.asarray(tmpf)
    data_frame['dwpf_{}h'.format(lead_hours)] = np.asarray(dwpf)
    data_frame['relh_{}h'.format(lead_hours)] = np.asarray(relh)
    data_frame['drct_{}h'.format(lead_hours)] = np.asarray(drct)
    data_frame['sknt_{}h'.format(lead_hours)] = np.asarray(sknt)
    data_frame['alti_{}h'.format(lead_hours)] = np.asarray(alti)

    # removing some columns, sending 'presentwx' to the right
    cols = data_frame.columns.tolist()
    for i in ['tmpf', 'dwpf', 'relh', 'drct', 'sknt', 'p01i', 'alti', 'vsby']:
        cols.remove(i)
    cols.append(cols.pop(cols.index('presentwx')))
    data_frame = data_frame[cols]
    
    print('3')
    # removing 'M' rows
    
    data_frame = data_frame.replace('M', np.nan)
    
    data_frame = data_frame.dropna(how='any')
    
    print('4')    
    return data_frame

In [4]:
# column_names = ['station_id', 'obs_time', 'wx', 'rh', 't', 'td', 'wdir', 'wspeed', 'pressure']
column_names = ['station', 'valid', 'tmpf', 'dwpf', 'relh', 'drct', 'sknt', 'p01i', 'alti', 'mslp', 'vsby', 'gust',
                'skyc1', 'skyc2', 'skyc3', 'skyc4', 'skyl1', 'skyl2', 'skyl3', 'skyl4', 'presentwx', 'metar']

usecols = ['station', 'valid', 'tmpf', 'dwpf', 'relh', 'drct', 'sknt', 'p01i', 'alti', 'vsby', 'presentwx']

dataset = pd.read_csv('./SBPA.csv', names=column_names, skiprows=6, parse_dates=['valid'], usecols=usecols,
                 date_parser=lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M'), low_memory=False)

dataset = rearrange(dataset)

1
2
3
4
5
6
7
8


In [5]:
dataset.head()

Unnamed: 0,station,valid,tmpf_6h,dwpf_6h,relh_6h,drct_6h,sknt_6h,alti_6h,presentwx
7,SBPA,2012-01-01 07:00:00,66.2,62.6,88.18,110.0,11.0,29.83,0
8,SBPA,2012-01-01 08:00:00,66.2,62.6,88.18,110.0,10.0,29.83,0
9,SBPA,2012-01-01 09:00:00,66.2,62.6,88.18,100.0,9.0,29.83,0
10,SBPA,2012-01-01 10:00:00,64.4,62.6,93.88,120.0,7.0,29.83,0
11,SBPA,2012-01-01 11:00:00,64.4,62.6,93.88,120.0,6.0,29.83,0


In [12]:
# Reading Features and Labels
X = dataset.iloc[:, 2:8].values
y = dataset.iloc[:, 8].values

In [21]:
# Generatin Training and Testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [22]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)



In [25]:
from keras.models import Sequential
from keras.layers import Dense


classifier = Sequential()

# Adding the input layer and the first hidden layer
classifier.add(Dense(activation='relu', input_dim=X_train.shape[1]))
# Adding the second hidden layer
classifier.add(Dense(activation='relu'))
# Adding the output layer
classifier.add(Dense(output_dim=1, activation='sigmoid'))


# Compiling the ANN
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

TypeError: __init__() missing 1 required positional argument: 'units'