In [1]:
import numpy as np
import pandas as pd
import re
import sklearn
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

In [2]:
raw = pd.read_csv('data/weather_hk.csv')
df_hk=raw.copy()


In [3]:
#change 'Trace' to 0.05
#change Total rainfall to float type
df_hk['Total Rainfall(mm)']=df_hk['Total Rainfall(mm)'].map(lambda x: '0' if x=='Trace' else x)
df_hk['Total Rainfall(mm)']=df_hk['Total Rainfall(mm)'].astype(float)

In [4]:
#mark values with '#' as missing value
df_hk['PWD_missing']=df_hk['Prevailling Wind Direction(degrees)'].str.contains('#')
df_hk['MWS_missing']=df_hk['Mean Wind Speed(km/h)'].str.contains('#')

In [5]:
#remove # from numeric columns
df_hk['Prevailling Wind Direction(degrees)']=df_hk['Prevailling Wind Direction(degrees)'].str.extract(r'(\d+)')
df_hk['Prevailling Wind Direction(degrees)']=df_hk['Prevailling Wind Direction(degrees)'].astype(float)
df_hk['Mean Wind Speed(km/h)']=df_hk['Mean Wind Speed(km/h)'].str.extract(r'(\d+)')
df_hk['Mean Wind Speed(km/h)']=df_hk['Mean Wind Speed(km/h)'].astype(float)

In [6]:
#mark nan as missing value also
df_hk['PWD_missing']=(df_hk['PWD_missing'] | df_hk['Prevailling Wind Direction(degrees)'].isnull())

In [7]:
#create Date column in datetime format
df_hk['Date']=df_hk['Year'].map(str)+'-'+df_hk['Month'].map(str)+'-'+df_hk['Day'].map(str)
df_hk['Date']=pd.to_datetime(df_hk['Date'],format='%Y-%m-%d')

In [8]:
#Create weekly moving average for different feature
df_hk['humidity_sma_7']=df_hk.loc[:,'Mean Relative Humidity(%)'].rolling(window=7, min_periods=1).mean()
df_hk['rainfall_sma_7']=df_hk.loc[:,'Total Rainfall(mm)'].rolling(window=7, min_periods=1).mean()
df_hk['sunshine_sma_7']=df_hk.loc[:,'Total Bright Sunshine(hours)'].rolling(window=7, min_periods=1).mean()
df_hk['pressure_sma_7']=df_hk.loc[:,'Mean Pressure(hPa)'].rolling(window=7, min_periods=1).mean()
df_hk['cloud_sma_7']=df_hk.loc[:,'Mean Amount of Cloud(%)'].rolling(window=7, min_periods=1).mean()


In [9]:
def get_season(month):
    if 3<=month<=5:
        season='spring'
    elif 6<=month<=9:
        season='summer'
    elif 10<=month<=11:
        season='autumn'
    else: season='winter'
    return season

df_season=pd.get_dummies(df_hk['Month'].apply(get_season),drop_first=True)
df_hk=pd.concat([df_hk,df_season],axis=1)

In [10]:
def get_wind_direction(wind_degree):
    if wind_degree==np.nan:
        wind_degree='None'
    elif wind_degree%45==0:
        
        sector=wind_degree//45
        if sector==8 or sector ==0:
            direction='N'
        elif sector==1:
            direction='NE'
        elif sector==2:
            direction='E'
        elif sector==3:
            direction='SE'
        elif sector==4:
            direction='S'
        elif sector==5:
            direction='SW'
        elif sector==6:
            direction='W'
        else:
            direction='NW'
        
    else:
        sector=wind_degree//45
        if sector==0:
            direction='NNE'
        elif sector==1:
            direction='ENE'
        elif sector==2:
            direction='ESE'
        elif sector==3:
            direction='SSE'
        elif sector==4:
            direction='SSW'
        elif sector==5:
            direction='WSW'
        elif sector==6:
            direction='WNW'
        else:
            direction='NNW'
    return direction

In [11]:
#transform the wind direction degrees into categories
df_hk['wind_direction']=df_hk['Prevailling Wind Direction(degrees)'].apply(get_wind_direction)

In [12]:
#create columns for rainfall on next day
df_hk['rain_tmr_mm']=df_hk['Total Rainfall(mm)'].shift(-1)
df_hk['rain_tmr_class']=df_hk['rain_tmr_mm'].map(lambda x: 1 if x>1.0 else 0)

In [13]:
#drop year and day
df_hk.drop(columns=['Year','Day'],inplace=True)

In [14]:
def reorder_df_cols(df):
    reorder_columns=['Date','Month', 'spring', 'summer', 'winter','Mean Pressure(hPa)', 'pressure_sma_7', 'Absolute Daily Max(deg. C)', 'Mean(deg. C)', 
                     'Absolute Daily min(deg. C)', 'Mean Dew Point(deg. C)','Mean Relative Humidity(%)','humidity_sma_7',
                     'Mean Amount of Cloud(%)','cloud_sma_7', 'Total Bright Sunshine(hours)','sunshine_sma_7', 
                     'Prevailling Wind Direction(degrees)','PWD_missing','wind_direction', 'Mean Wind Speed(km/h)',
                     'MWS_missing', 'Total Rainfall(mm)', 'rainfall_sma_7', 'rain_tmr_mm','rain_tmr_class']
    df=df[reorder_columns]
    return df

df_hk=reorder_df_cols(df_hk)

In [15]:
df_hk

Unnamed: 0,Date,Month,spring,summer,winter,Mean Pressure(hPa),pressure_sma_7,Absolute Daily Max(deg. C),Mean(deg. C),Absolute Daily min(deg. C),...,sunshine_sma_7,Prevailling Wind Direction(degrees),PWD_missing,wind_direction,Mean Wind Speed(km/h),MWS_missing,Total Rainfall(mm),rainfall_sma_7,rain_tmr_mm,rain_tmr_class
0,2000-01-01,1,0,0,1,1017.1,1017.100000,22.7,19.2,17.2,...,9.800000,30.0,False,NNE,12.0,False,0.0,0.000000,0.0,0
1,2000-01-02,1,0,0,1,1017.2,1017.150000,23.6,20.4,17.8,...,9.550000,10.0,False,NNE,9.0,False,0.0,0.000000,0.0,0
2,2000-01-03,1,0,0,1,1016.8,1017.033333,20.5,19.2,18.2,...,9.366667,40.0,False,NNE,26.0,False,0.0,0.000000,0.0,0
3,2000-01-04,1,0,0,1,1016.1,1016.800000,20.5,18.9,17.5,...,8.000000,40.0,False,NNE,25.0,False,0.0,0.000000,0.0,0
4,2000-01-05,1,0,0,1,1013.9,1016.220000,23.2,20.6,18.3,...,7.740000,30.0,False,NNE,16.0,False,0.0,0.000000,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7199,2019-12-27,12,0,0,1,1020.3,1017.314286,21.3,18.0,15.6,...,6.271429,360.0,False,N,32.0,False,0.0,0.200000,0.0,0
7200,2019-12-28,12,0,0,1,1020.2,1017.657143,20.9,18.5,16.8,...,6.714286,70.0,False,ENE,28.0,False,0.0,0.000000,9.3,1
7201,2019-12-29,12,0,0,1,1018.3,1018.014286,19.6,18.8,17.9,...,5.928571,50.0,False,ENE,22.0,False,9.3,1.328571,0.3,0
7202,2019-12-30,12,0,0,1,1020.0,1018.485714,22.5,20.3,18.8,...,5.885714,70.0,False,ENE,22.0,False,0.3,1.371429,0.0,0


In [16]:
df_hk['rain_tmr_class'].value_counts()

0    5210
1    1994
Name: rain_tmr_class, dtype: int64

In [17]:
#export weather data with data cleaning
df_hk.to_csv('data/weather_hk_raw.csv')

## Data with train_test_split

In [18]:
train,test = train_test_split(df_hk, test_size=0.2)

In [19]:
imputer_1 = SimpleImputer(strategy='median')
train['Prevailling Wind Direction(degrees)']=imputer_1.fit_transform(train[['Prevailling Wind Direction(degrees)']])
test['Prevailling Wind Direction(degrees)']=imputer_1.transform(test[['Prevailling Wind Direction(degrees)']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['Prevailling Wind Direction(degrees)']=imputer_1.fit_transform(train[['Prevailling Wind Direction(degrees)']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['Prevailling Wind Direction(degrees)']=imputer_1.transform(test[['Prevailling Wind Direction(degrees)']])


In [20]:
imputer_2 = SimpleImputer(strategy='median')
train['Mean Wind Speed(km/h)']=imputer_2.fit_transform(train[['Mean Wind Speed(km/h)']])
test['Mean Wind Speed(km/h)']=imputer_2.transform(test[['Mean Wind Speed(km/h)']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['Mean Wind Speed(km/h)']=imputer_2.fit_transform(train[['Mean Wind Speed(km/h)']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['Mean Wind Speed(km/h)']=imputer_2.transform(test[['Mean Wind Speed(km/h)']])


In [21]:
train.columns

Index(['Date', 'Month', 'spring', 'summer', 'winter', 'Mean Pressure(hPa)',
       'pressure_sma_7', 'Absolute Daily Max(deg. C)', 'Mean(deg. C)',
       'Absolute Daily min(deg. C)', 'Mean Dew Point(deg. C)',
       'Mean Relative Humidity(%)', 'humidity_sma_7',
       'Mean Amount of Cloud(%)', 'cloud_sma_7',
       'Total Bright Sunshine(hours)', 'sunshine_sma_7',
       'Prevailling Wind Direction(degrees)', 'PWD_missing', 'wind_direction',
       'Mean Wind Speed(km/h)', 'MWS_missing', 'Total Rainfall(mm)',
       'rainfall_sma_7', 'rain_tmr_mm', 'rain_tmr_class'],
      dtype='object')

In [22]:
#standardize numerical features
scaler = StandardScaler()
num_cols = ['Mean Pressure(hPa)', 'pressure_sma_7',
       'Absolute Daily Max(deg. C)', 'Mean(deg. C)',
       'Absolute Daily min(deg. C)', 'Mean Dew Point(deg. C)',
       'Mean Relative Humidity(%)', 'humidity_sma_7',
       'Mean Amount of Cloud(%)', 'cloud_sma_7',
       'Total Bright Sunshine(hours)', 'sunshine_sma_7',
       'Prevailling Wind Direction(degrees)',
       'Mean Wind Speed(km/h)', 'Total Rainfall(mm)',
       'rainfall_sma_7']

scaler = StandardScaler().fit(train[num_cols].values)
train[num_cols] = scaler.transform(train[num_cols].values)
test[num_cols] = scaler.transform(test[num_cols].values)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train[num_cols] = scaler.transform(train[num_cols].values)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test[num_cols] = scaler.transform(test[num_cols].values)
A value is trying to be set on a copy of a slice from a DataFrame.
Try

In [23]:
#set date as index
train.set_index('Date',drop=True,inplace=True)
test.set_index('Date',drop=True,inplace=True)
#drop categorical columns for SMOTE
train.drop(columns='wind_direction',axis=1,inplace=True)
test.drop(columns='wind_direction',axis=1,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [24]:
#set features and target
y_train=train[['rain_tmr_class']]
y_test=test[['rain_tmr_class']]
X_train=train.iloc[:,:-2]
X_test=test.iloc[:,:-2]

In [25]:
X_train

Unnamed: 0_level_0,Month,spring,summer,winter,Mean Pressure(hPa),pressure_sma_7,Absolute Daily Max(deg. C),Mean(deg. C),Absolute Daily min(deg. C),Mean Dew Point(deg. C),...,Mean Amount of Cloud(%),cloud_sma_7,Total Bright Sunshine(hours),sunshine_sma_7,Prevailling Wind Direction(degrees),PWD_missing,Mean Wind Speed(km/h),MWS_missing,Total Rainfall(mm),rainfall_sma_7
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-08-28,8,0,1,0,-1.292166,-1.434129,1.573352,1.446069,1.395486,1.035802,...,-0.754471,0.097750,1.260988,0.600734,1.248321,False,-0.128933,False,-0.314603,-0.528172
2011-01-25,1,0,0,1,1.778407,1.336904,-1.621231,-1.807873,-1.764257,-1.737680,...,-0.418444,-0.213663,0.721348,0.169234,-0.681967,False,0.273090,False,-0.314603,-0.594211
2016-12-11,12,0,0,1,0.596236,0.781291,-0.841158,-0.619307,-0.426697,-0.570788,...,-0.712468,-0.769157,0.824137,0.884615,-0.360252,False,1.981688,False,-0.314603,-0.594211
2005-12-14,12,0,0,1,1.824465,1.362692,-1.695523,-1.593541,-1.531638,-2.363405,...,0.841655,0.459662,-1.308726,-0.682412,-1.003681,False,1.077136,False,-0.314603,-0.594211
2018-06-17,6,0,1,0,-1.599223,-1.877213,0.886145,0.919983,0.872093,0.579192,...,-0.334438,0.510162,0.669954,-0.290655,-0.467490,False,1.479159,False,-0.314603,1.353272
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2003-12-24,12,0,0,1,0.857235,1.756545,-0.915451,-1.047970,-1.066400,-1.078132,...,-2.686624,-2.915382,1.158199,1.043589,-0.896443,False,-1.033484,False,-0.314603,-0.594211
2000-06-13,6,0,1,0,-0.846933,-1.070753,-0.525414,-0.210129,0.019156,-0.046532,...,1.303692,1.124571,-1.308726,-0.273622,-1.110920,False,0.172585,False,-0.295092,1.423195
2006-05-01,5,1,0,0,-0.463111,-0.414333,0.700414,0.744621,0.678244,0.815953,...,0.421622,1.158237,0.592862,-1.006037,1.248321,False,-0.128933,False,-0.314603,1.955386
2013-10-13,10,0,0,0,-0.263524,-0.411988,0.811853,0.705651,0.755783,0.207140,...,0.001589,-0.440911,0.952622,1.066300,-0.253014,False,1.881182,False,-0.314603,-0.557954


In [26]:
#oversampling
oversample = SMOTE()
X_resampled, y_resampled = oversample.fit_resample(X_train, y_train)

In [27]:
X_resampled

Unnamed: 0,Month,spring,summer,winter,Mean Pressure(hPa),pressure_sma_7,Absolute Daily Max(deg. C),Mean(deg. C),Absolute Daily min(deg. C),Mean Dew Point(deg. C),...,Mean Amount of Cloud(%),cloud_sma_7,Total Bright Sunshine(hours),sunshine_sma_7,Prevailling Wind Direction(degrees),PWD_missing,Mean Wind Speed(km/h),MWS_missing,Total Rainfall(mm),rainfall_sma_7
0,8,0,1,0,-1.292166,-1.434129,1.573352,1.446069,1.395486,1.035802,...,-0.754471,0.097750,1.260988,0.600734,1.248321,False,-0.128933,False,-0.314603,-0.528172
1,1,0,0,1,1.778407,1.336904,-1.621231,-1.807873,-1.764257,-1.737680,...,-0.418444,-0.213663,0.721348,0.169234,-0.681967,False,0.273090,False,-0.314603,-0.594211
2,12,0,0,1,0.596236,0.781291,-0.841158,-0.619307,-0.426697,-0.570788,...,-0.712468,-0.769157,0.824137,0.884615,-0.360252,False,1.981688,False,-0.314603,-0.594211
3,12,0,0,1,1.824465,1.362692,-1.695523,-1.593541,-1.531638,-2.363405,...,0.841655,0.459662,-1.308726,-0.682412,-1.003681,False,1.077136,False,-0.314603,-0.594211
4,6,0,1,0,-1.599223,-1.877213,0.886145,0.919983,0.872093,0.579192,...,-0.334438,0.510162,0.669954,-0.290655,-0.467490,False,1.479159,False,-0.314603,1.353272
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8377,8,0,1,0,-0.644238,-1.101587,0.747796,0.781208,0.682983,0.952973,...,0.490224,0.401775,-0.210598,-0.015705,-0.373362,False,-0.764797,False,0.025553,3.008816
8378,3,1,0,0,1.175414,0.547088,-1.925158,-1.678568,-1.503441,-1.537805,...,1.002040,1.324289,-1.210618,-1.648149,-0.525979,False,1.470032,False,-0.295981,-0.182315
8379,11,0,0,0,0.238875,0.527625,0.032893,0.188903,0.315558,0.195138,...,0.870884,0.718016,-0.962347,-0.442303,-0.467490,False,1.509727,False,-0.314603,-0.590720
8380,8,0,1,0,-1.113839,-1.758714,0.548979,0.852531,0.988393,1.050111,...,0.922429,0.983424,-1.277083,-0.826974,0.670864,False,1.370918,False,0.036611,2.175248


In [28]:
X_resampled.to_csv('data/train/f_hk_train.csv',index=False)
X_test.to_csv('data/test/f_hk_test.csv',index=False)
y_resampled.to_csv('data/train/t_hk_train.csv',index=False)
y_test.to_csv('data/test/t_hk_test.csv',index=False)

### Data without train_test_split

In [29]:
timeorder=pd.concat([train,test],axis=0).sort_index()
timeorder

Unnamed: 0_level_0,Month,spring,summer,winter,Mean Pressure(hPa),pressure_sma_7,Absolute Daily Max(deg. C),Mean(deg. C),Absolute Daily min(deg. C),Mean Dew Point(deg. C),...,Total Bright Sunshine(hours),sunshine_sma_7,Prevailling Wind Direction(degrees),PWD_missing,Mean Wind Speed(km/h),MWS_missing,Total Rainfall(mm),rainfall_sma_7,rain_tmr_mm,rain_tmr_class
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-01,1,0,0,1,0.673000,0.717993,-0.618280,-0.853124,-0.872550,-1.027398,...,1.209594,1.883879,-0.896443,False,-1.033484,False,-0.314603,-0.594211,0.0,0
2000-01-02,1,0,0,1,0.688353,0.726198,-0.451122,-0.619307,-0.756241,-1.061221,...,1.081108,1.784520,-1.110920,False,-1.335001,False,-0.314603,-0.594211,0.0,0
2000-01-03,1,0,0,1,0.626942,0.707053,-1.026890,-0.853124,-0.678701,-1.111955,...,1.004017,1.711657,-0.789205,False,0.373596,False,-0.314603,-0.594211,0.0,0
2000-01-04,1,0,0,1,0.519472,0.668762,-1.026890,-0.911578,-0.814396,-0.909018,...,-0.306537,1.168497,-0.789205,False,0.273090,False,-0.314603,-0.594211,0.0,0
2000-01-05,1,0,0,1,0.181709,0.573581,-0.525414,-0.580338,-0.659316,-0.604611,...,0.412982,1.065164,-0.896443,False,-0.631461,False,-0.314603,-0.594211,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-27,12,0,0,1,1.164292,0.753159,-0.878304,-1.086940,-1.182709,-1.382539,...,1.106805,0.481503,2.642419,False,0.976630,False,-0.314603,-0.576083,0.0,0
2019-12-28,12,0,0,1,1.148939,0.809423,-0.952597,-0.989516,-0.950090,-1.061221,...,0.258800,0.657510,-0.467490,False,0.574607,False,-0.314603,-0.594211,9.3,1
2019-12-29,12,0,0,1,0.857235,0.868032,-1.194048,-0.931062,-0.736856,-0.435496,...,-1.308726,0.345240,-0.681967,False,-0.028427,False,0.139020,-0.473788,0.3,0
2019-12-30,12,0,0,1,1.118233,0.945396,-0.655426,-0.638792,-0.562392,-0.114178,...,-1.308726,0.328207,-0.467490,False,-0.028427,False,-0.299970,-0.469903,0.0,0


In [30]:
#test size =0.8
test_size=int(timeorder.shape[0]*0.8)
train_to=timeorder[:test_size]
test_to=timeorder[test_size:]

#split into features and columns
y_train_to=train_to[['rain_tmr_class']]
y_test_to=test_to[['rain_tmr_class']]
X_train_to=train_to.iloc[:,:-2]
X_test_to=test_to.iloc[:,:-2]

#oversampling
oversample = SMOTE()
X_resampled_to, y_resampled_to = oversample.fit_resample(X_train_to, y_train_to)

#export to csv
X_resampled_to.to_csv('data/train/f_hk_train_to.csv')
X_test_to.to_csv('data/test/f_hk_test_to.csv')
y_resampled_to.to_csv('data/train/t_hk_train_to.csv')
y_test_to.to_csv('data/test/t_hk_test_to.csv')

In [31]:
X_resampled_to.shape

(8422, 22)

In [32]:
y_resampled_to['rain_tmr_class'].value_counts()

1    4211
0    4211
Name: rain_tmr_class, dtype: int64

### dataframe with common columns as AUS

In [33]:
common_cols=['Month', 'spring', 'summer', 'winter', 'Mean Pressure(hPa)','Absolute Daily Max(deg. C)', 'Mean(deg. C)',
            'Absolute Daily min(deg. C)','Mean Relative Humidity(%)','Mean Amount of Cloud(%)',
            'Total Bright Sunshine(hours)','Mean Wind Speed(km/h)','Total Rainfall(mm)',
            'rain_tmr_mm', 'rain_tmr_class']