### Import Packages

In [47]:
import numpy as np

# Restore the variable `df` that was saved earlier with `%store df`
# (IPython's storemagic persists variables across kernels/notebooks).
%store -r df

### Data Preparation

#### Remove Unnecessary Columns

In [48]:
# Drop the 'date' and 'minute' columns.
# errors='ignore' skips labels that are already gone, so the cell can be
# re-run safely. Unlike the former blanket `except Exception: pass`, it
# still drops whichever of the two columns is present and does not hide
# unrelated errors (e.g. `df` not being loaded).
df = df.drop(columns=['date', 'minute'], errors='ignore')

df.head(3)

Unnamed: 0,station_name,air_temp,wet_bulb_temp,humidity,rain_intensity,interval_rain,total_rain,precipitation_type,wind_direction,wind_speed,...,battery_life,status,year,month,day,hour,weekday,weekofyear,season,timing
0,63rd Street Weather Station,7.0,5.9,86.0,7.2,5.0,5.2,60.0,119.0,5.1,...,12.0,online,2015,4,25,9,Saturday,17,Spring,Morning
62280,63rd Street Weather Station,,,,,,,,,,...,,offline,2015,4,25,10,Saturday,17,Spring,Morning
62281,63rd Street Weather Station,,,,,,,,,,...,,offline,2015,4,25,11,Saturday,17,Spring,Morning


#### Add Previous-Value (Lag) Columns

Select the columns whose previous values should be added as features, and how many previous rows ("generations") to look back

In [49]:
# Columns for which previous-value ("_t-<n>") columns are created.
column_names = [
    'air_temp',
    'wet_bulb_temp',
    'humidity',
    'rain_intensity',
    'interval_rain',
    'total_rain',
    'precipitation_type',
    'wind_direction',
    'wind_speed',
    'max_wind_speed',
    'barometric_pressure',
    'solar_radiation',
    'heading',
    'battery_life',
]

# How many previous rows (t-1 ... t-n) are kept for each column.
generation_count = 5

Add the previous-value columns (initially filled with NaN)

In [50]:
# Make sure a '<column>_t-<n>' column exists for every observed column and
# every generation. New columns start out as NaN; columns that are already
# present are left untouched.
lag_steps = range(1, generation_count + 1)

for base_name in column_names:
    for step in lag_steps:
        lag_name = '{}_t-{}'.format(base_name, step)

        if lag_name in df:
            continue

        df[lag_name] = np.nan

Copy the values of the previous rows of the same station into the t-1 … t-n columns

In [51]:
# Fill each '<column>_t-<n>' column with the value the same station had
# n rows earlier (in the current row order of df).
#
# Grouping by 'station_name' keeps one station's readings from being used
# as the "previous" value of another station. The first n rows of each
# station have no predecessor, so shift() leaves them as NaN.
#
# This replaces the former iterrows()/df.at loop, which visited every row
# once per column and generation. GroupBy.shift does the same row offset in
# one vectorised call per column, and it covers every station present in
# the data instead of a hard-coded list of three station names.
for column in column_names:
    # values of this column, partitioned by station (row order preserved)
    station_values = df.groupby('station_name', sort=False)[column]

    for generation in range(1, generation_count + 1):
        column_prev = column + '_t-' + str(generation)

        # value from `generation` rows earlier within the same station
        df[column_prev] = station_values.shift(generation)

df.head(10)

Unnamed: 0,station_name,air_temp,wet_bulb_temp,humidity,rain_intensity,interval_rain,total_rain,precipitation_type,wind_direction,wind_speed,...,heading_t-1,heading_t-2,heading_t-3,heading_t-4,heading_t-5,battery_life_t-1,battery_life_t-2,battery_life_t-3,battery_life_t-4,battery_life_t-5
0,63rd Street Weather Station,7.0,5.9,86.0,7.2,5.0,5.2,60.0,119.0,5.1,...,,,,,,,,,,
62280,63rd Street Weather Station,,,,,,,,,,...,354.0,,,,,12.0,,,,
62281,63rd Street Weather Station,,,,,,,,,,...,,354.0,,,,,12.0,,,
62282,63rd Street Weather Station,,,,,,,,,,...,,,354.0,,,,,12.0,,
62283,63rd Street Weather Station,,,,,,,,,,...,,,,354.0,,,,,12.0,
62284,63rd Street Weather Station,,,,,,,,,,...,,,,,354.0,,,,,12.0
62285,63rd Street Weather Station,,,,,,,,,,...,,,,,,,,,,
62286,63rd Street Weather Station,,,,,,,,,,...,,,,,,,,,,
62287,63rd Street Weather Station,,,,,,,,,,...,,,,,,,,,,
62288,63rd Street Weather Station,,,,,,,,,,...,,,,,,,,,,


#### Transform Strings to Numbers for Model Calculation

In [52]:
# Integer codes for the string-valued columns, keyed by column name.
# Deliberately NOT named `dict`: that would shadow the built-in `dict` type
# for every cell executed afterwards.
CATEGORY_CODES = {
    'station_name': {
        'Foster Weather Station': 0,
        '63rd Street Weather Station': 1,
        'Oak Street Weather Station': 2
    },
    'weekday': {
        'Sunday': 0,
        'Monday': 1,
        'Tuesday': 2,
        'Wednesday': 3,
        'Thursday': 4,
        'Friday': 5,
        'Saturday': 6
    },
    'season': {
        'Spring': 0,
        'Summer': 1,
        'Autumn': 2,
        'Winter': 3
    },
    'timing': {
        'Morning': 0,
        'Afternoon': 1,
        'Evening': 2,
        'Night': 3
    }
}


def transform_val(row, classifier):
    """Return the integer code for the value stored in ``row[classifier]``.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by column name, e.g. one DataFrame row (Series).
    classifier : str
        Column name; must be a key of ``CATEGORY_CODES``.

    Returns
    -------
    The code defined for the value, or the value itself unchanged when no
    code is defined for it (for example a value that is already encoded).

    Raises
    ------
    KeyError
        If ``classifier`` is not a key of ``CATEGORY_CODES``.
    """
    val = row[classifier]
    return CATEGORY_CODES[classifier].get(val, val)


# Encode each categorical column in one pass per column instead of walking
# the frame row by row with iterrows()/df.at. Values without a defined code
# are passed through unchanged, exactly as transform_val does.
for column, codes in CATEGORY_CODES.items():
    df[column] = df[column].map(lambda value, codes=codes: codes.get(value, value))

#### Clean Up Dataset

Models cannot work with NaN values, so every NaN in the dataset is replaced with 0

In [53]:
# Replace every NaN in the frame with 0.
df = df.fillna(value=0)

### Export Dataset

In [54]:
# Persist `df` in IPython's store so that another notebook/kernel can load
# it again with `%store -r df`.
%store df

# Show the first rows of the exported frame as a final check.
df.head(10)

Stored 'df' (DataFrame)


Unnamed: 0,station_name,air_temp,wet_bulb_temp,humidity,rain_intensity,interval_rain,total_rain,precipitation_type,wind_direction,wind_speed,...,heading_t-1,heading_t-2,heading_t-3,heading_t-4,heading_t-5,battery_life_t-1,battery_life_t-2,battery_life_t-3,battery_life_t-4,battery_life_t-5
0,1,7.0,5.9,86.0,7.2,5.0,5.2,60.0,119.0,5.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
62280,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,354.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,0.0
62281,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,354.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0
62282,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,354.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0
62283,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,354.0,0.0,0.0,0.0,0.0,12.0,0.0
62284,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,354.0,0.0,0.0,0.0,0.0,12.0
62285,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
62286,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
62287,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
62288,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
