### Import Packages

In [33]:
import numpy as np

# Restore `df` from IPython's %store database.
# NOTE(review): this notebook later writes its result back under the same
# name (`%store df`), so a re-run starts from the already-processed frame
# rather than from the raw data - confirm this is intended.
%store -r df

### Data Preparation

#### Remove Unnecessary Columns

In [34]:
# Columns this notebook treats as unnecessary for the later steps.
UNUSED_COLUMNS = [
    'date', 'minute', 'wet_bulb_temp',
    'rain_intensity', 'interval_rain',
    'total_rain', 'precipitation_type',
    'wind_direction', 'wind_speed',
    'max_wind_speed', 'barometric_pressure',
    'solar_radiation', 'heading',
]

# errors='ignore' skips labels that are already absent (for example when the
# restored frame has been through this cell before) while still dropping every
# listed column that is present. The previous try/except swallowed *all*
# exceptions and, because drop() is all-or-nothing by default, a single
# missing label silently left every other column in place.
df = df.drop(columns=UNUSED_COLUMNS, errors='ignore')

df.head(3)

Unnamed: 0,station_name,air_temp,humidity,battery_life,status,year,month,day,hour,weekday,weekofyear,season,timing,air_temp_t-1,humidity_t-1,battery_life_t-1
4,0,9.28,61.0,15.1,1,2015,5,22,17,5,21,0,2,9.17,59.0,15.1
7,0,9.89,52.0,15.1,1,2015,5,22,18,5,21,0,2,9.28,61.0,15.1
9,0,9.56,58.0,15.1,1,2015,5,22,19,5,21,0,2,9.89,52.0,15.1


#### t-1 Functionality

Add a t-1 column (value of the previous row) for each of the important columns.

In [35]:
# Make sure every t-1 column exists; a column that is missing is created
# filled with NaN, an existing one is left exactly as it is.
for lag_column in ('air_temp_t-1', 'humidity_t-1', 'battery_life_t-1'):
    if lag_column not in df:
        df[lag_column] = np.nan

Copy the previous row's value (within the same station) into the t-1 columns.

In [36]:
# Stations whose rows receive lagged values. Rows belonging to any other
# station_name value are left untouched, exactly as in the row-by-row version.
LAGGED_STATIONS = [
    'Oak Street Weather Station',
    'Foster Weather Station',
    '63rd Street Weather Station',
]

# Source column -> column that holds the previous row's value of that source.
LAG_COLUMNS = {
    'air_temp': 'air_temp_t-1',
    'humidity': 'humidity_t-1',
    'battery_life': 'battery_life_t-1',
}

station_rows = df[df['station_name'].isin(LAGGED_STATIONS)]
per_station = station_rows.groupby('station_name', sort=False)

# shift(1) moves each value one row down inside its own station, so every row
# sees the value of the row that precedes it in the frame's current order.
# The result is relabelled to the t-1 column names so the assignment below
# aligns on both row and column labels.
previous = per_station[list(LAG_COLUMNS)].shift(1).rename(columns=LAG_COLUMNS)

# The first row of each station has no predecessor; it is skipped so that
# whatever is already stored in its t-1 columns is kept (this replaces the
# former "prev is None -> TypeError -> pass" control flow).
has_previous = per_station.cumcount() > 0
targets = has_previous[has_previous].index

if len(targets):
    df.loc[targets, list(LAG_COLUMNS.values())] = previous.loc[targets]

df.head(10)

Unnamed: 0,station_name,air_temp,humidity,battery_life,status,year,month,day,hour,weekday,weekofyear,season,timing,air_temp_t-1,humidity_t-1,battery_life_t-1
4,0,9.28,61.0,15.1,1,2015,5,22,17,5,21,0,2,9.17,59.0,15.1
7,0,9.89,52.0,15.1,1,2015,5,22,18,5,21,0,2,9.28,61.0,15.1
9,0,9.56,58.0,15.1,1,2015,5,22,19,5,21,0,2,9.89,52.0,15.1
11,0,9.5,59.0,15.2,1,2015,5,22,20,5,21,0,2,9.56,58.0,15.1
13,0,9.78,62.0,15.2,1,2015,5,22,21,5,21,0,2,9.5,59.0,15.2
14,0,10.0,66.0,15.2,1,2015,5,22,22,5,21,0,3,9.78,62.0,15.2
16,0,10.38,63.0,15.1,1,2015,5,22,23,5,21,0,3,10.0,66.0,15.2
20,0,14.05,55.0,15.1,1,2015,5,23,9,6,21,0,0,13.89,50.0,15.1
22,0,14.77,50.0,15.1,1,2015,5,23,10,6,21,0,0,14.05,55.0,15.1
23,0,14.56,59.0,15.1,1,2015,5,23,11,6,21,0,0,14.77,50.0,15.1


#### Transform Strings to Indices for Model Calculation

In [37]:
# Lookup tables that turn the textual categories of each column into integer
# codes. Deliberately NOT named `dict`: binding that name at notebook level
# hides the built-in `dict` type from every cell that runs afterwards.
LABEL_CODES = {
    'station_name': {
        'Foster Weather Station': 0,
        '63rd Street Weather Station': 1,
        'Oak Street Weather Station': 2
    },
    'weekday': {
        'Sunday': 0,
        'Monday': 1,
        'Tuesday': 2,
        'Wednesday': 3,
        'Thursday': 4,
        'Friday': 5,
        'Saturday': 6
    },
    'season': {
        'Spring': 0,
        'Summer': 1,
        'Autumn': 2,
        'Winter': 3
    },
    'timing': {
        'Morning': 0,
        'Afternoon': 1,
        'Evening': 2,
        'Night': 3
    },
    'status': {
        'offline': 0,
        'online': 1
    }
}


def transform_val(row, classifier):
    """Return the integer code for ``row[classifier]``.

    Parameters
    ----------
    row : mapping-like
        Anything that supports ``row[classifier]`` (for example a pandas
        Series or a plain dict).
    classifier : str
        Column name; must be one of the keys of ``LABEL_CODES``.

    Returns
    -------
    The code registered for the value, or the value itself when it has no
    registered code (for example because it is already encoded).

    Raises
    ------
    KeyError
        If ``classifier`` is missing from ``row`` or from ``LABEL_CODES``.
    """
    val = row[classifier]
    codes = LABEL_CODES[classifier]

    # Unknown values pass through unchanged.
    return codes.get(val, val)


# Columns whose textual categories are replaced by integer codes.
ENCODED_COLUMNS = ('station_name', 'weekday', 'season', 'timing', 'status')

# Encode one whole column at a time instead of visiting every row and writing
# five single cells per row. transform_val only reads row[classifier], so a
# one-entry mapping stands in for the full row here. Values without a
# registered code are returned unchanged by transform_val.
# Note: each column ends up with the dtype pandas infers from the mapped
# values.
for column in ENCODED_COLUMNS:
    df[column] = df[column].map(
        lambda value, column=column: transform_val({column: value}, column)
    )


#### Clean Up Dataset

This step removes rows containing NaN values, which is required for modelling, and resets the index afterwards.

In [39]:
# reset_index() replaces the row labels with a fresh 0..n-1 range and keeps
# the old labels in a new 'index' column (visible in the exported frame below).
# NOTE(review): no NaN-dropping call is visible in this cell even though the
# section text talks about removing NaN values - confirm where that happens.
df = df.reset_index()

### Export Dataset

In [40]:
# Persist the prepared frame with IPython's %store magic so that
# `%store -r df` can restore it later.
%store df

# Preview the first ten rows of the exported frame.
df.head(10)

Stored 'df' (DataFrame)


Unnamed: 0,index,station_name,air_temp,humidity,battery_life,status,year,month,day,hour,weekday,weekofyear,season,timing,air_temp_t-1,humidity_t-1,battery_life_t-1
0,4,0,9.28,61.0,15.1,1,2015,5,22,17,5,21,0,2,9.17,59.0,15.1
1,7,0,9.89,52.0,15.1,1,2015,5,22,18,5,21,0,2,9.28,61.0,15.1
2,9,0,9.56,58.0,15.1,1,2015,5,22,19,5,21,0,2,9.89,52.0,15.1
3,11,0,9.5,59.0,15.2,1,2015,5,22,20,5,21,0,2,9.56,58.0,15.1
4,13,0,9.78,62.0,15.2,1,2015,5,22,21,5,21,0,2,9.5,59.0,15.2
5,14,0,10.0,66.0,15.2,1,2015,5,22,22,5,21,0,3,9.78,62.0,15.2
6,16,0,10.38,63.0,15.1,1,2015,5,22,23,5,21,0,3,10.0,66.0,15.2
7,20,0,14.05,55.0,15.1,1,2015,5,23,9,6,21,0,0,13.89,50.0,15.1
8,22,0,14.77,50.0,15.1,1,2015,5,23,10,6,21,0,0,14.05,55.0,15.1
9,23,0,14.56,59.0,15.1,1,2015,5,23,11,6,21,0,0,14.77,50.0,15.1
