## Manage Dataset

### Import Packages

In [10]:
import pandas as pd

### Import Dataframe from CSV File

In [11]:
# Fetch the raw weather-station sensor readings directly from the GitHub repository.
url = 'https://raw.githubusercontent.com/floteslof/machine_learning_prognose/main/weather-stations-automated-sensors.csv'
df = pd.read_csv(filepath_or_buffer=url)
# The readings now live in a pandas DataFrame; print its column overview
# (non-null counts and dtypes) as a first sanity check.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59144 entries, 0 to 59143
Data columns (total 18 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Station Name                 59144 non-null  object 
 1   Measurement Timestamp        59141 non-null  object 
 2   Air Temperature              59066 non-null  float64
 3   Wet Bulb Temperature         38843 non-null  float64
 4   Humidity                     59141 non-null  float64
 5   Rain Intensity               38843 non-null  float64
 6   Interval Rain                59141 non-null  float64
 7   Total Rain                   38843 non-null  float64
 8   Precipitation Type           38843 non-null  float64
 9   Wind Direction               59141 non-null  float64
 10  Wind Speed                   59141 non-null  float64
 11  Maximum Wind Speed           59141 non-null  float64
 12  Barometric Pressure          58995 non-null  float64
 13  Solar Radiation 

### Rename Columns

In [12]:
# Lookup from the verbose source headers to the short snake_case names
# used throughout the rest of the notebook.
new_column_names = {
    'Station Name': 'station_name',
    'Measurement Timestamp': 'date',
    'Air Temperature': 'air_temp',
    'Wet Bulb Temperature': 'wet_bulb_temp',
    'Humidity': 'humidity',
    'Rain Intensity': 'rain_intensity',
    'Interval Rain': 'interval_rain',
    'Total Rain': 'total_rain',
    'Precipitation Type': 'precipitation_type',
    'Wind Direction': 'wind_direction',
    'Wind Speed': 'wind_speed',
    'Maximum Wind Speed': 'max_wind_speed',
    'Barometric Pressure': 'barometric_pressure',
    'Solar Radiation': 'solar_radiation',
    'Heading': 'heading',
    'Battery Life': 'battery_life',
    'Measurement Timestamp Label': 'date_label',
    'Measurement ID': 'id',
}
df = df.rename(columns=new_column_names)

# Preview the first three rows to confirm the new headers.
df.head(n=3)

Unnamed: 0,station_name,date,air_temp,wet_bulb_temp,humidity,rain_intensity,interval_rain,total_rain,precipitation_type,wind_direction,wind_speed,max_wind_speed,barometric_pressure,solar_radiation,heading,battery_life,date_label,id
0,Oak Street Weather Station,05/22/2015 03:00:00 PM,,7.0,55.0,0.0,0.0,1.4,0.0,63.0,1.9,2.8,,780.0,322.0,12.0,05/22/2015 3:00 PM,OakStreetWeatherStation201505221500
1,Oak Street Weather Station,05/22/2015 05:00:00 PM,,6.3,56.0,0.0,0.0,1.4,0.0,124.0,1.5,2.3,,180.0,322.0,12.1,05/22/2015 5:00 PM,OakStreetWeatherStation201505221700
2,Oak Street Weather Station,05/22/2015 06:00:00 PM,,6.5,54.0,0.0,0.0,1.4,0.0,156.0,1.9,3.4,,127.0,322.0,12.1,05/22/2015 6:00 PM,OakStreetWeatherStation201505221800


### Clean Up Table

#### Remove entries without date

In [13]:
# Keep only the rows that carry a timestamp; the later date parsing needs one.
has_date = df['date'].notna()
df = df.loc[has_date]

#### Remove redundant columns

In [14]:
# 'date_label' repeats the timestamp in another format and 'id' is built from
# the station name plus the timestamp, so neither adds information.
# errors='ignore' skips columns that are already gone, which keeps the cell
# safe to re-run without a blanket `except` that would also hide real failures.
df = df.drop(columns=['date_label', 'id'], errors='ignore')

df.head(3)

Unnamed: 0,station_name,date,air_temp,wet_bulb_temp,humidity,rain_intensity,interval_rain,total_rain,precipitation_type,wind_direction,wind_speed,max_wind_speed,barometric_pressure,solar_radiation,heading,battery_life
0,Oak Street Weather Station,05/22/2015 03:00:00 PM,,7.0,55.0,0.0,0.0,1.4,0.0,63.0,1.9,2.8,,780.0,322.0,12.0
1,Oak Street Weather Station,05/22/2015 05:00:00 PM,,6.3,56.0,0.0,0.0,1.4,0.0,124.0,1.5,2.3,,180.0,322.0,12.1
2,Oak Street Weather Station,05/22/2015 06:00:00 PM,,6.5,54.0,0.0,0.0,1.4,0.0,156.0,1.9,3.4,,127.0,322.0,12.1


### Transform Timestamp

#### DateTime Information

Split up date for later processing

In [15]:
# Parse the raw timestamp strings (e.g. '05/22/2015 03:00:00 PM') into datetimes.
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y %I:%M:%S %p')

# Derive the calendar features with the vectorized .dt accessor instead of a
# per-row Python lambda: same values, computed column-wise.
timestamps = df['date'].dt
df['year'] = timestamps.year
df['month'] = timestamps.month
df['day'] = timestamps.day
df['hour'] = timestamps.hour
df['minute'] = timestamps.minute
df['weekday'] = timestamps.day_name()
# ISO week number. isocalendar() yields a nullable UInt32 column, so cast it to
# a plain integer column; this relies on every row having a valid date, which
# the "Remove entries without date" step is meant to guarantee.
df['weekofyear'] = timestamps.isocalendar().week.astype('int64')

df.head(3)

Unnamed: 0,station_name,date,air_temp,wet_bulb_temp,humidity,rain_intensity,interval_rain,total_rain,precipitation_type,wind_direction,...,solar_radiation,heading,battery_life,year,month,day,hour,minute,weekday,weekofyear
0,Oak Street Weather Station,2015-05-22 15:00:00,,7.0,55.0,0.0,0.0,1.4,0.0,63.0,...,780.0,322.0,12.0,2015,5,22,15,0,Friday,21
1,Oak Street Weather Station,2015-05-22 17:00:00,,6.3,56.0,0.0,0.0,1.4,0.0,124.0,...,180.0,322.0,12.1,2015,5,22,17,0,Friday,21
2,Oak Street Weather Station,2015-05-22 18:00:00,,6.5,54.0,0.0,0.0,1.4,0.0,156.0,...,127.0,322.0,12.1,2015,5,22,18,0,Friday,21


#### Seasonal Information

Split up date into seasons

In [16]:
def month2seasons(x):
    """Return the season name for a calendar month number.

    Parameters
    ----------
    x : int
        Month of the year, 1 (January) through 12 (December).

    Returns
    -------
    str
        'Winter' for 12, 1, 2; 'Spring' for 3-5; 'Summer' for 6-8;
        'Autumn' for 9-11.

    Raises
    ------
    ValueError
        If ``x`` is not a month number from 1 to 12 (for example NaN or 13).
    """
    if x in (12, 1, 2):
        return 'Winter'
    if x in (3, 4, 5):
        return 'Spring'
    if x in (6, 7, 8):
        return 'Summer'
    if x in (9, 10, 11):
        return 'Autumn'
    # Fail with a clear message instead of falling through to an unassigned
    # local variable, which used to surface as an UnboundLocalError.
    raise ValueError('month must be a number from 1 to 12, got {!r}'.format(x))

# Label every row with the season its month falls into.
season_labels = df['month'].map(month2seasons)
df['season'] = season_labels
df.head(n=3)

Unnamed: 0,station_name,date,air_temp,wet_bulb_temp,humidity,rain_intensity,interval_rain,total_rain,precipitation_type,wind_direction,...,heading,battery_life,year,month,day,hour,minute,weekday,weekofyear,season
0,Oak Street Weather Station,2015-05-22 15:00:00,,7.0,55.0,0.0,0.0,1.4,0.0,63.0,...,322.0,12.0,2015,5,22,15,0,Friday,21,Spring
1,Oak Street Weather Station,2015-05-22 17:00:00,,6.3,56.0,0.0,0.0,1.4,0.0,124.0,...,322.0,12.1,2015,5,22,17,0,Friday,21,Spring
2,Oak Street Weather Station,2015-05-22 18:00:00,,6.5,54.0,0.0,0.0,1.4,0.0,156.0,...,322.0,12.1,2015,5,22,18,0,Friday,21,Spring


#### Daily Timing Information

In [17]:
def hours2timing(x):
    """Return the part of the day that an hour of the day belongs to.

    Hours 22-23 and 0-3 give 'Night', 4-11 'Morning', 12-16 'Afternoon' and
    17-21 'Evening'. Any other value gives the placeholder 'X'.
    """
    # Ordered (label, hours) pairs; the first group containing x wins.
    day_parts = (
        ('Night', (22, 23, 0, 1, 2, 3)),
        ('Morning', range(4, 12)),
        ('Afternoon', range(12, 17)),
        ('Evening', range(17, 22)),
    )
    for label, hours in day_parts:
        if x in hours:
            return label
    return 'X'

# Label every row with the part of the day its hour falls into.
timing_labels = df['hour'].map(hours2timing)
df['timing'] = timing_labels
df.head(n=3)

Unnamed: 0,station_name,date,air_temp,wet_bulb_temp,humidity,rain_intensity,interval_rain,total_rain,precipitation_type,wind_direction,...,battery_life,year,month,day,hour,minute,weekday,weekofyear,season,timing
0,Oak Street Weather Station,2015-05-22 15:00:00,,7.0,55.0,0.0,0.0,1.4,0.0,63.0,...,12.0,2015,5,22,15,0,Friday,21,Spring,Afternoon
1,Oak Street Weather Station,2015-05-22 17:00:00,,6.3,56.0,0.0,0.0,1.4,0.0,124.0,...,12.1,2015,5,22,17,0,Friday,21,Spring,Evening
2,Oak Street Weather Station,2015-05-22 18:00:00,,6.5,54.0,0.0,0.0,1.4,0.0,156.0,...,12.1,2015,5,22,18,0,Friday,21,Spring,Evening


### Export Dataset

In [18]:
# Persist the prepared DataFrame with IPython's %store magic so that another
# notebook can restore it with `%store -r df`.
%store df

Stored 'df' (DataFrame)
