# Weather Trends Observed At YVR-Airport 2016-2017

Weather data was collected at YVR International Airport located in Richmond British Columbia.
Images of the weather were taken at English Bay in Vancouver British Columbia.

In [1]:
import pandas as pd
import numpy as np
import glob

In [2]:
#Read and merge data from csv files
def readMergeData(drop_null=True):
    files = [f for f in sorted(glob.glob("./yvr-weather/*"))]

    data_set = pd.read_csv(files[0], sep=',', skiprows=16, parse_dates=[0])
    for i in range(1,13):
        weather_data = pd.read_csv(files[i], sep=',', skiprows=16, parse_dates=[0])
        data_set = data_set.append(weather_data)

    return data_set

#Drop unnecessary columns
def CleanWeatherData(weather_data):
    weather_data.drop(["Date/Time","Day","Time","Temp Flag", "Dew Point Temp Flag",
                     "Rel Hum Flag", "Wind Dir Flag", "Wind Spd Flag",
                     "Visibility Flag", "Stn Press Flag",
                     "Hmdx Flag", "Wind Chill Flag", "Hmdx", "Wind Chill",
                     "Data Quality"], axis=1, inplace=True)


### Preview of raw data obtained from YVR weather csv file

In [3]:
weather_data = readMergeData()
weather_data.head(10)

Unnamed: 0,Date/Time,Year,Month,Day,Time,Data Quality,Temp (°C),Temp Flag,Dew Point Temp (°C),Dew Point Temp Flag,...,Wind Spd Flag,Visibility (km),Visibility Flag,Stn Press (kPa),Stn Press Flag,Hmdx,Hmdx Flag,Wind Chill,Wind Chill Flag,Weather
0,2016-06-01 00:00:00,2016,6,1,00:00,‡,14.9,,13.8,,...,,32.2,,101.28,,,,,,
1,2016-06-01 01:00:00,2016,6,1,01:00,‡,15.1,,13.8,,...,,32.2,,101.26,,,,,,Cloudy
2,2016-06-01 02:00:00,2016,6,1,02:00,‡,15.1,,12.8,,...,,32.2,,101.25,,,,,,
3,2016-06-01 03:00:00,2016,6,1,03:00,‡,14.8,,11.0,,...,,32.2,,101.27,,,,,,
4,2016-06-01 04:00:00,2016,6,1,04:00,‡,14.0,,10.6,,...,,32.2,,101.23,,,,,,Mostly Cloudy
5,2016-06-01 05:00:00,2016,6,1,05:00,‡,13.8,,10.4,,...,,32.2,,101.24,,,,,,
6,2016-06-01 06:00:00,2016,6,1,06:00,‡,14.6,,9.7,,...,,48.3,,101.25,,,,,,
7,2016-06-01 07:00:00,2016,6,1,07:00,‡,16.1,,10.5,,...,,48.3,,101.3,,,,,,Mostly Cloudy
8,2016-06-01 08:00:00,2016,6,1,08:00,‡,16.4,,10.8,,...,,48.3,,101.31,,,,,,
9,2016-06-01 09:00:00,2016,6,1,09:00,‡,17.8,,11.2,,...,,48.3,,101.29,,,,,,


In [4]:
# Clean up the description 
def CleanDescription(weather):

    regex_string = "(Freezing[\s]+)*(Heavy[\s]+)*(Moderate[\s]+)*(Mostly[\s]+)*(Mainly[\s]+)*([\s]+Showers)*([\s]+Pellets)*"
    weather["Weather"] = weather["Weather"].str.replace(regex_string, "")

    rain_snow = "Rain,Snow"
    weather["Weather"] = weather["Weather"].str.replace(rain_snow, "Snow")

    no_thunder = "Thunderstorms"
    weather["Weather"] = weather["Weather"].str.replace(no_thunder, "Cloudy")

    no_drizzle = "Drizzle"
    weather["Weather"] = weather["Weather"].str.replace(no_drizzle, "Cloudy")
    #weather["Weather"].replace("", np.nan, inplace=True)

    #remove artifically generated weather string
    remove = "Rain,Cloudy"
    weather["Weather"] = weather["Weather"].str.replace(remove, "Rain")
    
    remove = "Ice" #classify ice as snow?
    weather["Weather"] = weather["Weather"].str.replace(remove, "Snow")

    fog = "Cloudy,Fog"
    weather["Weather"] = weather["Weather"].str.replace(fog, "Fog")

    snow_fog = "Snow,Fog"
    weather["Weather"] = weather["Weather"].str.replace(snow_fog, "Fog")

    rain_fog = "Rain,Fog"
    weather["Weather"] = weather["Weather"].str.replace(rain_fog, "Fog")

In [5]:
CleanWeatherData(weather_data)
CleanDescription(weather_data)

#Discard all rows with null value
weather_data.dropna(axis=0, inplace=True)
monthly = weather_data.groupby(["Month", "Year"]).aggregate(["mean"])
desc = weather_data[["Year", "Month", "Weather"]]
monthly_weather = desc.groupby(["Month","Weather"]).aggregate(["count"])

In [6]:
monthly_weather

Unnamed: 0_level_0,Unnamed: 1_level_0,Year
Unnamed: 0_level_1,Unnamed: 1_level_1,count
Month,Weather,Unnamed: 2_level_2
1,Clear,75
1,Cloudy,142
1,Fog,9
1,Rain,83
2,Clear,53
2,Cloudy,120
2,Fog,20
2,Rain,66
2,"Rain,Snow",1
2,Snow,66
