In [1]:
import scipy.stats as ss
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.express as px

In [26]:
weather_data = pd.read_pickle('data/aus_weather_cln_without_encoding.pkl')
weather_data

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow,sort_dir
0,2008-12-01,Albury,13.4,22.900000,0.6,1.4,9.5,W,44.0,W,...,22.0,1007.7,1007.1,8.0,4.0,16.9,21.8,No,No,12
1,2008-12-02,Albury,7.4,25.100000,0.0,0.8,0.3,WNW,44.0,NNW,...,25.0,1010.6,1007.8,7.0,0.0,17.2,24.3,No,No,13
2,2008-12-03,Albury,12.9,25.700000,0.0,6.2,4.7,WSW,46.0,W,...,30.0,1007.6,1008.7,0.0,2.0,21.0,23.2,No,No,11
3,2008-12-04,Albury,9.2,28.000000,0.0,3.8,3.0,NE,24.0,SE,...,16.0,1017.6,1012.8,4.0,3.0,18.1,26.5,No,No,2
4,2008-12-05,Albury,17.5,32.300000,1.0,3.6,11.6,W,41.0,ENE,...,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,No,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145455,2017-06-21,Uluru,2.8,23.400000,0.0,5.0,10.8,E,31.0,SE,...,24.0,1024.6,1020.3,8.0,7.0,10.1,22.4,No,No,4
145456,2017-06-22,Uluru,3.6,25.300000,0.0,5.4,5.6,NNW,22.0,SE,...,21.0,1023.5,1019.1,4.0,7.0,10.9,24.5,No,No,15
145457,2017-06-23,Uluru,5.4,26.900000,0.0,3.2,0.0,N,37.0,SE,...,24.0,1021.0,1016.8,4.0,0.0,12.5,26.1,No,No,0
145458,2017-06-24,Uluru,7.8,27.000000,0.0,1.6,6.6,SE,28.0,SSE,...,24.0,1019.4,1016.5,3.0,2.0,15.1,26.0,No,No,6


In [3]:
weather_data['Date'] = pd.to_datetime(weather_data['Date'])

In [4]:
weather_data.columns

Index(['Date', 'Location', 'MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation',
       'Sunshine', 'WindGustDir', 'WindGustSpeed', 'WindDir9am', 'WindDir3pm',
       'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am', 'Humidity3pm',
       'Pressure9am', 'Pressure3pm', 'Cloud9am', 'Cloud3pm', 'Temp9am',
       'Temp3pm', 'RainToday', 'RainTomorrow'],
      dtype='object')

In [5]:
weather_data['Date'].dt.year.unique()

array([2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2007],
      dtype=int64)

In [6]:
def seasons(month):
    if 3 <= month <= 5:
        return 'spring'
    if 6 <= month <= 8:
        return 'summer'
    if 9 <= month <= 11:
        return 'autumn'

    return 'winter'


weather_data['Date'].dt.month.apply(seasons)

0         winter
1         winter
2         winter
3         winter
4         winter
           ...  
145455    summer
145456    summer
145457    summer
145458    summer
145459    summer
Name: Date, Length: 145460, dtype: object

In [10]:
weather_data['Year'] = weather_data['Date'].dt.year
weather_data

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow,Year
0,2008-12-01,Albury,13.4,22.900000,0.6,1.4,9.5,W,44.0,W,...,22.0,1007.7,1007.1,8.0,4.0,16.9,21.8,No,No,2008
1,2008-12-02,Albury,7.4,25.100000,0.0,0.8,0.3,WNW,44.0,NNW,...,25.0,1010.6,1007.8,7.0,0.0,17.2,24.3,No,No,2008
2,2008-12-03,Albury,12.9,25.700000,0.0,6.2,4.7,WSW,46.0,W,...,30.0,1007.6,1008.7,0.0,2.0,21.0,23.2,No,No,2008
3,2008-12-04,Albury,9.2,28.000000,0.0,3.8,3.0,NE,24.0,SE,...,16.0,1017.6,1012.8,4.0,3.0,18.1,26.5,No,No,2008
4,2008-12-05,Albury,17.5,32.300000,1.0,3.6,11.6,W,41.0,ENE,...,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,No,2008
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145455,2017-06-21,Uluru,2.8,23.400000,0.0,5.0,10.8,E,31.0,SE,...,24.0,1024.6,1020.3,8.0,7.0,10.1,22.4,No,No,2017
145456,2017-06-22,Uluru,3.6,25.300000,0.0,5.4,5.6,NNW,22.0,SE,...,21.0,1023.5,1019.1,4.0,7.0,10.9,24.5,No,No,2017
145457,2017-06-23,Uluru,5.4,26.900000,0.0,3.2,0.0,N,37.0,SE,...,24.0,1021.0,1016.8,4.0,0.0,12.5,26.1,No,No,2017
145458,2017-06-24,Uluru,7.8,27.000000,0.0,1.6,6.6,SE,28.0,SSE,...,24.0,1019.4,1016.5,3.0,2.0,15.1,26.0,No,No,2017


In [7]:
px.data.wind()

Unnamed: 0,direction,strength,frequency
0,N,0-1,0.5
1,NNE,0-1,0.6
2,NE,0-1,0.5
3,ENE,0-1,0.4
4,E,0-1,0.4
...,...,...,...
123,WSW,6+,0.1
124,W,6+,0.9
125,WNW,6+,2.2
126,NW,6+,1.5


In [8]:
hover_text = []

for index, row in weather_data[['Humidity9am', 'Pressure9am', 'Rainfall']].iterrows():
    hover_text.append(('humidity: {h}<br>' +
                       'pressure: {p}<br>' +
                       'rainfall: {r}').format(h=row['Humidity9am'],
                                               p=row['Pressure9am'],
                                               r=row['Rainfall']))

hover_text

['humidity: 71.0<br>pressure: 1007.7<br>rainfall: 0.6',
 'humidity: 44.0<br>pressure: 1010.6<br>rainfall: 0.0',
 'humidity: 38.0<br>pressure: 1007.6<br>rainfall: 0.0',
 'humidity: 45.0<br>pressure: 1017.6<br>rainfall: 0.0',
 'humidity: 82.0<br>pressure: 1010.8<br>rainfall: 1.0',
 'humidity: 55.0<br>pressure: 1009.2<br>rainfall: 0.2',
 'humidity: 49.0<br>pressure: 1009.6<br>rainfall: 0.0',
 'humidity: 48.0<br>pressure: 1013.4<br>rainfall: 0.0',
 'humidity: 42.0<br>pressure: 1008.9<br>rainfall: 0.0',
 'humidity: 58.0<br>pressure: 1007.0<br>rainfall: 1.4',
 'humidity: 48.0<br>pressure: 1011.8<br>rainfall: 0.0',
 'humidity: 89.0<br>pressure: 1010.5<br>rainfall: 2.2',
 'humidity: 76.0<br>pressure: 994.3<br>rainfall: 15.6',
 'humidity: 65.0<br>pressure: 1001.2<br>rainfall: 3.6',
 'humidity: 57.0<br>pressure: 1009.7<br>rainfall: 0.0',
 'humidity: 50.0<br>pressure: 1013.4<br>rainfall: 1.9141149119893643',
 'humidity: 69.0<br>pressure: 1012.2<br>rainfall: 0.0',
 'humidity: 80.0<br>pressure: 100

In [9]:
weather_data['RainToday'].apply(lambda x: 1 if x == 'Yes' else 0).value_counts()

0    112853
1     32607
Name: RainToday, dtype: int64

In [16]:
weather_data[['WindGustDir']]

Unnamed: 0,WindGustDir
0,W
1,WNW
2,WSW
3,NE
4,W
...,...
145455,E
145456,NNW
145457,N
145458,SE


In [17]:
dir = ['N', 'NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE', 'S', 'SSW', 'SW', 'WSW', 'W', 'WNW', 'NW', 'NNW']
dir_sort = []

for index, values in weather_data.iterrows():
    if values['WindGustDir'] in dir:
        dir_sort.append(dir.index(values['WindGustDir']))

dir_sort

[12,
 13,
 11,
 2,
 12,
 13,
 12,
 12,
 15,
 12,
 0,
 1,
 12,
 10,
 1,
 13,
 3,
 12,
 7,
 7,
 8,
 2,
 13,
 0,
 12,
 11,
 11,
 13,
 13,
 13,
 12,
 13,
 12,
 7,
 13,
 13,
 12,
 12,
 12,
 2,
 2,
 8,
 10,
 15,
 14,
 13,
 10,
 6,
 13,
 13,
 15,
 15,
 14,
 13,
 12,
 11,
 8,
 1,
 0,
 10,
 1,
 5,
 10,
 10,
 6,
 15,
 11,
 12,
 15,
 14,
 6,
 7,
 7,
 6,
 7,
 2,
 1,
 4,
 0,
 14,
 11,
 9,
 7,
 14,
 14,
 13,
 13,
 5,
 12,
 11,
 12,
 15,
 13,
 13,
 10,
 11,
 12,
 7,
 1,
 2,
 2,
 12,
 3,
 10,
 13,
 12,
 13,
 14,
 11,
 7,
 15,
 2,
 14,
 8,
 3,
 11,
 13,
 15,
 8,
 3,
 11,
 6,
 2,
 12,
 12,
 11,
 12,
 6,
 7,
 3,
 12,
 12,
 11,
 1,
 13,
 12,
 12,
 13,
 12,
 6,
 7,
 5,
 5,
 2,
 13,
 12,
 15,
 14,
 13,
 7,
 10,
 10,
 10,
 8,
 12,
 5,
 10,
 10,
 12,
 12,
 6,
 5,
 12,
 13,
 13,
 13,
 12,
 12,
 4,
 7,
 4,
 6,
 6,
 4,
 6,
 2,
 4,
 12,
 7,
 5,
 6,
 14,
 6,
 6,
 6,
 3,
 12,
 5,
 15,
 13,
 12,
 13,
 6,
 5,
 3,
 1,
 5,
 6,
 11,
 0,
 5,
 1,
 3,
 6,
 12,
 5,
 15,
 6,
 7,
 12,
 2,
 14,
 14,
 13,
 12,
 13,
 12,
 3,
 4,

In [21]:
weather_data.sort_values(by='sort_dir')

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow,Year,sort_dir
115105,2010-05-01,PearceRAAF,12.30385,26.051238,0.0,3.4,3.6,N,44.0,WSW,...,1017.709141,1015.320915,8.0,8.0,16.976107,21.72656,No,No,2010,0
83663,2016-07-17,Dartmoor,2.60000,13.100000,0.4,2.0,10.2,N,41.0,NNE,...,1026.300000,1021.100000,8.0,7.0,6.300000,10.80000,No,No,2016,0
20911,2016-11-30,NorahHead,19.20000,25.600000,0.0,1.6,7.8,N,63.0,S,...,1012.600000,1010.000000,6.0,2.0,22.900000,24.30000,No,No,2016,0
83666,2016-07-20,Dartmoor,9.00000,17.000000,0.6,0.4,6.9,N,13.0,NW,...,1023.000000,1020.300000,8.0,4.0,9.600000,16.00000,No,Yes,2016,0
83672,2016-07-26,Dartmoor,3.00000,9.600000,9.8,1.8,10.5,N,35.0,NNW,...,1011.500000,1006.400000,8.0,5.0,5.000000,9.10000,Yes,Yes,2016,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132573,2016-03-25,Hobart,11.40000,21.000000,0.0,3.4,3.4,NNW,43.0,NNW,...,1013.900000,1012.700000,7.0,7.0,13.900000,19.80000,No,No,2016,15
132578,2016-03-30,Hobart,10.70000,21.100000,0.0,3.6,7.8,NNW,28.0,SE,...,1019.600000,1016.900000,7.0,6.0,13.800000,18.00000,No,No,2016,15
132579,2016-03-31,Hobart,10.10000,24.800000,0.0,3.8,8.8,NNW,44.0,NW,...,1015.100000,1011.600000,1.0,5.0,14.300000,23.70000,No,No,2016,15
76490,2013-05-19,Portland,6.10000,15.700000,1.6,0.6,4.5,NNW,26.0,N,...,1018.800000,1014.800000,2.0,8.0,9.200000,14.70000,Yes,Yes,2013,15


In [22]:
temp = weather_data[(weather_data['Location'] == 'Albury') & (weather_data['Year'] == 2017)]
temp = temp.sort_values(by='sort_dir')

wind_rose = px.bar_polar(temp,
                         theta='WindGustDir',
                         color='WindGustSpeed',
                         color_continuous_scale=px.colors.sequential.ice)
wind_rose.show()

In [14]:
pd.options.display.max_rows = 128
px.data.wind()

Unnamed: 0,direction,strength,frequency
0,N,0-1,0.5
1,NNE,0-1,0.6
2,NE,0-1,0.5
3,ENE,0-1,0.4
4,E,0-1,0.4
5,ESE,0-1,0.3
6,SE,0-1,0.4
7,SSE,0-1,0.4
8,S,0-1,0.6
9,SSW,0-1,0.4


In [23]:
weather_data.drop('Year',axis=1,inplace=True)
weather_data

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow,sort_dir
0,2008-12-01,Albury,13.4,22.900000,0.6,1.4,9.5,W,44.0,W,...,22.0,1007.7,1007.1,8.0,4.0,16.9,21.8,No,No,12
1,2008-12-02,Albury,7.4,25.100000,0.0,0.8,0.3,WNW,44.0,NNW,...,25.0,1010.6,1007.8,7.0,0.0,17.2,24.3,No,No,13
2,2008-12-03,Albury,12.9,25.700000,0.0,6.2,4.7,WSW,46.0,W,...,30.0,1007.6,1008.7,0.0,2.0,21.0,23.2,No,No,11
3,2008-12-04,Albury,9.2,28.000000,0.0,3.8,3.0,NE,24.0,SE,...,16.0,1017.6,1012.8,4.0,3.0,18.1,26.5,No,No,2
4,2008-12-05,Albury,17.5,32.300000,1.0,3.6,11.6,W,41.0,ENE,...,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,No,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145455,2017-06-21,Uluru,2.8,23.400000,0.0,5.0,10.8,E,31.0,SE,...,24.0,1024.6,1020.3,8.0,7.0,10.1,22.4,No,No,4
145456,2017-06-22,Uluru,3.6,25.300000,0.0,5.4,5.6,NNW,22.0,SE,...,21.0,1023.5,1019.1,4.0,7.0,10.9,24.5,No,No,15
145457,2017-06-23,Uluru,5.4,26.900000,0.0,3.2,0.0,N,37.0,SE,...,24.0,1021.0,1016.8,4.0,0.0,12.5,26.1,No,No,0
145458,2017-06-24,Uluru,7.8,27.000000,0.0,1.6,6.6,SE,28.0,SSE,...,24.0,1019.4,1016.5,3.0,2.0,15.1,26.0,No,No,6


In [25]:
pd.to_pickle(weather_data,'data/aus_weather_cln_without_encoding.pkl')