# Project

In [48]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.metrics import classification_report, plot_confusion_matrix

# Load the raw observations and prepare them in a single chain:
#   1. parse 'Date' into datetimes,
#   2. derive the calendar month into a new 'Month' column,
#   3. remove 'Evaporation' because there are too many null values.
df = (
    pd.read_csv('data/weatherAUS.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .assign(Month=lambda frame: frame["Date"].dt.month)
    .drop(columns='Evaporation')
)

# NOTE: 'Date' is still present in the frame; the drop below is disabled.
#df.drop('Date', axis=1, inplace=True)           # Dropping 'Date' because we will only use 'Month'

In [49]:
# Display the DataFrame (notebook rich repr) to inspect the result of the
# loading/cleaning cell above, including the derived 'Month' column.
df

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow,Month
0,2008-12-01,Albury,13.4,22.9,0.6,,W,44.0,W,WNW,...,22.0,1007.7,1007.1,8.0,,16.9,21.8,No,No,12
1,2008-12-02,Albury,7.4,25.1,0.0,,WNW,44.0,NNW,WSW,...,25.0,1010.6,1007.8,,,17.2,24.3,No,No,12
2,2008-12-03,Albury,12.9,25.7,0.0,,WSW,46.0,W,WSW,...,30.0,1007.6,1008.7,,2.0,21.0,23.2,No,No,12
3,2008-12-04,Albury,9.2,28.0,0.0,,NE,24.0,SE,E,...,16.0,1017.6,1012.8,,,18.1,26.5,No,No,12
4,2008-12-05,Albury,17.5,32.3,1.0,,W,41.0,ENE,NW,...,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,No,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145455,2017-06-21,Uluru,2.8,23.4,0.0,,E,31.0,SE,ENE,...,24.0,1024.6,1020.3,,,10.1,22.4,No,No,6
145456,2017-06-22,Uluru,3.6,25.3,0.0,,NNW,22.0,SE,N,...,21.0,1023.5,1019.1,,,10.9,24.5,No,No,6
145457,2017-06-23,Uluru,5.4,26.9,0.0,,N,37.0,SE,WNW,...,24.0,1021.0,1016.8,,,12.5,26.1,No,No,6
145458,2017-06-24,Uluru,7.8,27.0,0.0,,SE,28.0,SSE,N,...,24.0,1019.4,1016.5,3.0,2.0,15.1,26.0,No,No,6


In [56]:
# Lookup table used to fill missing 'Sunshine' values.
# Structure: {location name: {month number 1-12: value}}.
# The outer keys are compared against df.Location and the inner keys against
# df.Month by the fill loop, so spellings must match the dataset exactly.
# NOTE(review): values are presumably average daily sunshine hours per month
# (the fill loop names them `hours`); the data source is not recorded in this
# notebook — TODO cite it.
# NOTE(review): locations that are absent from this dict keep their NaNs —
# verify coverage against df.Location.unique().
sunshine = {
    'Adelaide': {1:10.5, 2:10.1, 3:8.6, 4:7.3, 5:5.4, 6:4.5, 7:4.7, 8:6.1, 9:6.8, 10:8.3, 11:9.1, 12:9.5},
    'Albany': {1:8.1, 2:7.4, 3:6.6, 4:6.2, 5:5.4, 6:5.1, 7:5.5, 8:6.1, 9:6.3, 10:6.8, 11:7.4, 12:7.9},
    'Albury': {1:12.1, 2:11.2, 3:9.7, 4: 8.1, 5:5.9, 6:4.9, 7: 4.6, 8:5.9, 9:7.5, 10:9.5, 11:10.9, 12:12},
    'AliceSprings': {1:10.2, 2:9.8, 3:9.7, 4:9.5, 5:8.5, 6:8.4, 7:9.1, 8:9.9, 9:10.0, 10:10.1, 11:10.1, 12:10},
    'Ballarat': {1:10.2, 2:9.3, 3:7.9, 4:6.4, 5:4.6, 6:3.8, 7: 3.9, 8:4.6, 9:5.9, 10:7.2, 11:8.4, 12:9.6},
    'Bendigo': {1:11.9, 2:11.1, 3:9.5, 4:8, 5:5.8, 6:4.9, 7:4.8, 8:5.7, 9:7.2, 10:8.9, 11:10.4, 12:11.6},
    'Brisbane': {1:7.4, 2:6.6, 3:6.5, 4:7.2, 5:6.9, 6:6.8, 7:7.4, 8:7.9, 9:8.2, 10:8.0, 11:8.4, 12:8.1},
    'Cairns': {1:6.8, 2:6.2, 3:6.5, 4:6.8, 5:6.8, 6:7.2, 7:7.4, 8:8.1, 9:8.7, 10:8.8, 11:8.5, 12:7.8},
    'Canberra': {1:9.5, 2:9.0, 3:8.1, 4:7.3, 5:6.0, 6:5.2, 7:5.8, 8:7.0, 9:7.7, 10:8.6, 11:8.9, 12:9.4},
    'Cobar': {1:10.8, 2:10.3, 3:9.6, 4:8.9, 5:7.4, 6:6.4, 7:7.0, 8:8.4, 9:9.1, 10:9.8, 11:10.1, 12:10.6},
    'Darwin': {1:5.7, 2:5.9, 3:6.8, 4:8.8, 5:9.6, 6:10.0, 7:10.2, 8:10.4, 9:9.9, 10:9.5, 11:8.4, 12:6.9},
    'CoffsHarbour': {1:7.6, 2:7.3, 3:7.1, 4:7.2, 5:6.7, 6:6.6, 7:7.2, 8:8.3, 9:8.5, 10:8.1, 11:7.9, 12:7.9},
    'GoldCoast': {1:9.1, 2:8.8, 3:8.4, 4:8.4, 5:8.3, 6:7.8, 7:8.3, 8:8.8, 9:9.2, 10:9.1, 11:9.5, 12:9.6},
    'Hobart': {1:8.2, 2:7.9, 3:6.7, 4:5.9, 5:4.7, 6:4.3, 7:4.8, 8:5.7, 9:6.3, 10:7.4, 11:7.4, 12:7.9},
    'Katherine': {1:8.7, 2:8.6, 3:9.1, 4:9.7, 5:9.9, 6:9.9, 7:10.1, 8:10.4, 9:10.7, 10:11, 11:11, 12:10.1},
    'Launceston': {1:9.6, 2:8, 3:6.9, 4:5.6, 5:4.9, 6:4.3, 7:4.4, 8:4.9, 9:5.9, 10:7, 11:7.9, 12:9.2},
    'Melbourne': {1:9.0, 2:8.1, 3:6.8, 4:5.6, 5:3.9, 6:3.6, 7:3.7, 8:4.7, 9:5.7, 10:6.3, 11:7.0, 12:7.5},
    'MelbourneAirport': {1:8.8, 2:8.1, 3:7.2, 4:6.0, 5:4.7, 6:4.1, 7:4.4, 8:5.4, 9:6.2, 10:7.3, 11:7.5, 12:8.4},
    'Mildura': {1:10.8, 2:10.3, 3:9.6, 4:8.3, 5:6.6, 6:5.5, 7:5.9, 8:7.4, 9:8.2, 10:9.5, 11:10.0, 12:10.7},
    'Moree': {1:10.0, 2:9.8, 3:9.4, 4:9.1, 5:8.1, 6:7.1, 7:7.7, 8:9.0, 9:9.5, 10:9.7, 11:9.6, 12:10.0},
    # NOTE(review): 'MountGambier' and 'Portland' carry identical values —
    # presumably one was borrowed from the other; confirm.
    'MountGambier': {1:9.1, 2:8.6, 3:6.9, 4:5.5, 5:4.4, 6:4.0, 7:4.3, 8:5.3, 9:5.7, 10:7.0, 11:7.5, 12:8.2},
    'Newcastle': {1:9.5, 2:9.2, 3:8.6, 4:8.1, 5:8, 6:7.4, 7:7.8, 8:8.6, 9:9.4, 10:9.4, 11:9.7, 12:10.1},
    'Nhil': {1:11.6, 2:10.8, 3:9.1, 4:7.6, 5:5.7, 6:5.1, 7:5.1, 8:6, 9:7, 10:8.5, 11:10, 12:11},
    'NorfolkIsland': {1:7.5, 2:7.1, 3:6.5, 4:6.5, 5:5.9, 6:5.2, 7:5.9, 8:6.7, 9:7.1, 10:7.5, 11:7.9, 12:7.7},
    'Nuriootpa': {1:10.6, 2:10.0, 3:8.7, 4:7.5, 5:5.7, 6:4.9, 7:5.1, 8:6.5, 9:7.2, 10:8.7, 11:9.6, 12:9.9},
    # NOTE(review): 'PearceRAAF' and 'PerthAirport' carry identical values —
    # presumably one was borrowed from the other; confirm.
    'PearceRAAF': {1:11.5, 2:10.9, 3:9.5, 4:8.1, 5:6.8, 6:6.0, 7:6.1, 8:7.1, 9:7.8, 10:9.5, 11:10.7, 12:11.5},
    'Penrith': {1:8.1, 2:7.3, 3:7.1, 4:7.4, 5:7.7, 6:7, 7:7.6, 8:8.4, 9:8.9, 10:8.7, 11:8.5, 12:8.8},
    'Perth': {1:10.6, 2:10.0, 3:8.9, 4:7.3, 5:5.9, 6:4.9, 7:5.3, 8:6.2, 9:7.2, 10:8.4, 11:9.8, 12:10.6},
    'PerthAirport': {1:11.5, 2:10.9, 3:9.5, 4:8.1, 5:6.8, 6:6.0, 7:6.1, 8:7.1, 9:7.8, 10:9.5, 11:10.7, 12:11.5},
    'Portland': {1:9.1, 2:8.6, 3:6.9, 4:5.5, 5:4.4, 6:4.0, 7:4.3, 8:5.3, 9:5.7, 10:7.0, 11:7.5, 12:8.2},
    'Richmond': {1:8.1, 2:7.4, 3:7.2, 4:7.3, 5:7.6, 6:6.9, 7:7.6, 8:8.4, 9:8.9, 10:8.7, 11:8.5, 12:8.9},
    'Sale': {1:8.4, 2:7.7, 3:6.8, 4:6.1, 5:4.9, 6:4.4, 7:5.0, 8:5.8, 9:6.4, 10:7.0, 11:7.5, 12:7.9},
    'SalmonGums': {1:9.9, 2:8.8, 3:7.9, 4:7.4, 5:6.4, 6:6.2, 7:5.8, 8:6.5, 9:7.7, 10:8.5, 11:9.5, 12:10.2},
    'Sydney': {1:7.1, 2:6.7, 3:6.4, 4:6.4, 5:5.9, 6:5.5, 7:6.4, 8:7.1, 9:7.2, 10:7.2, 11:7.8, 12:7.6},
    'SydneyAirport': {1:7.5, 2:7.2, 3:6.8, 4:7.0, 5:6.4, 6:5.9, 7:6.7, 8:7.9, 9:8.0, 10:7.9, 11:7.7, 12:7.8},
    'Townsville': {1:8.0, 2:7.4, 3:7.5, 4:7.9, 5:7.5, 6:7.8, 7:8.4, 8:9.1, 9:9.6, 10:9.8, 11:9.5, 12:9.0},
    'Tuggeranong': {1:9.9, 2:8.6, 3:8, 4:7, 5:6.2, 6:5, 7:5.4, 8:6.4, 9:7.7, 10:8.6, 11:9.3, 12:10.2},
    'Uluru': {1:9.2, 2:8.6, 3:8.6, 4:8.1, 5:7.2, 6:6.3, 7:7.3, 8:8, 9:8.6, 10:9.4, 11:9.1, 12:9.4},
    'WaggaWagga': {1:10.8, 2:10.1, 3:9.3, 4:8.2, 5:6.3, 6:4.6, 7:4.8, 8:6.4, 9:7.6, 10:9.2, 11:9.7, 12:10.4},
    'Walpole': {1:8.3, 2:7.1, 3:6.2, 4:4.8, 5:4.5, 6:4.2, 7:4.3, 8:5.0, 9:5.3, 10:6.4, 11:6.5, 12:8.1},
    'Watsonia': {1:7.9, 2:7.9, 3:6.7, 4:5.6, 5:4.0, 6:3.4, 7:3.7, 8:4.9, 9:5.5, 10:6.7, 11:7.2, 12:7.1},
    'Williamtown': {1:7.4, 2:7.2, 3:7.0, 4:6.9, 5:6.1, 6:5.6, 7:6.4, 8:7.5, 9:7.7, 10:7.6, 11:7.6, 12:7.7},
    'Witchcliffe': {1:9.6, 2:9.1, 3:7.9, 4:6.9, 5:6, 6:5.8, 7:5.5, 8:5.8, 9:5.9, 10:6.7, 11:8.6, 12:9.1},
    'Wollongong': {1:8.9, 2:8.3, 3:8, 4:8, 5:8.1, 6:7.4, 7:7.9, 8:8.5, 9:9.1, 10:9.1, 11:9.2, 12:9.5},
    'Woomera': {1:11.0, 2:10.5, 3:9.7, 4:8.8, 5:7.5, 6:6.9, 7:7.3, 8:8.4, 9:9.1, 10:9.7, 11:10.4, 12:10.6}
}

In [35]:
# Fill missing 'Sunshine' values from the `sunshine` lookup
# ({location: {month: value}}), matching each row on its 'Location' and 'Month'.
# Only NaN entries are written; rows whose location or month has no entry in
# the lookup are left as NaN.
#
# The NaN mask is computed once up front: filling one location's rows never
# changes which rows of another location are missing, so it is loop-invariant.
sunshine_missing = df['Sunshine'].isna()
for loc, monthly in sunshine.items():
    rows = sunshine_missing & (df['Location'] == loc)
    if not rows.any():
        continue  # nothing to fill for this location
    # Translate every selected row's month number into this location's value
    # in one vectorised step, instead of one masked assignment per month.
    # `.to_numpy()` makes the assignment positional, so it does not depend on
    # index alignment.
    df.loc[rows, 'Sunshine'] = df.loc[rows, 'Month'].map(monthly).to_numpy()