# Project

In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.metrics import classification_report, plot_confusion_matrix

In [2]:
df = pd.read_csv('data/weatherAUS.csv')

In [3]:
df["Date"] = pd.to_datetime(df["Date"])   # Converting into DateTime
df["Month"] = df["Date"].dt.month         # Abstracting month in new column 

In [4]:
df.drop('Evaporation', axis=1, inplace=True)    # Dropping 'Evaporation' because there are too many null values
df.drop('Date', axis=1, inplace=True)           # Dropping 'Date' because we will only use 'Month'

In [5]:
sunshine = {
    'Albury': {1:12.1, 2:11.2, 3:9.7, 4: 8.1, 5:5.9, 6:4.9, 7: 4.6, 8:5.9, 9:7.5, 10:9.5, 11:10.9, 12:12},
    'Ballarat': {1:10.2, 2:9.3, 3:7.9, 4:6.4, 5:4.6, 6:3.8, 7: 3.9, 8:4.6, 9:5.9, 10:7.2, 11:8.4, 12:9.6},
    'Bendigo': {1:11.9, 2:11.1, 3:9.5, 4:8, 5:5.8, 6:4.9, 7:4.8, 8:5.7, 9:7.2, 10:8.9, 11:10.4, 12:11.6},
    'GoldCoast': {1:9.1, 2:8.8, 3:8.4, 4:8.4, 5:8.3, 6:7.8, 7:8.3, 8:8.8, 9:9.2, 10:9.1, 11:9.5, 12:9.6},
    'Katherine': {1:8.7, 2:8.6, 3:9.1, 4:9.7, 5:9.9, 6:9.9, 7:10.1, 8:10.4, 9:10.7, 10:11, 11:11, 12:10.1},
    'Launceston': {1:9.6, 2:8, 3:6.9, 4:5.6, 5:4.9, 6:4.3, 7:4.4, 8:4.9, 9:5.9, 10:7, 11:7.9, 12:9.2},
    'Newcastle': {1:9.5, 2:9.2, 3:8.6, 4:8.1, 5:8, 6:7.4, 7:7.8, 8:8.6, 9:9.4, 10:9.4, 11:9.7, 12:10.1},
    'Nhil': {1:11.6, 2:10.8, 3:9.1, 4:7.6, 5:5.7, 6:5.1, 7:5.1, 8:6, 9:7, 10:8.5, 11:10, 12:11},
    'Penrith': {1:8.1, 2:7.3, 3:7.1, 4:7.4, 5:7.7, 6:7, 7:7.6, 8:8.4, 9:8.9, 10:8.7, 11:8.5, 12:8.8},
    'Richmond': {1:8.1, 2:7.4, 3:7.2, 4:7.3, 5:7.6, 6:6.9, 7:7.6, 8:8.4, 9:8.9, 10:8.7, 11:8.5, 12:8.9},
    'SalmonGums': {1:9.9, 2:8.8, 3:7.9, 4:7.4, 5:6.4, 6:6.2, 7:5.8, 8:6.5, 9:7.7, 10:8.5, 11:9.5, 12:10.2},
    'Tuggeranong': {1:9.9, 2:8.6, 3:8, 4:7, 5:6.2, 6:5, 7:5.4, 8:6.4, 9:7.7, 10:8.6, 11:9.3, 12:10.2},
    'Uluru': {1:9.2, 2:8.6, 3:8.6, 4:8.1, 5:7.2, 6:6.3, 7:7.3, 8:8, 9:8.6, 10:9.4, 11:9.1, 12:9.4},
    'Witchcliffe': {1:9.6, 2:9.1, 3:7.9, 4:6.9, 5:6, 6:5.8, 7:5.5, 8:5.8, 9:5.9, 10:6.7, 11:8.6, 12:9.1},
    'Wollongong': {1:8.9, 2:8.3, 3:8, 4:8, 5:8.1, 6:7.4, 7:7.9, 8:8.5, 9:9.1, 10:9.1, 11:9.2, 12:9.5}
}

albury = {1:12.1, 2:11.2, 3:9.7, 4: 8.1, 5:5.9, 6:4.9, 7: 4.6, 8:5.9, 9:7.5, 10:9.5, 11:10.9, 12:12}
ballarat = {1:10.2, 2:9.3, 3:7.9, 4:6.4, 5:4.6, 6:3.8, 7: 3.9, 8:4.6, 9:5.9, 10:7.2, 11:8.4, 12:9.6}
bendigo = {1:11.9, 2:11.1, 3:9.5, 4:8, 5:5.8, 6:4.9, 7:4.8, 8:5.7, 9:7.2, 10:8.9, 11:10.4, 12:11.6}
goldcoast = {1:9.1, 2:8.8, 3:8.4, 4:8.4, 5:8.3, 6:7.8, 7:8.3, 8:8.8, 9:9.2, 10:9.1, 11:9.5, 12:9.6}
katherine = {1:8.7, 2:8.6, 3:9.1, 4:9.7, 5:9.9, 6:9.9, 7:10.1, 8:10.4, 9:10.7, 10:11, 11:11, 12:10.1}
launceston = {1:9.6, 2:8, 3:6.9, 4:5.6, 5:4.9, 6:4.3, 7:4.4, 8:4.9, 9:5.9, 10:7, 11:7.9, 12:9.2}
newcastle = {1:9.5, 2:9.2, 3:8.6, 4:8.1, 5:8, 6:7.4, 7:7.8, 8:8.6, 9:9.4, 10:9.4, 11:9.7, 12:10.1}
nhil = {1:11.6, 2:10.8, 3:9.1, 4:7.6, 5:5.7, 6:5.1, 7:5.1, 8:6, 9:7, 10:8.5, 11:10, 12:11}
penrith = {1:8.1, 2:7.3, 3:7.1, 4:7.4, 5:7.7, 6:7, 7:7.6, 8:8.4, 9:8.9, 10:8.7, 11:8.5, 12:8.8}
richmond = {1:8.1, 2:7.4, 3:7.2, 4:7.3, 5:7.6, 6:6.9, 7:7.6, 8:8.4, 9:8.9, 10:8.7, 11:8.5, 12:8.9}
salmongums = {1:9.9, 2:8.8, 3:7.9, 4:7.4, 5:6.4, 6:6.2, 7:5.8, 8:6.5, 9:7.7, 10:8.5, 11:9.5, 12:10.2}
tuggeranong = {1:9.9, 2:8.6, 3:8, 4:7, 5:6.2, 6:5, 7:5.4, 8:6.4, 9:7.7, 10:8.6, 11:9.3, 12:10.2}
uluru = {1:9.2, 2:8.6, 3:8.6, 4:8.1, 5:7.2, 6:6.3, 7:7.3, 8:8, 9:8.6, 10:9.4, 11:9.1, 12:9.4}
witchcliffe = {1:9.6, 2:9.1, 3:7.9, 4:6.9, 5:6, 6:5.8, 7:5.5, 8:5.8, 9:5.9, 10:6.7, 11:8.6, 12:9.1}
wollongong = {1:8.9, 2:8.3, 3:8, 4:8, 5:8.1, 6:7.4, 7:7.9, 8:8.5, 9:9.1, 10:9.1, 11:9.2, 12:9.5}

In [None]:
df['Location'][0]

In [6]:
df['Location'] in sunshine.keys()

TypeError: 'Series' objects are mutable, thus they cannot be hashed

In [23]:
for key, values in sunshine.items():
    for month, value in values.items():
        df.loc[((df.Location == key) & (df.Month == month)), 'Sunshine'] = value

In [26]:
df.Location.value_counts()

Canberra            3436
Sydney              3344
Brisbane            3193
Hobart              3193
Perth               3193
Melbourne           3193
Adelaide            3193
Darwin              3193
Launceston          3040
Townsville          3040
MountGinini         3040
GoldCoast           3040
Albury              3040
MountGambier        3040
Albany              3040
Ballarat            3040
Wollongong          3040
Bendigo             3040
Cairns              3040
AliceSprings        3040
Newcastle           3039
Tuggeranong         3039
Penrith             3039
WaggaWagga          3009
NorfolkIsland       3009
Witchcliffe         3009
Portland            3009
BadgerysCreek       3009
Sale                3009
MelbourneAirport    3009
Williamtown         3009
Woomera             3009
PearceRAAF          3009
Nuriootpa           3009
Dartmoor            3009
Moree               3009
CoffsHarbour        3009
Cobar               3009
PerthAirport        3009
Mildura             3009


In [None]:
    'NorfolkIsland': {1:7.5, 2:7.1, 3:6.5, 4:6.5, 5:5.9, 6:5.2, 7:5.9, 8:6.7, 9:7.1, 10:7.5, 11:7.9, 12:7.7},
    'Nuriootpa': {1: 10.6, 2: 10.0, 3: 8.7, 4: 7.5, 5: 5.7, 6: 4.9, 7: 5.1, 8: 6.5, 9: 7.2, 10: 8.7, 11: 9.6, 12: 9.9},
    'PearceRAAF': {1: 11.5, 2: 10.9, 3: 9.5, 4: 8.1, 5: 6.8, 6: 6.0, 7: 6.1, 8: 7.1, 9: 7.8, 10: 9.5, 11: 10.7, 12: 11.5},
    'Penrith': {1:8.1, 2:7.3, 3:7.1, 4:7.4, 5:7.7, 6:7, 7:7.6, 8:8.4, 9:8.9, 10:8.7, 11:8.5, 12:8.8},
    'Perth': {1: 10.6, 2: 10.0, 3: 8.9, 4: 7.3, 5: 5.9, 6: 4.9, 7: 5.3, 8: 6.2, 9: 7.2, 10: 8.4, 11: 9.8, 12: 10.6},
    'PerthAirport': {1: 11.5, 2: 10.9, 3: 9.5, 4: 8.1, 5: 6.8, 6: 6.0, 7: 6.1, 8: 7.1, 9: 7.8, 10: 9.5, 11: 10.7, 12: 11.5},
    'Portland': {1: 9.1, 2: 8.6, 3: 6.9, 4: 5.5, 5: 4.4, 6: 4.0, 7: 4.3, 8: 5.3, 9: 5.7, 10: 7.0, 11: 7.5, 12: 8.2},
    'Richmond': {1:8.1, 2:7.4, 3:7.2, 4:7.3, 5:7.6, 6:6.9, 7:7.6, 8:8.4, 9:8.9, 10:8.7, 11:8.5, 12:8.9},
    'Sale': {1: 8.4, 2: 7.7, 3: 6.8, 4: 6.1, 5: 4.9, 6: 4.4, 7: 5.0, 8: 5.8, 9: 6.4, 10: 7.0, 11: 7.5, 12: 7.9},
    'SalmonGums': {1:9.9, 2:8.8, 3:7.9, 4:7.4, 5:6.4, 6:6.2, 7:5.8, 8:6.5, 9:7.7, 10:8.5, 11:9.5, 12:10.2},
    'Sydney': {1: 7.1, 2: 6.7, 3: 6.4, 4: 6.4, 5: 5.9, 6: 5.5, 7: 6.4, 8: 7.1, 9: 7.2, 10: 7.2, 11: 7.8, 12: 7.6},
    'SydneyAirport': {1: 7.5, 2: 7.2, 3: 6.8, 4: 7.0, 5: 6.4, 6: 5.9, 7: 6.7, 8: 7.9, 9: 8.0, 10: 7.9, 11: 7.7, 12: 7.8},
    'Townsville': {1: 8.0, 2: 7.4, 3: 7.5, 4: 7.9, 5: 7.5, 6: 7.8, 7: 8.4, 8: 9.1, 9: 9.6, 10: 9.8, 11: 9.5, 12: 9.0},
    'Tuggeranong': {1:9.9, 2:8.6, 3:8, 4:7, 5:6.2, 6:5, 7:5.4, 8:6.4, 9:7.7, 10:8.6, 11:9.3, 12:10.2},
    'Uluru': {1:9.2, 2:8.6, 3:8.6, 4:8.1, 5:7.2, 6:6.3, 7:7.3, 8:8, 9:8.6, 10:9.4, 11:9.1, 12:9.4},
    'WaggaWagga': {1: 10.8, 2: 10.1, 3: 9.3, 4: 8.2, 5: 6.3, 6: 4.6, 7: 4.8, 8: 6.4, 9: 7.6, 10: 9.2, 11: 9.7, 12: 10.4},
    'Walpole': {1: 8.3, 2: 7.1, 3: 6.2, 4: 4.8, 5: 4.5, 6: 4.2, 7: 4.3, 8: 5.0, 9: 5.3, 10: 6.4, 11: 6.5, 12: 8.1},
    'Watsonia': {1: 7.9, 2: 7.9, 3: 6.7, 4: 5.6, 5: 4.0, 6: 3.4, 7: 3.7, 8: 4.9, 9: 5.5, 10: 6.7, 11: 7.2, 12: 7.1},
    'Williamtown': {1: 7.4, 2: 7.2, 3: 7.0, 4: 6.9, 5: 6.1, 6: 5.6, 7: 6.4, 8: 7.5, 9: 7.7, 10: 7.6, 11: 7.6, 12: 7.7},
    'Witchcliffe': {1:9.6, 2:9.1, 3:7.9, 4:6.9, 5:6, 6:5.8, 7:5.5, 8:5.8, 9:5.9, 10:6.7, 11:8.6, 12:9.1},
    'Wollongong': {1:8.9, 2:8.3, 3:8, 4:8, 5:8.1, 6:7.4, 7:7.9, 8:8.5, 9:9.1, 10:9.1, 11:9.2, 12:9.5},
    'Woomera': {1: 11.0, 2: 10.5, 3: 9.7, 4: 8.8, 5: 7.5, 6: 6.9, 7: 7.3, 8: 8.4, 9: 9.1, 10: 9.7, 11: 10.4, 12: 10.6}