# ACORN-SAT using Python and Jupyter Notebook

## Let's load some data...

In [4]:
import io
import requests
import pandas as pd

# Download the raw ACORN-SAT daily minimum-temperature file for station 086071.
response = requests.get(
    'http://www.bom.gov.au/climate/change/acorn/sat/data/acorn.sat.minT.086071.daily.txt',
    timeout=30,  # never hang the kernel on an unresponsive server
)
# Fail loudly on an HTTP error instead of parsing an error page as data.
response.raise_for_status()
file_object = io.StringIO(response.content.decode('utf-8'))

# Naive parse with the pandas defaults (comma separator, first line is the
# header): each whole line of the file ends up in a single column.
tmin_melb = pd.read_csv(file_object)

tmin_melb.head()

Unnamed: 0,MIN TEMP 086071 19100101 20150106 missing_value=99999.9 MELBOURNE REGIONAL OFFICE
0,19100101 9.9
1,19100102 14.5
2,19100103 24.3
3,19100104 17.7
4,19100105 13.8


In [6]:
import io
import requests
import pandas as pd

url = "http://www.bom.gov.au/climate/change/acorn/sat/data/acorn.sat.minT.086071.daily.txt"
# The request is sent with a browser-like User-Agent header.
headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0"}
req = requests.get(url, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as data.
req.raise_for_status()
data = io.StringIO(req.text)

tmin_melb = pd.read_csv(
    data,
    sep=r'\s+',              # raw string: '\s' is not a valid escape in a normal literal
    skiprows=1,              # the first line is a descriptive header, not data
    names=['date', 'tmin'],
    na_values='99999.9',     # missing-value sentinel announced in the file header
)

tmin_melb.head()

Unnamed: 0,date,tmin
0,19100101,9.9
1,19100102,14.5
2,19100103,24.3
3,19100104,17.7
4,19100105,13.8


In [5]:
import pandas as pd

# Read a local whitespace-delimited file, skipping its first line.
# No column names are supplied here, so pandas uses the first remaining row
# as the header (see the output below).
# The separator is a raw string because '\s' is not a valid escape sequence
# in an ordinary string literal.
tmin_melb = pd.read_csv('86071_tmin.csv', sep=r'\s+', skiprows=1)

tmin_melb.head()

Unnamed: 0,19100101,9.9
0,19100102,14.5
1,19100103,24.3
2,19100104,17.7
3,19100105,13.8
4,19100106,16.9


In [6]:
import pandas as pd

# Read the local whitespace-delimited file with explicit column names and
# treat the sentinel 99999.9 as missing.
# The separator is a raw string because '\s' is not a valid escape sequence
# in an ordinary string literal.
tmin_melb = pd.read_csv(
    '86071_tmin.csv',
    sep=r'\s+',
    skiprows=1,
    names=['date', 'tmin'],
    na_values='99999.9',
)

tmin_melb.head()

Unnamed: 0,date,tmin
0,19100101,9.9
1,19100102,14.5
2,19100103,24.3
3,19100104,17.7
4,19100105,13.8


In [7]:
import io
import requests
import pandas as pd
import datetime

def make_date(n):
    """Parse a compact ``YYYYMMDD`` string into a ``datetime.datetime``.

    Parameters
    ----------
    n : str
        Date text such as ``'19100101'``.

    Returns
    -------
    datetime.datetime
        The parsed date (time fields are zero).
    """
    compact_format = '%Y%m%d'
    parsed = datetime.datetime.strptime(n, compact_format)
    return parsed

url = "http://www.bom.gov.au/climate/change/acorn/sat/data/acorn.sat.minT.086071.daily.txt"
headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0"}
req = requests.get(url, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as data.
req.raise_for_status()
data = io.StringIO(req.text)

# Parse the dates while reading and use them as the index.
tmin_melb = pd.read_csv(
    data,
    sep=r'\s+',              # raw string: '\s' is not a valid escape in a normal literal
    skiprows=1,
    names=['date', 'tmin'],
    na_values='99999.9',
    index_col=0,
    converters={'date': make_date},
)

tmin_melb.head(5)

Unnamed: 0_level_0,tmin
date,Unnamed: 1_level_1
1910-01-01,9.9
1910-01-02,14.5
1910-01-03,24.3
1910-01-04,17.7
1910-01-05,13.8


In [8]:
# http://www.bom.gov.au/climate/change/hqsites/data/temp/tmin.086338.daily.csv
import io
import requests
import pandas as pd
import datetime

def make_date(n):
    """Parse an ISO-style ``YYYY-MM-DD`` string into a ``datetime.datetime``.

    Parameters
    ----------
    n : str
        Date text such as ``'1910-01-01'``.

    Returns
    -------
    datetime.datetime
        The parsed date (time fields are zero).
    """
    iso_format = '%Y-%m-%d'
    parsed = datetime.datetime.strptime(n, iso_format)
    return parsed

url = "http://www.bom.gov.au/climate/change/hqsites/data/temp/tmin.086338.daily.csv"
headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0"}
req = requests.get(url, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as data.
req.raise_for_status()
data = io.StringIO(req.text)

# Comma-separated file: skip the first two lines, keep only the first two
# columns, and index the frame by the parsed date.
tmin_melb = pd.read_csv(
    data,
    skiprows=2,
    index_col=0,
    usecols=[0, 1],
    names=['date', 'tmin'],
    converters={'date': make_date},
)
tmin_melb.head(5)

Unnamed: 0_level_0,tmin
date,Unnamed: 1_level_1
1910-01-01,10.1
1910-01-02,13.5
1910-01-03,23.3
1910-01-04,16.6
1910-01-05,13.0


In [19]:
import io
import requests
import pandas as pd
import datetime

def make_date(n):
    """Parse a compact ``YYYYMMDD`` value with ``pandas.to_datetime``.

    Parameters
    ----------
    n : str
        Date text such as ``'19100101'``.

    Returns
    -------
    The value produced by ``pd.to_datetime(n, format='%Y%m%d')``.
    """
    timestamp = pd.to_datetime(n, format='%Y%m%d')
    return timestamp


headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0"}


def load_acorn_daily(url, column):
    """Download one ACORN-SAT daily text file and parse it.

    Parameters
    ----------
    url : str
        Address of the whitespace-delimited daily file.
    column : str
        Name to give the value column (e.g. ``'tmin'`` or ``'tmax'``).

    Returns
    -------
    pandas.DataFrame
        One column named ``column``, indexed by the parsed ``date`` column;
        the sentinel 99999.9 is read as missing.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    req = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page as data.
    req.raise_for_status()
    return pd.read_csv(
        io.StringIO(req.text),
        sep=r'\s+',              # raw string: '\s' is not a valid escape in a normal literal
        skiprows=1,
        names=['date', column],
        na_values='99999.9',
        index_col=0,
        converters={'date': make_date},
    )


# Get tmin for Melbourne
tmin_melb = load_acorn_daily(
    "http://www.bom.gov.au/climate/change/acorn/sat/data/acorn.sat.minT.086071.daily.txt",
    'tmin',
)

# Get tmax for Melbourne
tmax_melb = load_acorn_daily(
    "http://www.bom.gov.au/climate/change/acorn/sat/data/acorn.sat.maxT.086071.daily.txt",
    'tmax',
)

tmax_melb.head(5)

Unnamed: 0_level_0,tmax
date,Unnamed: 1_level_1
1910-01-01,30.7
1910-01-02,38.6
1910-01-03,36.9
1910-01-04,23.3
1910-01-05,28.5


In [20]:
import pandas as pd

# Combine both series on their shared date index; the outer join keeps every
# date present in either frame.
tall_melb = tmin_melb.join(other=tmax_melb, how='outer')
tall_melb.head(n=10)

Unnamed: 0_level_0,tmin,tmax
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1910-01-01,9.9,30.7
1910-01-02,14.5,38.6
1910-01-03,24.3,36.9
1910-01-04,17.7,23.3
1910-01-05,13.8,28.5
1910-01-06,16.9,32.6
1910-01-07,17.2,29.6
1910-01-08,17.8,36.2
1910-01-09,18.3,27.3
1910-01-10,18.0,18.5


In [21]:
# Same-day midpoint of minimum and maximum temperature.
tall_melb['tmean'] = tall_melb['tmin'].add(tall_melb['tmax']).div(2)
tall_melb.head(n=10)

Unnamed: 0_level_0,tmin,tmax,tmean
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1910-01-01,9.9,30.7,20.3
1910-01-02,14.5,38.6,26.55
1910-01-03,24.3,36.9,30.6
1910-01-04,17.7,23.3,20.5
1910-01-05,13.8,28.5,21.15
1910-01-06,16.9,32.6,24.75
1910-01-07,17.2,29.6,23.4
1910-01-08,17.8,36.2,27.0
1910-01-09,18.3,27.3,22.8
1910-01-10,18.0,18.5,18.25


In [22]:
# Alternative average: pair each row's tmax with the tmin of the row before
# it (shift(1) moves values down one row, so the first row has no partner).
previous_tmin = tall_melb['tmin'].shift(1)
tall_melb['tave'] = previous_tmin.add(tall_melb['tmax']).div(2)
tall_melb.head(n=10)

Unnamed: 0_level_0,tmin,tmax,tmean,tave
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1910-01-01,9.9,30.7,20.3,
1910-01-02,14.5,38.6,26.55,24.25
1910-01-03,24.3,36.9,30.6,25.7
1910-01-04,17.7,23.3,20.5,23.8
1910-01-05,13.8,28.5,21.15,23.1
1910-01-06,16.9,32.6,24.75,23.2
1910-01-07,17.2,29.6,23.4,23.25
1910-01-08,17.8,36.2,27.0,26.7
1910-01-09,18.3,27.3,22.8,22.55
1910-01-10,18.0,18.5,18.25,18.4


In [23]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
tall_melb.describe()

Unnamed: 0,tmin,tmax,tmean,tave
count,38333.0,38349.0,38325.0,38324.0
mean,11.197063,20.294086,15.744868,15.74496
std,4.079829,6.188608,4.736621,4.552461
min,-0.4,7.0,4.2,3.75
25%,8.3,15.6,12.1,12.15
50%,11.0,19.2,15.15,15.25
75%,13.9,23.6,18.7,18.75
max,28.8,46.4,35.45,36.25


In [24]:
# Remove the 'tave' column again.
# errors='ignore' keeps this cell re-runnable: without it a second run raises
# KeyError because the column is already gone.
tall_melb = tall_melb.drop(columns='tave', errors='ignore')
tall_melb.head(10)

Unnamed: 0_level_0,tmin,tmax,tmean
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1910-01-01,9.9,30.7,20.3
1910-01-02,14.5,38.6,26.55
1910-01-03,24.3,36.9,30.6
1910-01-04,17.7,23.3,20.5
1910-01-05,13.8,28.5,21.15
1910-01-06,16.9,32.6,24.75
1910-01-07,17.2,29.6,23.4
1910-01-08,17.8,36.2,27.0
1910-01-09,18.3,27.3,22.8
1910-01-10,18.0,18.5,18.25


In [2]:
import matplotlib.pyplot as plt
# Render figures inline in the notebook output.
%matplotlib inline

# Line plot of every column of the frame against its date index.
tall_melb.plot()

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
# Plot calendar year 1980 only (label slicing includes both end dates).
tall_melb.loc['1980-01-01':'1980-12-31'].plot()

In [None]:
# 1980 smoothed with a 10-row rolling mean.
year_1980 = tall_melb.loc['1980-01-01':'1980-12-31']
year_1980.rolling(10).mean().plot()

In [None]:
%matplotlib inline

tall_melb['1980-01-01':'1980-12-31'].rolling(10, win_type='triang').mean().plot() 
# boxcar triang blackman hamming bartlett parzen bohman blackmanharris nuttall barthann

In [None]:
%matplotlib inline

# Can also use an offset - e.g. number of days...
tall_melb['1980-01-01':'1980-12-31'].rolling('10d').mean().plot()

In [None]:
# Group every row by the calendar month of its index date, then average each
# column within the month.
month_of_row = tall_melb.index.month
mnth_melb = tall_melb.groupby(month_of_row)
mnth_melb.mean()

In [None]:
# Find the monthly climatology for Melbourne over the 30 years 1961-1990.
# Label slices on a DatetimeIndex include BOTH endpoints, so the window must
# end on 1990-12-31; ending on '1991-01-01' would add one extra January day.
clim = tall_melb.loc['1961-01-01':'1990-12-31']
clim.groupby(by=clim.index.month).mean()

In [None]:
# Boolean Series: True on rows where tmax exceeds 42.
tall_melb['tmax'].gt(42)

In [None]:
# Find all days with max temp > 42: build the boolean mask, then select the
# matching rows.
hot_day_mask = tall_melb['tmax'].gt(42)
tall_melb.loc[hot_day_mask]

In [None]:
# Look at correlation between daily maximum and minimum temperature.
tmax_series = tall_melb['tmax']
tmin_series = tall_melb['tmin']
tmax_series.corr(tmin_series)

In [None]:
# Pairwise correlation matrix between all columns of the frame.
tall_melb.corr()

In [None]:
# 50th percentile (median) of each column.
tall_melb.quantile(q=0.5)

In [None]:
# Several quantiles at once: one output row per requested quantile.
tall_melb.quantile(q=[0.1, 0.5, 0.9])

In [None]:
# interpolation may be 'linear', 'lower', 'higher', 'midpoint' or 'nearest'.
tall_melb.quantile(q=[0.1, 0.5, 0.9], interpolation='lower')

In [None]:
import seaborn as sns

# Draw the column-by-column correlation matrix as a heatmap.
correlations = tall_melb.corr()
sns.heatmap(data=correlations)

In [None]:
# Find the 10 hottest days in Melbourne: order by tmax, highest first, and
# keep the top ten rows.
by_tmax_descending = tall_melb.sort_values(by='tmax', ascending=False)
by_tmax_descending.head(10)

In [None]:
# Lag plot of tmax for the selected date range.
tmax_window = tall_melb.loc['2010-01-01':'2011-01-01', 'tmax']
pd.plotting.lag_plot(tmax_window)

In [None]:
# Autocorrelation plot of tmax for the selected date range.
tmax_window = tall_melb.loc['2010-01-01':'2011-01-01', 'tmax']
pd.plotting.autocorrelation_plot(tmax_window)

In [None]:
# Down-sample tmax to weekly means and plot the result.
tmax_window = tall_melb.loc['2010-01-01':'2011-01-01', 'tmax']
resampled = tmax_window.resample('W').mean()
resampled.plot()

In [None]:
import sqlite3
import pandas as pd

# Load the whole stations table, indexed by station number.
conn = sqlite3.connect('acorn.db')
try:
    stations = pd.read_sql_query("SELECT * FROM stations", conn, index_col='stn_num')
finally:
    # Always release the database handle, even if the query fails.
    conn.close()
stations.head(10)

In [None]:
# Latitude/longitude pairs as a plain 2-D array (one row per station).
stations.loc[:, ['lat', 'lon']].values

In [None]:
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature 

# Blank base map in a plate carree projection.
map_crs = ccrs.PlateCarree()
ax = plt.axes(projection=map_crs)
ax.set_extent([100, 170, -40, -5])  # (x0, x1, y0, y1)
for layer in (cfeature.LAND, cfeature.OCEAN):
    ax.add_feature(layer)

plt.show()

In [None]:
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature 

# Same base map, with one marker per station.
map_crs = ccrs.PlateCarree()
ax = plt.axes(projection=map_crs)
ax.set_extent([100, 170, -40, -5])  # (x0, x1, y0, y1)
for layer in (cfeature.LAND, cfeature.OCEAN):
    ax.add_feature(layer)

# Station coordinates are given in the same plate carree system.
station_lons = stations.lon.values
station_lats = stations.lat.values
ax.scatter(station_lons, station_lats, transform=map_crs)
plt.show()
# plt.savefig('acornsat.svg')

In [None]:
import sqlite3
import pandas as pd

# Pull the observations between the two dates (upper bound exclusive).
# The upper bound must be a quoted string: written bare, 1991-01-01 is
# integer arithmetic (1991 - 1 - 1 = 1989), not a date.
conn = sqlite3.connect('acorn.db')
try:
    data = pd.read_sql_query("""
    SELECT stn_num, lsd, prcp, t_min, t_max 
    FROM observations
    WHERE lsd >= '1960-01-01'
     AND  lsd < '1991-01-01'
    """, conn)
finally:
    # Always release the database handle, even if the query fails.
    conn.close()
data.head(10)

In [None]:
# Per-station averages of the numeric columns.
# numeric_only=True skips the non-numeric 'lsd' column; without it, recent
# pandas versions raise TypeError when asked to average text.
data.groupby('stn_num').mean(numeric_only=True)


In [None]:
import sqlite3
import pandas as pd

# All observations recorded for a single day.
conn = sqlite3.connect('acorn.db')
try:
    data = pd.read_sql_query("""
    SELECT stn_num, lsd, prcp, t_min, t_max 
    FROM observations
    WHERE lsd == '1960-01-01'
    """, conn)
finally:
    # Always release the database handle, even if the query fails.
    conn.close()
data.head(10)


In [None]:
import sqlite3
import pandas as pd

# One day's maximum temperatures joined to each station's coordinates.
conn = sqlite3.connect('acorn.db')
try:
    data = pd.read_sql_query("""
    SELECT lat, lon, s.stn_num,  t_max 
    FROM observations o, stations s
    WHERE lsd == '2005-01-01'
    AND o.stn_num = s.stn_num
    """, conn)
finally:
    # Always release the database handle, even if the query fails.
    conn.close()
data.head(10)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

y = data.lat.values 
x = data.lon.values 
t = data.t_max.values
plt.scatter(x, y)
# plt.scatter(data.lon.values,data.lat.values)


In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

y = data.lat.values 
x = data.lon.values 
t = data.t_max.values
plt.scatter(x, y, c=t)
plt.colorbar()

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

y = data.lat.values 
x = data.lon.values 
t = data.t_max.values
plt.scatter(x, y, c=t, s=t*5, alpha=0.5, cmap='plasma')
plt.colorbar()

In [None]:
import numpy as np
from scipy.interpolate import griddata

# Regular 100 x 100 target grid.
lon_axis = np.linspace(110, 155, 100)
lat_axis = np.linspace(-45, -10, 100)
X, Y = np.meshgrid(lon_axis, lat_axis)

# Interpolate the scattered values t at points (x, y) onto the grid.
# x, y and t are read from the notebook namespace.
grid_temp = griddata((x, y), t, (X, Y), method='linear')

# Filled contour plot of the gridded field.
plt.contourf(X, Y, grid_temp, cmap='plasma')
plt.colorbar()

In [None]:
# 10th percentile of the 5-row rolling mean of tmin over 1961-1990.
# Label slices include both endpoints, so the 30-year window ends on
# 1990-12-31 (ending on '1991-01-01' would include one day of 1991).
# A descriptive name is used instead of `x`, which other cells use for the
# station longitude array.
baseline_tmin_5d = tall_melb.loc['1961-01-01':'1990-12-31', 'tmin'].rolling(5).mean()
baseline_tmin_5d.quantile(0.1)
# Unsmoothed alternative:
# tall_melb.loc['1961-01-01':'1990-12-31', 'tmin'].quantile(0.1)

In [None]:
# Threshold: 10th percentile of the 5-row rolling mean of tmin over 1961-1990.
# Label slices include both endpoints, so each window ends on 31 December
# rather than on 1 January of the following year.
# A descriptive name is used instead of `x`, which other cells use for the
# station longitude array.
baseline_tmin_5d = tall_melb.loc['1961-01-01':'1990-12-31', 'tmin'].rolling(5).mean()
q10 = baseline_tmin_5d.quantile(0.1)
print(q10)

# Percentage of 1999 days with a recorded tmin that fall below the threshold.
# NOTE(review): the threshold comes from a smoothed series but is compared
# with raw daily values - confirm that this is intended.
pop = tall_melb.loc['1999-01-01':'1999-12-31']
total = pop.tmin.count()
sub = pop.tmin[pop.tmin < q10].count()
(sub / total) * 100

In [None]:
import math

def mask_flat_ends(values):
    """Blank out the constant run at each end of a sequence.

    Leading values that merely repeat the following value, and trailing values
    that merely repeat the preceding one, are replaced by NaN. One copy of the
    repeated value is kept at each end, next to the part that varies.

    Parameters
    ----------
    values : sequence of numbers

    Returns
    -------
    list
        A new list of the same length; ``values`` itself is not modified.
    """
    result = list(values)
    nan = float('nan')
    # True wherever two neighbours differ. Comparing with != (rather than
    # clamping the absolute difference to 1) also detects float steps < 1.
    changed = [left != right for left, right in zip(result, result[1:])]

    # Number of leading neighbour pairs that are equal; the appended sentinel
    # makes this well defined when nothing ever changes.
    lead = (changed + [True]).index(True)
    result[:lead] = [nan] * lead

    # Only look at the tail when something changes somewhere, and only slice
    # when the run is non-empty: result[-0:] would address the WHOLE list.
    if True in changed:
        trail = changed[::-1].index(True)
        if trail:
            result[-trail:] = [nan] * trail
    return result


# a = [1, 1, 1, 1, 1, 1]

a = mask_flat_ends([1, 1, 1, 2, 3, 5, 7, 7, 11, 13, 13, 13])

a


In [None]:
from mpl_toolkits import mplot3d

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# fig = plt.figure()
# Create an empty set of axes using the "3d" projection and display it.
ax = plt.axes(projection="3d")

plt.show()

In [None]:
ax = plt.axes(projection="3d")

# Smooth helix: x = cos(z), y = sin(z) for z running from 0 to 15.
z_line = np.linspace(0, 15, 1000)
x_line, y_line = np.cos(z_line), np.sin(z_line)
ax.plot3D(x_line, y_line, z_line, 'gray')

# 100 random points scattered around the same helix, coloured by height.
n_points = 100
jitter = 0.1
z_points = 15 * np.random.random(n_points)
x_points = np.cos(z_points) + jitter * np.random.randn(n_points)
y_points = np.sin(z_points) + jitter * np.random.randn(n_points)
ax.scatter3D(x_points, y_points, z_points, c=z_points, cmap='hsv');

plt.show()
