# Jupyter Notebook Tutorial/Tour

In [63]:
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [64]:
import pandas as pd
import numpy as np
import xarray as xr
import netCDF4
from sklearn.model_selection  import train_test_split
from sklearn.linear_model import LinearRegression
from dateutil.parser import parse

## Loading Temperature Data and working with it

In [65]:
air_temps = xr.open_dataset('data/air_temperature_2021-04-29T12_00_00Z_108000.nc', engine='netcdf4')
subset = air_temps.sel(longitude=slice(12.436523437499998, 14.084472656249998), latitude=slice(52.05924589011585, 52.9751081817353))
df = subset.to_dataframe()
msk = np.random.rand(len(df)) < 0.8
df.pop('latitude_longitude')
df.pop('forecast_period')
df['forecast_reference_time'] = df['forecast_reference_time'].apply(str)
df['forecast_reference_time'] = df['forecast_reference_time'].apply(parse)
df['forecast_reference_time'] = df['forecast_reference_time'].values.astype(float)
df['time'] = df['time'].apply(str)
df['time'] = df['time'].apply(parse)
df['time'] = df['time'].values.astype(float)
train = df[msk]
test = df[~msk]

y_train = train.pop('air_temperature')
y_test = test.pop('air_temperature')
X_train = train
X_test = test


In [78]:
%%time
model = LinearRegression()
model.fit(X_train, y_train)
None

Wall time: 2.99 ms


In [67]:
print('Predicted temps are:')
print(model.predict(X_test.head()))
print('Actual temps are:')
print(list(y_test.head()))

Predicted temps are:
[241.60612629 241.60612629 241.60612629 241.60612629 241.60612629]
Actual temps are:
[275.25, 268.25, 249.0, 243.25, 217.25]


In [68]:
%sql sqlite://

In [69]:
%%sql
CREATE TABLE sales
(
    key       varchar(6),
    ts        timestamp,
    product   integer,
    completed boolean,
    price     float
);
INSERT INTO sales
VALUES ('sale_1', '2019-11-08 00:00', 0, TRUE, 1.1),
       ('sale_2', '2019-11-08 01:00', 0, FALSE, 1.2),
       ('sale_3', '2019-11-08 01:00', 0, TRUE, 1.3),
       ('sale_4', '2019-11-08 01:00', 1, FALSE, 1.4),
       ('sale_5', '2019-11-08 02:00', 1, TRUE, 1.5),
       ('sale_6', '2019-11-08 02:00', 1, TRUE, 1.5);

 * sqlite://
(sqlite3.OperationalError) table sales already exists
[SQL: CREATE TABLE sales (
    key       varchar(6),
    ts        timestamp,
    product   integer,
    completed boolean,
    price     float
);]
(Background on this error at: http://sqlalche.me/e/14/e3q8)


In [70]:
%%sql

select * from sales;

 * sqlite://
Done.


key,ts,product,completed,price
sale_1,2019-11-08 00:00,0,1,1.1
sale_2,2019-11-08 01:00,0,0,1.2
sale_3,2019-11-08 01:00,0,1,1.3
sale_4,2019-11-08 01:00,1,0,1.4
sale_5,2019-11-08 02:00,1,1,1.5
sale_6,2019-11-08 02:00,1,1,1.5


In [71]:
%%sql

select * from sales where price=1.1;

 * sqlite://
Done.


key,ts,product,completed,price
sale_1,2019-11-08 00:00,0,1,1.1


In [74]:
import folium
import json
import requests

url = (
    "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data"
)
vis1 = json.loads(requests.get(f"{url}/vis1.json").text)
vis2 = json.loads(requests.get(f"{url}/vis2.json").text)
vis3 = json.loads(requests.get(f"{url}/vis3.json").text)


m = folium.Map(location=[46.3014, -123.7390], zoom_start=7, tiles="Stamen Terrain")

folium.Marker(
    location=[47.3489, -124.708],
    popup=folium.Popup(max_width=450).add_child(
        folium.Vega(vis1, width=450, height=250)
    ),
).add_to(m)

folium.Marker(
    location=[44.639, -124.5339],
    popup=folium.Popup(max_width=450).add_child(
        folium.Vega(vis2, width=450, height=250)
    ),
).add_to(m)

folium.Marker(
    location=[46.216, -124.1280],
    popup=folium.Popup(max_width=450).add_child(
        folium.Vega(vis3, width=450, height=250)
    ),
).add_to(m)

m