# Compulsory 2 - Erlend Risvik

Start by loading in the secret ID

In [1]:
import ast
import requests
import pandas as pd
from pyspark.sql import SparkSession
import os

SECRET_INFO = open("../NO_SYNC/weather_api", 'r').read().replace('\n', '')
SECRET_ID = ast.literal_eval(SECRET_INFO)["client_id"]

In CA1, we selected locality 12765.

Let's start by finding out the GPS coordinates of our locality

In [2]:
os.environ["PYSPARK_PYTHON"] = "python"

spark = SparkSession.builder.appName('SparkCassandraApp').\
    config('spark.jars.packages', 'com.datastax.spark:spark-cassandra-connector_2.12:3.4.1').\
    config('spark.cassandra.connection.host', 'localhost').\
    config('spark.sql.extensions', 'com.datastax.spark.connector.CassandraSparkExtensions').\
    config('spark.sql.catalog.mycatalog', 'com.datastax.spark.connector.datasource.CassandraCatalog').\
    config('spark.cassandra.connection.port', '9042').getOrCreate()

(spark.read.format("org.apache.spark.sql.cassandra")
 .options(table="fish_data_full", keyspace="compulsory")
 .load()
 .createOrReplaceTempView("fish_data_full"))

fish_data = spark.sql("select localityno, lon, lat, municipality, name from fish_data_full").toPandas()

In [3]:
# Select the GPS coordinates of locality 12765, and separate them by space
lat = round(fish_data.loc[fish_data['localityno'] == 12765, 'lat'].values[0], 3)
lon = round(fish_data.loc[fish_data['localityno'] == 12765, 'lon'].values[0], 3)
# Select the municipality and name of the same locality

municipality = fish_data.loc[fish_data['localityno'] == 12765, 'municipality'].values[0]
name = fish_data.loc[fish_data['localityno'] == 12765, 'name'].values[0]

# print lon, lat, municipality, name in f string nicely
print(f"Lon: {lon:.4f}\nLat: {lat:.4f}\nMunicipality: {municipality}\nPlace: {name}")

Lon: 9.9740
Lat: 62.0940
Municipality: Folldal
Place: Aasen Gård


In [68]:
endpoint = 'https://frost.met.no/sources/v0.jsonld'
parameters = {
"geometry" : f"nearest(POINT({lon} {lat}))",
"nearestmaxcount": 15,
}

# Issue an HTTP GET request
r = requests.get(endpoint, parameters, auth=(SECRET_ID,''))
# Extract JSON data
json = r.json()

# Check if the request worked, print out any errors
if r.status_code == 200:
    data = json['data']
    print('Data retrieved from frost.met.no!')
else:
    print('Error! Returned status code %s' % r.status_code)
    print('Message: %s' % json['error']['message'])
    print('Reason: %s' % json['error']['reason'])

Data retrieved from frost.met.no!


In [70]:
# Go through each json object in the data list, and print the shortname, id and distance
for item in data:
    print(f"Shortname: {item['shortName']}\nID: {item['id']}\nDistance: {item['distance']}\n")
    

Shortname: Grimse 
ID: SN9050
Distance: 3.55522119643

Shortname: Folldal 
ID: SN9160
Distance: 3.95215517934

Shortname: Folldal (Li bru)
ID: SN8780
Distance: 9.44318857176

Shortname: FV29 Gravbekklia
ID: SN9250
Distance: 16.62512550816

Shortname: Atnsjøen
ID: SN8720
Distance: 24.25914849366

Shortname: Einunna
ID: SN8970
Distance: 24.47754731071

Shortname: Hjerkinn
ID: SN9310
Distance: 26.478453523970003

Shortname: E6 Hjerkinn
ID: SN9303
Distance: 27.607994447699998

Shortname: E6 Avsjøen
ID: SN16620
Distance: 27.655905071

Shortname: E6 Grønbakken
ID: SN63940
Distance: 28.53821021009

Shortname: Alvdal 
ID: SN8880
Distance: 34.00673936603

Shortname: Høvringen
ID: SN16271
Distance: 34.483657391369995

Shortname: RV3 Bergerønningen
ID: SN9400
Distance: 34.70172348205

Shortname: E6 Fokstugu
ID: SN16611
Distance: 35.83972754819

Shortname: Fokstugu
ID: SN16610
Distance: 35.84706832411



In [114]:
endpoint = 'https://frost.met.no/observations/v0.jsonld'
parameters = {
    'sources': 'SN16610',
    'elements': 'sum(precipitation_amount P1D), mean(air_temperature P1D), mean(wind_speed P1D), mean(relative_humidity P1D)',
    'referencetime': '2015-01-01/2015-12-31',
}

# Issue an HTTP GET request
r = requests.get(endpoint, parameters, auth=(SECRET_ID,''))
# Extract JSON data
json = r.json()

In [115]:
# print lentgh of data
print(f"Length of data: {len(json['data'])}")


Length of data: 364


In [116]:
json

{'@context': 'https://frost.met.no/schema',
 '@type': 'ObservationResponse',
 'apiVersion': 'v0',
 'license': 'https://creativecommons.org/licenses/by/3.0/no/',
 'createdAt': '2023-10-12T22:22:41Z',
 'queryTime': 0.438,
 'currentItemCount': 364,
 'itemsPerPage': 364,
 'offset': 0,
 'totalItemCount': 364,
 'currentLink': 'https://frost.met.no/observations/v0.jsonld?sources=SN16610&elements=sum%28precipitation_amount+P1D%29%2C+mean%28air_temperature+P1D%29%2C+mean%28wind_speed+P1D%29%2C+mean%28relative_humidity+P1D%29&referencetime=2015-01-01%2F2015-12-31',
 'data': [{'sourceId': 'SN16610:0',
   'referenceTime': '2015-01-01T00:00:00.000Z',
   'observations': [{'elementId': 'sum(precipitation_amount P1D)',
     'value': 0.1,
     'unit': 'mm',
     'timeOffset': 'PT18H',
     'timeResolution': 'P1D',
     'timeSeriesId': 0,
     'performanceCategory': 'C',
     'exposureCategory': '2',
     'qualityCode': 2},
    {'elementId': 'sum(precipitation_amount P1D)',
     'value': 0.1,
     'unit

In [117]:
df = pd.DataFrame()
data = json['data']
for i in range(len(data)):
    row = pd.DataFrame(data[i]['observations'])
    row['referenceTime'] = data[i]['referenceTime']
    row['sourceId'] = data[i]['sourceId']
    df = pd.concat([df, row], ignore_index=True)

df = df.reset_index(drop=True)

columns = ['sourceId','referenceTime','elementId','value','unit','timeOffset']
df2 = df[columns].copy()
df2['referenceTime'] = pd.to_datetime(df2['referenceTime'])


In [118]:
# Convert from long to wide. Each row is a day, and each column is a variable in elementid. referencetime is the date
df2

Unnamed: 0,sourceId,referenceTime,elementId,value,unit,timeOffset
0,SN16610:0,2015-01-01 00:00:00+00:00,sum(precipitation_amount P1D),0.1,mm,PT18H
1,SN16610:0,2015-01-01 00:00:00+00:00,sum(precipitation_amount P1D),0.1,mm,PT6H
2,SN16610:0,2015-01-01 00:00:00+00:00,mean(air_temperature P1D),0.9,degC,PT0H
3,SN16610:0,2015-01-01 00:00:00+00:00,mean(air_temperature P1D),2.6,degC,PT6H
4,SN16610:0,2015-01-01 00:00:00+00:00,mean(wind_speed P1D),5.7,m/s,PT0H
...,...,...,...,...,...,...
2177,SN16610:0,2015-12-30 00:00:00+00:00,sum(precipitation_amount P1D),2.3,mm,PT6H
2178,SN16610:0,2015-12-30 00:00:00+00:00,mean(air_temperature P1D),-3.2,degC,PT0H
2179,SN16610:0,2015-12-30 00:00:00+00:00,mean(air_temperature P1D),-6.1,degC,PT6H
2180,SN16610:0,2015-12-30 00:00:00+00:00,mean(wind_speed P1D),16.1,m/s,PT0H


In [126]:
test = df[df["elementId"] == "mean(air_temperature P1D)"].reset_index(drop=True)

# plot value against referenceTime
test


Unnamed: 0,elementId,value,unit,timeOffset,timeResolution,timeSeriesId,performanceCategory,exposureCategory,qualityCode,level,referenceTime,sourceId
0,mean(air_temperature P1D),0.9,degC,PT0H,P1D,0,C,2,0,"{'levelType': 'height_above_ground', 'unit': '...",2015-01-01T00:00:00.000Z,SN16610:0
1,mean(air_temperature P1D),2.6,degC,PT6H,P1D,0,C,2,2,"{'levelType': 'height_above_ground', 'unit': '...",2015-01-01T00:00:00.000Z,SN16610:0
2,mean(air_temperature P1D),-3.1,degC,PT0H,P1D,0,C,2,0,"{'levelType': 'height_above_ground', 'unit': '...",2015-01-02T00:00:00.000Z,SN16610:0
3,mean(air_temperature P1D),0.4,degC,PT6H,P1D,0,C,2,2,"{'levelType': 'height_above_ground', 'unit': '...",2015-01-02T00:00:00.000Z,SN16610:0
4,mean(air_temperature P1D),-6.3,degC,PT0H,P1D,0,C,2,0,"{'levelType': 'height_above_ground', 'unit': '...",2015-01-03T00:00:00.000Z,SN16610:0
...,...,...,...,...,...,...,...,...,...,...,...,...
723,mean(air_temperature P1D),-11.1,degC,PT6H,P1D,0,C,2,2,"{'levelType': 'height_above_ground', 'unit': '...",2015-12-28T00:00:00.000Z,SN16610:0
724,mean(air_temperature P1D),-6.1,degC,PT0H,P1D,0,C,2,0,"{'levelType': 'height_above_ground', 'unit': '...",2015-12-29T00:00:00.000Z,SN16610:0
725,mean(air_temperature P1D),-8.2,degC,PT6H,P1D,0,C,2,2,"{'levelType': 'height_above_ground', 'unit': '...",2015-12-29T00:00:00.000Z,SN16610:0
726,mean(air_temperature P1D),-3.2,degC,PT0H,P1D,0,C,2,0,"{'levelType': 'height_above_ground', 'unit': '...",2015-12-30T00:00:00.000Z,SN16610:0
