# Compulsory 2 - Erlend Risvik

Start by loading in the secret ID

In [88]:
import ast
import requests
import pandas as pd
from pyspark.sql import SparkSession
import os

SECRET_INFO = open("../NO_SYNC/weather_api", 'r').read().replace('\n', '')
SECRET_ID = ast.literal_eval(SECRET_INFO)["client_id"]

In CA1, we selected locality 12765.

Let's start by finding out the GPS coordinates of our locality

In [41]:
os.environ["PYSPARK_PYTHON"] = "python"

spark = SparkSession.builder.appName('SparkCassandraApp').\
    config('spark.jars.packages', 'com.datastax.spark:spark-cassandra-connector_2.12:3.4.1').\
    config('spark.cassandra.connection.host', 'localhost').\
    config('spark.sql.extensions', 'com.datastax.spark.connector.CassandraSparkExtensions').\
    config('spark.sql.catalog.mycatalog', 'com.datastax.spark.connector.datasource.CassandraCatalog').\
    config('spark.cassandra.connection.port', '9042').getOrCreate()

(spark.read.format("org.apache.spark.sql.cassandra")
 .options(table="fish_data_full", keyspace="compulsory")
 .load()
 .createOrReplaceTempView("fish_data_full"))

fish_data = spark.sql("select localityno, lon, lat, municipality, name from fish_data_full").toPandas()

In [109]:
# Select the GPS coordinates of locality 12765, and separate them by space
lat = round(fish_data.loc[fish_data['localityno'] == 12765, 'lat'].values[0], 3)
lon = round(fish_data.loc[fish_data['localityno'] == 12765, 'lon'].values[0], 3)
# Select the municipality and name of the same locality

municipality = fish_data.loc[fish_data['localityno'] == 12765, 'municipality'].values[0]
name = fish_data.loc[fish_data['localityno'] == 12765, 'name'].values[0]

# print lon, lat, municipality, name in f string nicely
print(f"Lon: {lon:.4f}\nLat: {lat:.4f}\nMunicipality: {municipality}\nPlace: {name}")

Lon: 9.9740
Lat: 62.0940
Municipality: Folldal
Place: Aasen Gård


In [137]:
endpoint = 'https://frost.met.no/sources/v0.jsonld'
parameters = {
"geometry" : f"nearest(POINT({lon} {lat}))",
"nearestmaxcount": 5,
}

# Issue an HTTP GET request
r = requests.get(endpoint, parameters, auth=(SECRET_ID,''))
# Extract JSON data
json = r.json()

# Check if the request worked, print out any errors
if r.status_code == 200:
    data = json['data']
    print('Data retrieved from frost.met.no!')
else:
    print('Error! Returned status code %s' % r.status_code)
    print('Message: %s' % json['error']['message'])
    print('Reason: %s' % json['error']['reason'])

Data retrieved from frost.met.no!


In [138]:
json

{'@context': 'https://frost.met.no/schema',
 '@type': 'SourceResponse',
 'apiVersion': 'v0',
 'license': 'https://creativecommons.org/licenses/by/3.0/no/',
 'createdAt': '2023-10-12T11:08:43Z',
 'queryTime': 1.582,
 'currentItemCount': 5,
 'itemsPerPage': 5,
 'offset': 0,
 'totalItemCount': 5,
 'currentLink': 'https://frost.met.no/sources/v0.jsonld?geometry=nearest%28POINT%289.973999977111816+62.09400177001953%29%29&nearestmaxcount=5',
 'data': [{'@type': 'SensorSystem',
   'id': 'SN9050',
   'name': 'GRIMSE',
   'shortName': 'Grimse ',
   'country': 'Norge',
   'countryCode': 'NO',
   'geometry': {'@type': 'Point',
    'coordinates': [9.9907, 62.063],
    'nearest': False},
   'distance': 3.55522119643,
   'masl': 800,
   'validFrom': '2019-09-01T00:00:00.000Z',
   'county': 'INNLANDET',
   'countyId': 34,
   'municipality': 'FOLLDAL',
   'municipalityId': 3429,
   'stationHolders': ['NVE'],
   'externalIds': ['2.373'],
   'wigosId': '0-578-0-9050'},
  {'@type': 'SensorSystem',
   'id

In [139]:
# Go through each json object in the data list, and print the shortname, id and distance
for item in data:
    print(f"Shortname: {item['shortName']}\nID: {item['id']}\nDistance: {item['distance']}\n")
    

Shortname: Grimse 
ID: SN9050
Distance: 3.55522119643

Shortname: Folldal 
ID: SN9160
Distance: 3.95215517934

Shortname: Folldal (Li bru)
ID: SN8780
Distance: 9.44318857176

Shortname: FV29 Gravbekklia
ID: SN9250
Distance: 16.62512550816

Shortname: Atnsjøen
ID: SN8720
Distance: 24.25914849366



In [220]:
endpoint = 'https://frost.met.no/observations/v0.jsonld'
parameters = {
    'sources': 'SN9250',
    'elements': 'sum(precipitation_amount P1D), mean(air_temperature P1D), mean(wind_speed P1D), mean(relative_humidity P1D)',
    'referencetime': '2015-01-01/2015-12-31',
}

# Issue an HTTP GET request
r = requests.get(endpoint, parameters, auth=(SECRET_ID,''))
# Extract JSON data
json = r.json()

In [218]:
# convert the data in "observations" to a pandas dataframe

pd.json_normalize(json['data'], record_path='observations',
                                     meta=['sourceId', 'referenceTime']

SyntaxError: incomplete input (3890641007.py, line 4)

In [234]:
# check how many entries in json

# get all data where 'observations': [{'elementId': 'mean(air_temperature P1D)'

json["data"]

[{'sourceId': 'SN9250:0',
  'referenceTime': '2015-11-19T00:00:00.000Z',
  'observations': [{'elementId': 'mean(air_temperature P1D)',
    'value': -10.8,
    'unit': 'degC',
    'level': {'levelType': 'height_above_ground', 'unit': 'm', 'value': 2},
    'timeOffset': 'PT0H',
    'timeResolution': 'P1D',
    'timeSeriesId': 0,
    'performanceCategory': 'C',
    'exposureCategory': '4',
    'qualityCode': 2}]},
 {'sourceId': 'SN9250:0',
  'referenceTime': '2015-11-20T00:00:00.000Z',
  'observations': [{'elementId': 'mean(air_temperature P1D)',
    'value': -12.7,
    'unit': 'degC',
    'level': {'levelType': 'height_above_ground', 'unit': 'm', 'value': 2},
    'timeOffset': 'PT0H',
    'timeResolution': 'P1D',
    'timeSeriesId': 0,
    'performanceCategory': 'C',
    'exposureCategory': '4',
    'qualityCode': 2}]},
 {'sourceId': 'SN9250:0',
  'referenceTime': '2015-11-22T00:00:00.000Z',
  'observations': [{'elementId': 'mean(air_temperature P1D)',
    'value': -11.9,
    'unit': 'de