In [23]:
import csv
import json
import re
import pandas as pd
import numpy as np
from pydantic import BaseModel, ValidationError, StrictInt
from typing import Optional, Any

In [24]:
routes = pd.read_csv('Skiturer Sunnmøre - Turer.csv')
class Route(BaseModel):
    route_id: int
    mountain_name: str
    area: str
    route_name: str
    max_start_altitude: int
    avalanche_start_zones: list
    kast: int
    dangers: str
    exposure: str
    difficulty: str
    comment: str
    equipment: str
    max_steepness: Any
    is_descent: bool
    start_location: str
    altitude: int
    geoJSON: dict
    aspect: Any # needs fix

columns = {
  'Fjell': 'mountain_name',
  'Himmelretning (topp til bunn)': 'aspect',
  'Område': 'area',
  'Rute': 'route_name',
  'Høyeste startsted': 'max_start_altitude',
  'Løsneområder (obligatoriske)': 'avalanche_start_zones',
  'KAST': 'kast',
  'Terrengfeller/farer': 'dangers',
  'Eksponering': 'exposure',
  'Vanskelighetsgrad': 'difficulty',
  'Kommentar': 'comment',
  'Utstyr': 'equipment',
  'Max bratthet': 'max_steepness',
  'Primært nedkjøring?': 'is_descent',
  'Startsted': 'start_location',
  'Toppunkt': 'altitude',
  'Hvem': 'who'
}

routes = routes.rename(columns=columns)
routes = routes.drop(labels='who', axis='columns')
routes = routes.dropna(how='all', axis='columns')
routes = routes.fillna('')
routes['route_id'] = routes.index

In [25]:
routes_dict = routes.to_dict(orient='records')

for route in routes_dict:
      for key, val in route.items():
        if val == "FALSE" or val == "USANN":
          route[key] = False
      
        if val == "TRUE" or val == "SANN":
          route[key] = True

      if route['avalanche_start_zones'] == False:
        route['avalanche_start_zones'] = []

      if route['geoJSON']:
        route['geoJSON'] = json.loads(route['geoJSON'])

      if route['aspect']:
        route['aspect'] = route['aspect'].split(' ')

      if route['avalanche_start_zones']:
        elevation_spans = route['avalanche_start_zones'].replace(',', '.').split('.')
        elevation_spans = [tuple(span.strip().split(':')) for span in elevation_spans]
        route['avalanche_start_zones'] = elevation_spans
      


In [26]:
valid_routes = []
invalid_count = 0
for route in routes_dict:
  try:
    r = Route(**route)
    valid_routes.append(r)
  except ValidationError as e:
    error_text = (
      f'{route["mountain_name"]} - {route["route_name"]}: \n'
      f'{e} \n'
      f'-------------'
    )
    invalid_count += 1
    print(error_text)
  


Høgenibba - Normalveien: 
1 validation error for Route
kast
  value is not a valid integer (type=type_error.integer) 
-------------
Kolåstind - Normalveien: 
1 validation error for Route
kast
  value is not a valid integer (type=type_error.integer) 
-------------
Grøtdalstind - Sydflanken: 
1 validation error for Route
kast
  value is not a valid integer (type=type_error.integer) 
-------------
Grøtdalstind - Normalveien: 
1 validation error for Route
kast
  value is not a valid integer (type=type_error.integer) 
-------------
Litle Eidskyrkja - Sydøstflanken: 
1 validation error for Route
kast
  value is not a valid integer (type=type_error.integer) 
-------------
Grønetind - Normalveien: 
1 validation error for Route
kast
  value is not a valid integer (type=type_error.integer) 
-------------
Koppefjellet - Normalveien: 
3 validation errors for Route
avalanche_start_zones
  value is not a valid list (type=type_error.list)
kast
  value is not a valid integer (type=type_error.integer)


In [27]:
invalid_count

15

In [28]:
len(valid_routes)

68

In [37]:
long_routes = pd.read_csv('Skiturer Sunnmøre - Langturer.csv')
long_routes = long_routes.rename(columns= {'Navn': 'mountain_name'})
long_routes['is_long_route'] = True
long_route_dicts = long_routes.to_dict(orient='records')


In [None]:
route_dicts = [route.dict() for route in valid_routes]

In [41]:
with open('routes.json', 'w') as file:
    file.write(json.dumps(route_dicts+long_route_dicts))
