In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../')

import math
from typing import Any, Dict, List, Optional

import rasterio
import contextily
import geopandas
import haversine as hs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import requests
from tqdm.notebook import tqdm

from huhuha.settings import RAW_DATA_DIR, DATA_DIR
from huhuha.ops_tiles import get_otm_tile
from huhuha.utils import get_elevation, random_float
import datetime
tqdm.pandas()
sns.set_theme(style='whitegrid')

In [2]:
# PEAK = 'Dent Parrachée'
# WEATHER_CSV = '45_29__6_76__K.csv'
# PEAK_NUMBER = 'peak_1'

PEAK = 'La Norma'
WEATHER_CSV = '45_18__6_72__K.csv'
PEAK_NUMBER = 'peak_2'

In [3]:
avalnache_data_path =  RAW_DATA_DIR / 'data-avalanche.csv'
df = pd.read_csv(avalnache_data_path)
summits = [
    PEAK
    # 'Dent Parrachée',
    # # 'Punta Bagna',
    # 'La Norma',
]

# ograniczam zbior tylko do danych z z wybranych masywow i lat dla ktorych mam dane pogodowe
df = df[df['sommet'].isin(summits)]


# usuwam wiersze z brakiem informacji o masywach
df = df.dropna(subset=['massif'])

# usuwam wiersze z brakiem informacji o lokalizacji geograficznej
df = df.dropna(subset=['latitude', 'longitude'])
df = df.drop(df[(df.latitude == 0.0) | (df.longitude == 0.0)].index)
df = df[['id', 'sommet', 'date']]


In [4]:
df.head()

Unnamed: 0,id,sommet,date
91,1639300068475,La Norma,1639177200000
239,1617554382779,La Norma,1617487200000
290,1616661015022,La Norma,1616454000000
331,1616184796178,La Norma,1616108400000
338,1616140698621,La Norma,1616108400000


In [5]:
# df_1 = df[df['sommet'] == 'Dent Parrachée']
# df_2 = df[df['sommet'] == 'La Norma']

In [6]:
df['date'] = df.date.apply(lambda d: pd.to_datetime(d, unit='ms'))
df = df[df['date'] > datetime.datetime(2005, 9, 30)]
df.head()

Unnamed: 0,id,sommet,date
91,1639300068475,La Norma,2021-12-10 23:00:00
239,1617554382779,La Norma,2021-04-03 22:00:00
290,1616661015022,La Norma,2021-03-22 23:00:00
331,1616184796178,La Norma,2021-03-18 23:00:00
338,1616140698621,La Norma,2021-03-18 23:00:00


In [7]:
def create_new_dates(d):
    day = datetime.timedelta(days=1)
    d = d.replace(hour=0, minute=0, second=0, microsecond=0)

    return [
        d - 3 * day,
        d - 2 * day,
        d - day,
        d,
        d + day,
        d + 2 * day,
        d + 3 * day,
    ]

df['date'] = df.date.apply(lambda d: create_new_dates(d))
df = df.explode('date')

In [8]:
df['avalanche'] = 1

In [9]:
df.head()

Unnamed: 0,id,sommet,date,avalanche
91,1639300068475,La Norma,2021-12-07,1
91,1639300068475,La Norma,2021-12-08,1
91,1639300068475,La Norma,2021-12-09,1
91,1639300068475,La Norma,2021-12-10,1
91,1639300068475,La Norma,2021-12-11,1


In [10]:
zdf = pd.date_range(
    start='2005-01-20', 
    end='2022-01-26', 
    freq='D',
    name='date')

In [11]:
zdf = zdf.to_frame(index=False, name='date')#.set_index('date')
zdf['avalanche'] = 0

In [12]:
zdf

Unnamed: 0,date,avalanche
0,2005-01-20,0
1,2005-01-21,0
2,2005-01-22,0
3,2005-01-23,0
4,2005-01-24,0
...,...,...
6211,2022-01-22,0
6212,2022-01-23,0
6213,2022-01-24,0
6214,2022-01-25,0


In [13]:
df_1 = df[df['sommet'] == PEAK]
avalanche_days = df_1['date'].apply(lambda d: d.date())


In [14]:
avalanche_days

91      2021-12-07
91      2021-12-08
91      2021-12-09
91      2021-12-10
91      2021-12-11
           ...    
3080    2011-03-18
3080    2011-03-19
3080    2011-03-20
3080    2011-03-21
3080    2011-03-22
Name: date, Length: 245, dtype: object

In [15]:
avd = avalanche_days.to_list()

In [16]:
df_1 = zdf.copy()
df_1['avalanche'] = df_1.date.apply(lambda d: int(d in avd))


df_1.describe

<bound method NDFrame.describe of            date  avalanche
0    2005-01-20          0
1    2005-01-21          0
2    2005-01-22          0
3    2005-01-23          0
4    2005-01-24          0
...         ...        ...
6211 2022-01-22          0
6212 2022-01-23          0
6213 2022-01-24          0
6214 2022-01-25          0
6215 2022-01-26          0

[6216 rows x 2 columns]>

In [17]:
def adjusts_resolution(d):
    hour = datetime.timedelta(hours=1)
    hours = [int((d + i * hour).timestamp()) for i in range(24)]

    return hours

In [18]:
df_2 = df_1.copy()
df_2['dt'] = df_2.date.apply(adjusts_resolution)
df_2 = df_2.explode('dt')
df_2.head(100)

Unnamed: 0,date,avalanche,dt
0,2005-01-20,0,1106179200
0,2005-01-20,0,1106182800
0,2005-01-20,0,1106186400
0,2005-01-20,0,1106190000
0,2005-01-20,0,1106193600
...,...,...,...
3,2005-01-23,0,1106521200
4,2005-01-24,0,1106524800
4,2005-01-24,0,1106528400
4,2005-01-24,0,1106532000


In [19]:
weather_csv = DATA_DIR / 'weather' / WEATHER_CSV

In [20]:
weather_df = pd.read_csv(weather_csv)

In [21]:
avalanche_df = df_2.merge(weather_df, on='dt', how='left')

In [22]:
avalanche_df.to_csv(DATA_DIR / 'weather' / PEAK_NUMBER / 'avalanche_weather.csv', index=False)

In [23]:
# split for seasons

for i in range(2005, 2022):
    season_start = datetime.datetime(i, 9, 30)
    season_stop = datetime.datetime(i+1, 6, 1)

    df_season = avalanche_df[(avalanche_df['date'] > season_start) & (avalanche_df['date'] < season_stop)]
    df_season.to_csv(DATA_DIR / 'weather' / PEAK_NUMBER / f'{i}_avalanche_weather.csv', index=False)
    print(i)

2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
