### Import Dependencies

In [1]:
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from config import db_password

In [2]:
timberline_df = pd.read_csv('./timberline.csv')

In [3]:
timberline_df

Unnamed: 0,dt,dt_iso,timezone,city_name,lat,lon,temp,dew_point,feels_like,temp_min,...,wind_gust,rain_1h,rain_3h,snow_1h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,631152000,1990-01-01 00:00:00 +0000 UTC,-28800,Timberline Lodge,45.331128,-121.711006,36.03,34.21,29.80,36.03,...,,0.31,,,,76,500,Rain,light rain,10d
1,631155600,1990-01-01 01:00:00 +0000 UTC,-28800,Timberline Lodge,45.331128,-121.711006,36.28,34.47,30.67,36.28,...,,0.27,,,,92,500,Rain,light rain,10n
2,631159200,1990-01-01 02:00:00 +0000 UTC,-28800,Timberline Lodge,45.331128,-121.711006,33.69,32.41,27.90,33.69,...,,,,0.21,,98,600,Snow,light snow,13n
3,631162800,1990-01-01 03:00:00 +0000 UTC,-28800,Timberline Lodge,45.331128,-121.711006,34.05,32.77,28.47,34.05,...,,,,0.19,,97,600,Snow,light snow,13n
4,631166400,1990-01-01 04:00:00 +0000 UTC,-28800,Timberline Lodge,45.331128,-121.711006,34.21,32.94,28.67,34.21,...,,,,0.19,,96,600,Snow,light snow,13n
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
287587,1666465200,2022-10-22 19:00:00 +0000 UTC,-25200,Timberline Lodge,45.331128,-121.711006,30.90,29.55,18.30,28.22,...,45.99,,,0.76,,92,601,Snow,snow,13d
287588,1666468800,2022-10-22 20:00:00 +0000 UTC,-25200,Timberline Lodge,45.331128,-121.711006,30.52,29.19,17.92,28.06,...,40.00,,,1.52,,87,601,Snow,snow,13d
287589,1666472400,2022-10-22 21:00:00 +0000 UTC,-25200,Timberline Lodge,45.331128,-121.711006,29.62,28.29,17.02,27.95,...,44.00,,,1.78,,87,601,Snow,snow,13d
287590,1666476000,2022-10-22 22:00:00 +0000 UTC,-25200,Timberline Lodge,45.331128,-121.711006,29.91,28.58,17.31,28.85,...,45.99,,,1.27,,87,601,Snow,snow,13d


### Clean Dataset


In [4]:

# Drop irrevelant columns
timberline_df = timberline_df.drop(columns=['timezone', 'lat', 'lon', 'dew_point', 'feels_like', 'pressure', 'sea_level', 'grnd_level', 'wind_speed', 'wind_deg', 'wind_gust', 'rain_1h', 'rain_3h', 'snow_3h', 'clouds_all', 'weather_id', 'weather_icon'])
# Fill NaN with 0
timberline_df = timberline_df.fillna(0)


In [5]:
timberline_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 287592 entries, 0 to 287591
Data columns (total 10 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   dt                   287592 non-null  int64  
 1   dt_iso               287592 non-null  object 
 2   city_name            287592 non-null  object 
 3   temp                 287592 non-null  float64
 4   temp_min             287592 non-null  float64
 5   temp_max             287592 non-null  float64
 6   humidity             287592 non-null  int64  
 7   snow_1h              287592 non-null  float64
 8   weather_main         287592 non-null  object 
 9   weather_description  287592 non-null  object 
dtypes: float64(4), int64(2), object(4)
memory usage: 21.9+ MB


In [6]:
# Convert dt to usable Datetime for index
datetime_info =  pd.to_datetime(timberline_df['dt'],unit='s')


In [7]:
# Set the index as datetime
timberline_datetime_index = timberline_df.set_index(datetime_info)
timberline_datetime_index

Unnamed: 0_level_0,dt,dt_iso,city_name,temp,temp_min,temp_max,humidity,snow_1h,weather_main,weather_description
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1990-01-01 00:00:00,631152000,1990-01-01 00:00:00 +0000 UTC,Timberline Lodge,36.03,36.03,36.03,93,0.00,Rain,light rain
1990-01-01 01:00:00,631155600,1990-01-01 01:00:00 +0000 UTC,Timberline Lodge,36.28,36.28,36.28,93,0.00,Rain,light rain
1990-01-01 02:00:00,631159200,1990-01-01 02:00:00 +0000 UTC,Timberline Lodge,33.69,33.69,33.69,95,0.21,Snow,light snow
1990-01-01 03:00:00,631162800,1990-01-01 03:00:00 +0000 UTC,Timberline Lodge,34.05,34.05,34.05,95,0.19,Snow,light snow
1990-01-01 04:00:00,631166400,1990-01-01 04:00:00 +0000 UTC,Timberline Lodge,34.21,34.21,34.21,95,0.19,Snow,light snow
...,...,...,...,...,...,...,...,...,...,...
2022-10-22 19:00:00,1666465200,2022-10-22 19:00:00 +0000 UTC,Timberline Lodge,30.90,28.22,34.79,94,0.76,Snow,snow
2022-10-22 20:00:00,1666468800,2022-10-22 20:00:00 +0000 UTC,Timberline Lodge,30.52,28.06,35.76,94,1.52,Snow,snow
2022-10-22 21:00:00,1666472400,2022-10-22 21:00:00 +0000 UTC,Timberline Lodge,29.62,27.95,30.87,94,1.78,Snow,snow
2022-10-22 22:00:00,1666476000,2022-10-22 22:00:00 +0000 UTC,Timberline Lodge,29.91,28.85,32.41,94,1.27,Snow,snow


In [8]:
# Resample data to reduce 24 hourly rows to 1 daily row - also remove columns not needed 
# (dt, dt_iso, city_name, weather_main, weather_description)
resampled_df = timberline_datetime_index.resample('D').agg({'temp': 'mean', 'temp_min' : 'mean', 'temp_max' : 'mean', 'humidity' : 'mean', 'snow_1h' : 'sum'})

In [9]:
resampled_df

Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snow_1h
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,32.980417,32.980417,32.980417,95.000000,12.10
1990-01-02,27.535833,27.535833,27.535833,91.500000,9.90
1990-01-03,30.131250,30.131250,30.131250,91.083333,0.53
1990-01-04,34.822083,34.822083,34.822083,91.791667,4.85
1990-01-05,38.994167,38.994167,38.994167,94.333333,0.00
...,...,...,...,...,...
2022-10-18,48.152917,42.926250,52.189167,48.583333,0.00
2022-10-19,49.899167,45.100417,54.120000,42.875000,0.00
2022-10-20,49.231250,44.384583,54.397500,36.458333,0.00
2022-10-21,39.027917,34.692083,41.542083,74.416667,0.25


In [10]:
# Rename 'snow_1h' column to 'snowfall'
finished_df = resampled_df.rename(columns={"snow_1h" : "snowfall"})
finished_df

Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,32.980417,32.980417,32.980417,95.000000,12.10
1990-01-02,27.535833,27.535833,27.535833,91.500000,9.90
1990-01-03,30.131250,30.131250,30.131250,91.083333,0.53
1990-01-04,34.822083,34.822083,34.822083,91.791667,4.85
1990-01-05,38.994167,38.994167,38.994167,94.333333,0.00
...,...,...,...,...,...
2022-10-18,48.152917,42.926250,52.189167,48.583333,0.00
2022-10-19,49.899167,45.100417,54.120000,42.875000,0.00
2022-10-20,49.231250,44.384583,54.397500,36.458333,0.00
2022-10-21,39.027917,34.692083,41.542083,74.416667,0.25


In [11]:
# Round all columns to two decimal places
finished_df = finished_df[['temp', 'temp_min', 'temp_max', 'humidity', 'snowfall']].round(2)
finished_df


Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,32.98,32.98,32.98,95.00,12.10
1990-01-02,27.54,27.54,27.54,91.50,9.90
1990-01-03,30.13,30.13,30.13,91.08,0.53
1990-01-04,34.82,34.82,34.82,91.79,4.85
1990-01-05,38.99,38.99,38.99,94.33,0.00
...,...,...,...,...,...
2022-10-18,48.15,42.93,52.19,48.58,0.00
2022-10-19,49.90,45.10,54.12,42.88,0.00
2022-10-20,49.23,44.38,54.40,36.46,0.00
2022-10-21,39.03,34.69,41.54,74.42,0.25


### Export and Load into Postgres

In [12]:
# Export to CSV
finished_df.to_csv('./cleaned_timberline_df.csv')

In [13]:
# db_string used to connect to postgres
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/snow_data"

In [14]:
# create a Database engine to connect to postgres using the db_string
engine = create_engine(db_string)

In [15]:
finished_df.to_sql(name='timberline', con=engine)

ValueError: Table 'timberline' already exists.

### Import and minor clean additional datasets to load into postgres

In [16]:
#Import Furano data
furano_df = pd.read_csv('./cleaned_Furano_Ski_Resort_df.csv')
furano_df

Unnamed: 0,dt,temp,temp_min,temp_max,humidity,snowfall
0,1990-01-01,16.66,15.24,18.03,89.33,0.00
1,1990-01-02,17.54,14.36,21.16,86.83,0.00
2,1990-01-03,19.62,17.89,23.17,89.96,0.00
3,1990-01-04,19.10,17.63,21.53,85.96,1.48
4,1990-01-05,16.52,14.75,19.98,81.33,0.36
...,...,...,...,...,...,...
11978,2022-10-18,37.61,36.47,38.96,90.17,0.00
11979,2022-10-19,39.69,38.54,41.30,85.83,0.00
11980,2022-10-20,45.09,43.85,47.02,81.12,0.00
11981,2022-10-21,51.45,50.26,53.24,82.42,0.00


In [17]:
furano_df.to_sql(name='furano', con=engine)

ValueError: Table 'furano' already exists.

In [18]:
#import Craigieburn data
craigieburn_df = pd.read_csv('./cleaned_Craigieburn_Valley_df.csv')
craigieburn_df

Unnamed: 0,dt,temp,temp_min,temp_max,humidity,snowfall
0,1990-01-01,48.73,48.73,48.73,71.79,0.0
1,1990-01-02,49.51,49.51,49.51,81.71,0.0
2,1990-01-03,50.30,50.30,50.30,75.50,0.0
3,1990-01-04,47.45,47.45,47.45,80.33,0.0
4,1990-01-05,44.14,44.14,44.14,79.08,0.0
...,...,...,...,...,...,...
11978,2022-10-18,39.62,39.62,39.62,72.33,0.0
11979,2022-10-19,42.28,42.28,42.28,68.96,0.0
11980,2022-10-20,41.10,41.10,41.10,71.50,0.0
11981,2022-10-21,39.14,39.14,39.14,71.92,0.0


In [19]:
craigieburn_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11983 entries, 0 to 11982
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   dt        11983 non-null  object 
 1   temp      11983 non-null  float64
 2   temp_min  11983 non-null  float64
 3   temp_max  11983 non-null  float64
 4   humidity  11983 non-null  float64
 5   snowfall  11983 non-null  float64
dtypes: float64(5), object(1)
memory usage: 561.8+ KB


In [20]:
# Clean dt column to be date format xxxx/xx/xx
craigieburn_datetime = pd.to_datetime(craigieburn_df['dt'])

In [21]:
# Clean (set index as date)
craigieburn_df = craigieburn_df.set_index(craigieburn_datetime)
craigieburn_df

Unnamed: 0_level_0,dt,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1990-01-01,1990-01-01,48.73,48.73,48.73,71.79,0.0
1990-01-02,1990-01-02,49.51,49.51,49.51,81.71,0.0
1990-01-03,1990-01-03,50.30,50.30,50.30,75.50,0.0
1990-01-04,1990-01-04,47.45,47.45,47.45,80.33,0.0
1990-01-05,1990-01-05,44.14,44.14,44.14,79.08,0.0
...,...,...,...,...,...,...
2022-10-18,2022-10-18,39.62,39.62,39.62,72.33,0.0
2022-10-19,2022-10-19,42.28,42.28,42.28,68.96,0.0
2022-10-20,2022-10-20,41.10,41.10,41.10,71.50,0.0
2022-10-21,2022-10-21,39.14,39.14,39.14,71.92,0.0


In [22]:
craigieburn_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 11983 entries, 1990-01-01 to 2022-10-22
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   dt        11983 non-null  object 
 1   temp      11983 non-null  float64
 2   temp_min  11983 non-null  float64
 3   temp_max  11983 non-null  float64
 4   humidity  11983 non-null  float64
 5   snowfall  11983 non-null  float64
dtypes: float64(5), object(1)
memory usage: 655.3+ KB


In [23]:
# Drop dt Column
craigieburn_df = craigieburn_df.drop(columns=['dt'])

In [24]:
craigieburn_df

Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,48.73,48.73,48.73,71.79,0.0
1990-01-02,49.51,49.51,49.51,81.71,0.0
1990-01-03,50.30,50.30,50.30,75.50,0.0
1990-01-04,47.45,47.45,47.45,80.33,0.0
1990-01-05,44.14,44.14,44.14,79.08,0.0
...,...,...,...,...,...
2022-10-18,39.62,39.62,39.62,72.33,0.0
2022-10-19,42.28,42.28,42.28,68.96,0.0
2022-10-20,41.10,41.10,41.10,71.50,0.0
2022-10-21,39.14,39.14,39.14,71.92,0.0


In [25]:
craigieburn_df.to_sql(name='craigieburn', con=engine)

ValueError: Table 'craigieburn' already exists.

In [26]:
# Import Furano Data
furano_df = pd.read_csv('./cleaned_Furano_Ski_resort_df.csv')
furano_df

Unnamed: 0,dt,temp,temp_min,temp_max,humidity,snowfall
0,1990-01-01,16.66,15.24,18.03,89.33,0.00
1,1990-01-02,17.54,14.36,21.16,86.83,0.00
2,1990-01-03,19.62,17.89,23.17,89.96,0.00
3,1990-01-04,19.10,17.63,21.53,85.96,1.48
4,1990-01-05,16.52,14.75,19.98,81.33,0.36
...,...,...,...,...,...,...
11978,2022-10-18,37.61,36.47,38.96,90.17,0.00
11979,2022-10-19,39.69,38.54,41.30,85.83,0.00
11980,2022-10-20,45.09,43.85,47.02,81.12,0.00
11981,2022-10-21,51.45,50.26,53.24,82.42,0.00


In [27]:
# dtcolumn to date xxxx/xx/xx
furano_datetime = pd.to_datetime(furano_df['dt'])

In [28]:
# Set Date as index
furano_df = furano_df.set_index(furano_datetime)
furano_df

Unnamed: 0_level_0,dt,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1990-01-01,1990-01-01,16.66,15.24,18.03,89.33,0.00
1990-01-02,1990-01-02,17.54,14.36,21.16,86.83,0.00
1990-01-03,1990-01-03,19.62,17.89,23.17,89.96,0.00
1990-01-04,1990-01-04,19.10,17.63,21.53,85.96,1.48
1990-01-05,1990-01-05,16.52,14.75,19.98,81.33,0.36
...,...,...,...,...,...,...
2022-10-18,2022-10-18,37.61,36.47,38.96,90.17,0.00
2022-10-19,2022-10-19,39.69,38.54,41.30,85.83,0.00
2022-10-20,2022-10-20,45.09,43.85,47.02,81.12,0.00
2022-10-21,2022-10-21,51.45,50.26,53.24,82.42,0.00


In [29]:
# Drop dt column
furano_df = furano_df.drop(columns=['dt'])
furano_df

Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,16.66,15.24,18.03,89.33,0.00
1990-01-02,17.54,14.36,21.16,86.83,0.00
1990-01-03,19.62,17.89,23.17,89.96,0.00
1990-01-04,19.10,17.63,21.53,85.96,1.48
1990-01-05,16.52,14.75,19.98,81.33,0.36
...,...,...,...,...,...
2022-10-18,37.61,36.47,38.96,90.17,0.00
2022-10-19,39.69,38.54,41.30,85.83,0.00
2022-10-20,45.09,43.85,47.02,81.12,0.00
2022-10-21,51.45,50.26,53.24,82.42,0.00


In [30]:
furano_df.to_sql(name='furano', con=engine)

ValueError: Table 'furano' already exists.

In [31]:
# Import Zermat Data
zermatt_df = pd.read_csv('./cleaned_zermatt.csv')
zermatt_df

Unnamed: 0,dt,temp,temp_min,temp_max,humidity,snowfall
0,1990-01-01,6.01,5.08,7.02,50.62,0.0
1,1990-01-02,11.84,6.29,16.16,51.88,0.0
2,1990-01-03,14.34,6.50,19.59,42.88,0.0
3,1990-01-04,15.16,5.30,20.41,44.75,0.0
4,1990-01-05,13.71,5.18,18.52,44.17,0.0
...,...,...,...,...,...,...
11978,2022-10-18,50.55,44.43,53.48,69.17,0.0
11979,2022-10-19,52.29,43.86,54.81,42.29,0.0
11980,2022-10-20,48.57,43.60,50.21,74.25,0.0
11981,2022-10-21,46.00,42.57,47.43,97.38,0.0


In [32]:
# Clean dt, and set as index, then drop column
zermatt_datetime = pd.to_datetime(zermatt_df['dt'])
zermatt_df = zermatt_df.set_index(zermatt_datetime)
zermatt_df = zermatt_df.drop(columns=['dt'])


In [33]:
zermatt_df.head(20)

Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,6.01,5.08,7.02,50.62,0.0
1990-01-02,11.84,6.29,16.16,51.88,0.0
1990-01-03,14.34,6.5,19.59,42.88,0.0
1990-01-04,15.16,5.3,20.41,44.75,0.0
1990-01-05,13.71,5.18,18.52,44.17,0.0
1990-01-06,15.77,7.55,19.32,66.5,0.0
1990-01-07,10.39,5.75,15.04,71.21,0.0
1990-01-08,12.28,5.62,16.88,43.46,0.0
1990-01-09,13.45,5.84,19.0,40.67,0.0
1990-01-10,15.99,7.94,20.33,57.79,0.0


In [34]:
zermatt_df.to_sql(name='zermatt', con=engine)

ValueError: Table 'zermatt' already exists.

In [35]:
# Import Tiffindell data
tiffindell_df = pd.read_csv('./cleaned_tiffindell.csv')
tiffindell_df.head(20)

Unnamed: 0,dt,temp,temp_min,temp_max,humidity,snowfall
0,1990-01-01,58.55,58.55,58.55,74.33,0.0
1,1990-01-02,60.58,60.58,60.58,76.46,0.0
2,1990-01-03,61.66,61.66,61.66,79.12,0.0
3,1990-01-04,62.28,62.28,62.28,69.67,0.0
4,1990-01-05,64.27,64.27,64.27,60.5,0.0
5,1990-01-06,62.63,62.63,62.63,63.79,0.0
6,1990-01-07,54.45,54.45,54.45,72.25,0.0
7,1990-01-08,56.67,56.67,56.67,66.21,0.0
8,1990-01-09,57.01,57.01,57.01,80.38,0.0
9,1990-01-10,48.67,48.67,48.67,89.67,0.0


In [36]:
# Clean dt, and set as index, then drop column
tiffindell_datetime = pd.to_datetime(tiffindell_df['dt'])
tiffindell_df = tiffindell_df.set_index(tiffindell_datetime)
tiffindell_df = tiffindell_df.drop(columns=['dt'])
tiffindell_df

Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,58.55,58.55,58.55,74.33,0.0
1990-01-02,60.58,60.58,60.58,76.46,0.0
1990-01-03,61.66,61.66,61.66,79.12,0.0
1990-01-04,62.28,62.28,62.28,69.67,0.0
1990-01-05,64.27,64.27,64.27,60.50,0.0
...,...,...,...,...,...
2022-10-18,57.24,57.24,57.24,64.67,0.0
2022-10-19,59.89,59.89,59.89,59.12,0.0
2022-10-20,58.99,58.99,58.99,68.83,0.0
2022-10-21,58.43,58.43,58.43,71.21,0.0


In [37]:
tiffindell_df.to_sql(name='tiffindell', con=engine)

ValueError: Table 'tiffindell' already exists.

### Create a DataFrame with all the data to use for Machine Learning

In [38]:
# start with timberline append furano
test = finished_df.append(furano_df)

  test = finished_df.append(furano_df)


In [39]:
test

Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,32.98,32.98,32.98,95.00,12.10
1990-01-02,27.54,27.54,27.54,91.50,9.90
1990-01-03,30.13,30.13,30.13,91.08,0.53
1990-01-04,34.82,34.82,34.82,91.79,4.85
1990-01-05,38.99,38.99,38.99,94.33,0.00
...,...,...,...,...,...
2022-10-18,37.61,36.47,38.96,90.17,0.00
2022-10-19,39.69,38.54,41.30,85.83,0.00
2022-10-20,45.09,43.85,47.02,81.12,0.00
2022-10-21,51.45,50.26,53.24,82.42,0.00


In [40]:
# append craigieburn to timberline - furano
test = test.append(craigieburn_df)

  test = test.append(craigieburn_df)


In [41]:
test


Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,32.98,32.98,32.98,95.00,12.10
1990-01-02,27.54,27.54,27.54,91.50,9.90
1990-01-03,30.13,30.13,30.13,91.08,0.53
1990-01-04,34.82,34.82,34.82,91.79,4.85
1990-01-05,38.99,38.99,38.99,94.33,0.00
...,...,...,...,...,...
2022-10-18,39.62,39.62,39.62,72.33,0.00
2022-10-19,42.28,42.28,42.28,68.96,0.00
2022-10-20,41.10,41.10,41.10,71.50,0.00
2022-10-21,39.14,39.14,39.14,71.92,0.00


In [42]:
# append tiffindell to timberline furano and craigieburn
test = test.append(tiffindell_df)

  test = test.append(tiffindell_df)


In [43]:
test

Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,32.98,32.98,32.98,95.00,12.10
1990-01-02,27.54,27.54,27.54,91.50,9.90
1990-01-03,30.13,30.13,30.13,91.08,0.53
1990-01-04,34.82,34.82,34.82,91.79,4.85
1990-01-05,38.99,38.99,38.99,94.33,0.00
...,...,...,...,...,...
2022-10-18,57.24,57.24,57.24,64.67,0.00
2022-10-19,59.89,59.89,59.89,59.12,0.00
2022-10-20,58.99,58.99,58.99,68.83,0.00
2022-10-21,58.43,58.43,58.43,71.21,0.00


In [44]:
# Append zermatt to timberline furano and craigieburn tiffindell
test = test.append(zermatt_df)

  test = test.append(zermatt_df)


In [45]:
test


Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,32.98,32.98,32.98,95.00,12.10
1990-01-02,27.54,27.54,27.54,91.50,9.90
1990-01-03,30.13,30.13,30.13,91.08,0.53
1990-01-04,34.82,34.82,34.82,91.79,4.85
1990-01-05,38.99,38.99,38.99,94.33,0.00
...,...,...,...,...,...
2022-10-18,50.55,44.43,53.48,69.17,0.00
2022-10-19,52.29,43.86,54.81,42.29,0.00
2022-10-20,48.57,43.60,50.21,74.25,0.00
2022-10-21,46.00,42.57,47.43,97.38,0.00


In [46]:
# Import valle nevado data
valle_df = pd.read_csv('./cleaned_Valle_Nevado_df.csv')


In [47]:
valle_df

Unnamed: 0,dt,temp,temp_min,temp_max,humidity,snowfall
0,1990-01-01,51.52,42.37,65.42,57.54,0.0
1,1990-01-02,50.55,39.14,69.13,57.04,0.0
2,1990-01-03,52.62,43.07,68.73,59.42,0.0
3,1990-01-04,53.63,42.83,71.49,56.33,0.0
4,1990-01-05,54.17,44.52,70.89,61.54,0.0
...,...,...,...,...,...,...
11978,2022-10-18,32.11,26.89,34.64,62.96,0.0
11979,2022-10-19,31.18,26.45,33.50,61.00,0.0
11980,2022-10-20,35.68,31.36,39.19,39.46,0.0
11981,2022-10-21,39.40,34.36,43.16,35.50,0.0


In [48]:
# Clean valle nevado data change dt to date xxxx/xx/xx make date index and drop dt column
valle_datetime = pd.to_datetime(valle_df['dt'])
valle_df = valle_df.set_index(valle_datetime)
valle_df = valle_df.drop(columns=['dt'])

In [49]:
valle_df

Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,51.52,42.37,65.42,57.54,0.0
1990-01-02,50.55,39.14,69.13,57.04,0.0
1990-01-03,52.62,43.07,68.73,59.42,0.0
1990-01-04,53.63,42.83,71.49,56.33,0.0
1990-01-05,54.17,44.52,70.89,61.54,0.0
...,...,...,...,...,...
2022-10-18,32.11,26.89,34.64,62.96,0.0
2022-10-19,31.18,26.45,33.50,61.00,0.0
2022-10-20,35.68,31.36,39.19,39.46,0.0
2022-10-21,39.40,34.36,43.16,35.50,0.0


In [50]:
# append valle nevado to timberline furano craigieburn tiffindell zermatt
ml_df = test.append(valle_df)

  ml_df = test.append(valle_df)


In [51]:
ml_df


Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,32.98,32.98,32.98,95.00,12.10
1990-01-02,27.54,27.54,27.54,91.50,9.90
1990-01-03,30.13,30.13,30.13,91.08,0.53
1990-01-04,34.82,34.82,34.82,91.79,4.85
1990-01-05,38.99,38.99,38.99,94.33,0.00
...,...,...,...,...,...
2022-10-18,32.11,26.89,34.64,62.96,0.00
2022-10-19,31.18,26.45,33.50,61.00,0.00
2022-10-20,35.68,31.36,39.19,39.46,0.00
2022-10-21,39.40,34.36,43.16,35.50,0.00


In [52]:
# Replace values of snowfall 1 (yes) 0 (no) of Given column by using np.where() function.
ml_df['snowfall'] = np.where(ml_df['snowfall'] > 0, 1, ml_df['snowfall'])


In [53]:
#Use get_dummies to get rid of the decimal in ml_df snowfall column
dummies_column = pd.get_dummies(ml_df['snowfall'])
dummies_column

Unnamed: 0_level_0,0.0,1.0
dt,Unnamed: 1_level_1,Unnamed: 2_level_1
1990-01-01,0,1
1990-01-02,0,1
1990-01-03,0,1
1990-01-04,0,1
1990-01-05,1,0
...,...,...
2022-10-18,1,0
2022-10-19,1,0
2022-10-20,1,0
2022-10-21,1,0


In [57]:
# make snowfall coulumn = column '1.0' of dummies_column
ml_df['snowfall'] = dummies_column[1]
ml_df

Unnamed: 0_level_0,temp,temp_min,temp_max,humidity,snowfall
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1990-01-01,32.98,32.98,32.98,95.00,1
1990-01-02,27.54,27.54,27.54,91.50,1
1990-01-03,30.13,30.13,30.13,91.08,1
1990-01-04,34.82,34.82,34.82,91.79,1
1990-01-05,38.99,38.99,38.99,94.33,0
...,...,...,...,...,...
2022-10-18,32.11,26.89,34.64,62.96,0
2022-10-19,31.18,26.45,33.50,61.00,0
2022-10-20,35.68,31.36,39.19,39.46,0
2022-10-21,39.40,34.36,43.16,35.50,0


In [58]:
ml_df.to_csv('./ml_df.csv')