In [30]:
from symbol import yield_arg

from meteostat import Point, Daily
import pandas as pd
from datetime import datetime

from sklearn.preprocessing import MinMaxScaler

# Define LA
uci = Point(33.6405, -117.8443) # Latitude, Longitude of uci

# Get daily data from 2015 to 2023
start = datetime(2015, 1, 1)
end = datetime(2023, 12, 31)

data = Daily(uci, start, end)
data = data.fetch()
print(data.head())

            tavg  tmin  tmax  prcp  snow  wdir  wspd  wpgt    pres  tsun
time                                                                    
2015-01-01  10.2   4.4  15.6   0.0   NaN   NaN   4.5   NaN  1019.8   NaN
2015-01-02  11.0   5.6  16.1   0.0   NaN   NaN   3.2   NaN  1020.1   NaN
2015-01-03  12.1   6.7  17.2   0.0   NaN   NaN   3.1   NaN  1023.3   NaN
2015-01-04  13.0   6.1  21.1   0.0   NaN   NaN   1.9   NaN  1025.4   NaN
2015-01-05  16.4   8.3  26.1   0.0   NaN   NaN   1.6   NaN  1022.2   NaN


In [31]:
print(data.shape)
print(data.count())

(3287, 10)
tavg    3287
tmin    3287
tmax    3287
prcp    3287
snow    1099
wdir    2126
wspd    3287
wpgt       0
pres    3286
tsun       0
dtype: int64


In [32]:
data['snow'].fillna(0, inplace=True)
data['wpgt'].fillna(0, inplace=True)
data['tsun'].fillna(0, inplace=True)
data['wdir'] = data['wdir'].interpolate(method='linear').fillna(method='bfill')


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try usi

In [33]:
data['day_of_year']=data.index.dayofyear
data['month']=data.index.month
data['season']=(data['month']%12)//3

data['temp_range'] = data['tmax'] - data['tmin']

data['is_rainy'] = (data['prcp']>0).astype(int)
data['is_snowy'] = (data['snow']>0).astype(int)
print(data.head())

            tavg  tmin  tmax  prcp  snow  wdir  wspd  wpgt    pres  tsun  \
time                                                                       
2015-01-01  10.2   4.4  15.6   0.0   0.0  45.0   4.5   0.0  1019.8   0.0   
2015-01-02  11.0   5.6  16.1   0.0   0.0  45.0   3.2   0.0  1020.1   0.0   
2015-01-03  12.1   6.7  17.2   0.0   0.0  45.0   3.1   0.0  1023.3   0.0   
2015-01-04  13.0   6.1  21.1   0.0   0.0  45.0   1.9   0.0  1025.4   0.0   
2015-01-05  16.4   8.3  26.1   0.0   0.0  45.0   1.6   0.0  1022.2   0.0   

            day_of_year  month  season  temp_range  is_rainy  is_snowy  
time                                                                    
2015-01-01            1      1       0        11.2         0         0  
2015-01-02            2      1       0        10.5         0         0  
2015-01-03            3      1       0        10.5         0         0  
2015-01-04            4      1       0        15.0         0         0  
2015-01-05            5      

In [40]:
features = ['tavg', 'tmin', 'tmax', 'prcp', 'snow', 'wdir', 'wspd', 'wpgt', 'pres', 'tsun', 
            'day_of_year', 'season', 'temp_range', 'is_rainy', 'is_snowy']
target = ['tavg']
dataset = data[features+target]

from sklearn.preprocessing import MinMaxScaler
import numpy as np
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(dataset)

def create_dataset(data, look_back=30):
    X,y = [],[]
    for i in range(len(data)-look_back-1):
        X.append(data[i:(i+look_back), :-1])
        y.append(data[i+look_back, -1])
    return np.array(X), np.array(y)
X,y = create_dataset(scaled_data)

In [41]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=False
)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

from torch.utils.data import DataLoader, TensorDataset

train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [43]:
class LSTM(nn.Module):
    def __init__(self, inputsize, hiddensize, num_layers, outputsize):
        super(LSTM, self).__init__()
        self.hiddensize = hiddensize
        self.num_layers = num_layers
        
        self.lstm  =  nn.LSTM(inputsize, hiddensize, num_layers, batch_first=True)
        self.fc = nn.Linear(hiddensize, outputsize)
        
    def forward(self, x):
        h0=torch.zeros(self.num_layers, x.size(0), self.hiddensize).to(x.device)
        c0=torch.zeros(self.num_layers, x.size(0), self.hiddensize).to(x.device)
        
        out, _ = self.lstm(x, (h0, c0))
        
        out = self.fc(out[:, -1, :])
        return out

imput_size = len(features)
hidden_size = 128
num_layers = 2
output_size = 1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model=LSTM(inputsize=imput_size, hiddensize=hidden_size, num_layers=num_layers, outputsize=output_size).to(device)
print(model)

LSTM(
  (lstm): LSTM(15, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=1, bias=True)
)
