## Configurando o ambiente

In [19]:
!pyenv exec pip install numpy pandas matplotlib seaborn scikit-learn tensorflow kaggle

Collecting kaggle
  Downloading kaggle-1.6.17.tar.gz (82 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.7/82.7 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting python-slugify (from kaggle)
  Downloading python_slugify-8.0.4-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting bleach (from kaggle)
  Downloading bleach-6.1.0-py3-none-any.whl.metadata (30 kB)
Collecting webencodings (from bleach->kaggle)
  Downloading webencodings-0.5.1-py2.py3-none-any.whl.metadata (2.1 kB)
Collecting text-unidecode>=1.3 (from python-slugify->kaggle)
  Downloading text_unidecode-1.3-py2.py3-none-any.whl.metadata (2.4 kB)
Downloading bleach-6.1.0-py3-none-any.whl (162 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.8/162.8 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_slugify-8.0.4-py2.py3-none-any.whl (10 kB)
Downloading text_unidecode-1.3-py2.py3-non

## Importando libs

In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
import os
import zipfile

## Baixando o dataset

In [23]:
# Copy the Kaggle API token (kaggle.json in the working directory) into ~/.kaggle.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

# Fetch the dataset archive into the working directory with the kaggle CLI.
!kaggle datasets download -d sumanthvrao/daily-climate-time-series-data

# Unpack the downloaded archive into the ./data directory.
with zipfile.ZipFile("daily-climate-time-series-data.zip", 'r') as zip_ref:
    zip_ref.extractall("data")


Dataset URL: https://www.kaggle.com/datasets/sumanthvrao/daily-climate-time-series-data
License(s): CC0-1.0
daily-climate-time-series-data.zip: Skipping, found more recently modified local copy (use --force to force download)


## Carregando e explorando o dataset

In [24]:
# Load the training CSV from the data/ directory into a DataFrame.
data = pd.read_csv('data/DailyDelhiClimateTrain.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'DailyDelhiClimateTrain.csv'

In [None]:
# Preview the first rows of the loaded DataFrame.
data.head()

NameError: name 'data' is not defined

In [None]:
# Show summary statistics for the DataFrame's columns.
data.describe()

NameError: name 'data' is not defined

## Pré-processamento dos dados

In [None]:
# Turn the 'date' column into a datetime index so the series plots against time.
# Guarded so the cell can be re-run: after the first run 'date' is the index and
# no longer a column, and an unguarded data['date'] would raise KeyError.
if 'date' in data.columns:
    data = data.assign(date=pd.to_datetime(data['date'])).set_index('date')

# Plot the mean temperature over the whole period covered by the dataset.
plt.figure(figsize=(14, 5))
plt.plot(data['meantemp'], label='Temperatura Média')
plt.title('Temperatura Média Diária')
plt.xlabel('Data')
plt.ylabel('Temperatura (C)')
plt.legend()
plt.show()

NameError: name 'data' is not defined

## Normalizando os dados e criando as janelas de treino e teste

In [None]:
# Column vector (n_samples, 1) of the target series, as the scaler expects 2-D input.
meantemp_values = data['meantemp'].values.reshape(-1, 1)

# Chronological 80/20 split point (no shuffling: this is a time series).
train_size = int(len(meantemp_values) * 0.80)

# Fit the scaler on the training slice only, so the min/max of the held-out
# test period do not leak into training. The same fitted scaler is then applied
# to the full series; test values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(meantemp_values[:train_size])
data_scaled = scaler.transform(meantemp_values)

train, test = data_scaled[:train_size], data_scaled[train_size:]

def create_dataset(dataset, look_back=1):
    """Slice a series into sliding input windows and their next-step targets.

    Args:
        dataset: 2-D array indexed as ``dataset[row, 0]``; only column 0 is read.
        look_back: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays. ``X[k]`` holds rows ``k`` to
        ``k + look_back - 1`` of column 0 and ``Y[k]`` holds row ``k + look_back``.
        There are ``len(dataset) - look_back - 1`` samples; both arrays are
        empty when that count is not positive.
    """
    # NOTE(review): the trailing "- 1" leaves the last possible window unused.
    # It is kept because the sample count feeds the plotting offsets elsewhere
    # in the notebook.
    n_samples = len(dataset) - look_back - 1
    starts = range(n_samples)
    windows = [dataset[start:start + look_back, 0] for start in starts]
    targets = [dataset[start + look_back, 0] for start in starts]
    return np.array(windows), np.array(targets)

# Number of past time steps in each input window.
look_back = 10

# Build (window, next value) pairs separately for each split.
X_train, Y_train = create_dataset(train, look_back)
X_test, Y_test = create_dataset(test, look_back)

# Add a trailing feature axis: (samples, look_back) -> (samples, look_back, 1),
# matching the (look_back, 1) input shape declared for the first LSTM layer.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

NameError: name 'data' is not defined

## Montando e treinando a RNN

In [None]:
# Seed Python, NumPy and TensorFlow in one call so weight initialisation and
# batch shuffling are repeatable across "Restart & Run All" (some GPU ops may
# still be nondeterministic).
tf.keras.utils.set_random_seed(42)

# Two stacked LSTM layers followed by a single-unit dense output layer.
model = Sequential()
# return_sequences=True so this layer passes the whole sequence to the next LSTM.
model.add(LSTM(50, return_sequences=True, input_shape=(look_back, 1)))
model.add(LSTM(50))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

# NOTE(review): the test split is also used as validation data here, so the
# per-epoch val_loss is not an independent estimate from the final test score.
history = model.fit(X_train, Y_train, epochs=20, batch_size=64, validation_data=(X_test, Y_test), verbose=1)

NameError: name 'look_back' is not defined

## Avaliando a RNN

In [None]:
# Predict on both splits; outputs are still in the scaler's normalised units.
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Map the predictions back to the original temperature scale.
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)

# Do the same for the targets, but under new names: Y_train / Y_test stay
# untouched so this cell (and the training cell) can be re-run safely.
# The targets are reshaped to a column because the scaler was fitted on a
# single feature.
Y_train_actual = scaler.inverse_transform(Y_train.reshape(-1, 1)).ravel()
Y_test_actual = scaler.inverse_transform(Y_test.reshape(-1, 1)).ravel()

# Root-mean-squared error, in the same units as the temperature column.
train_score = np.sqrt(mean_squared_error(Y_train_actual, train_predict[:, 0]))
test_score = np.sqrt(mean_squared_error(Y_test_actual, test_predict[:, 0]))
print(f'Train RMSE: {train_score:.2f}')
print(f'Test RMSE: {test_score:.2f}')

NameError: name 'X_train' is not defined

## Visualizando as previsões

In [None]:
# Each prediction is placed at the position of the value it forecasts; the rest
# stays NaN so matplotlib draws nothing there.

# The first training window covers rows [0, look_back), so its prediction
# belongs at row `look_back`.
train_predict_plot = np.full_like(data_scaled, np.nan)
train_predict_plot[look_back:len(train_predict)+look_back, :] = train_predict

# create_dataset yields len(split) - look_back - 1 windows, so
# len(train_predict) + look_back + 1 is the length of the training split; the
# first test prediction sits another look_back rows after that.
test_predict_plot = np.full_like(data_scaled, np.nan)
test_predict_plot[len(train_predict)+(look_back*2)+1:len(data_scaled)-1, :] = test_predict

# Plot against the DataFrame index so the x-axis matches its 'Data' label
# instead of showing integer row positions.
plt.figure(figsize=(14, 5))
plt.plot(data.index, scaler.inverse_transform(data_scaled), label='Série Temporal Real')
plt.plot(data.index, train_predict_plot, label='Previsão Treinamento')
plt.plot(data.index, test_predict_plot, label='Previsão Teste')
plt.title('Previsão de Temperatura usando LSTM')
plt.xlabel('Data')
plt.ylabel('Temperatura (C)')
plt.legend()
plt.show()