#predição de uma série temporal
##Aluno: Yago Phellipe Matos Lopes
###Curso: Ciência da Computação

In [4]:
import os
import warnings
import zipfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
import math
from prophet import Prophet
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from google.colab import drive

# Montar o Google Drive
drive.mount('/content/drive')

# Ignorar avisos
warnings.filterwarnings('ignore')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Caminho do arquivo ZIP no Google Drive
zip_path = "/content/drive/MyDrive/time_series/archive.zip"
extract_dir = "/content/extracted_data"

# Criar diretório caso não exista
if not os.path.exists(extract_dir):
    os.makedirs(extract_dir)

# Extrair os arquivos do ZIP
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

# Identificar e carregar o CSV
csv_files = [f for f in os.listdir(extract_dir) if f.endswith('.csv')]
if csv_files:
    csv_path = os.path.join(extract_dir, csv_files[0])
    df = pd.read_csv(csv_path)
else:
    raise FileNotFoundError("Nenhum arquivo CSV encontrado no arquivo ZIP extraído.")

# Visualizar as primeiras linhas do dataset
df.head()


Unnamed: 0,SN,Commodity,Date,Unit,Minimum,Maximum,Average
0,0,Tomato Big(Nepali),2013-06-16,Kg,35.0,40.0,37.5
1,1,Tomato Small(Local),2013-06-16,Kg,26.0,32.0,29.0
2,2,Potato Red,2013-06-16,Kg,20.0,21.0,20.5
3,3,Potato White,2013-06-16,Kg,15.0,16.0,15.5
4,4,Onion Dry (Indian),2013-06-16,Kg,28.0,30.0,29.0


In [6]:
# Filtrar apenas a commodity "Potato White"
df_potato = df[df["Commodity"] == "Potato White"].copy()
df_potato["ds"] = pd.to_datetime(df_potato["Date"], errors="coerce")
df_potato["y"] = df_potato["Average"]
df_potato = df_potato[["ds", "y"]].dropna().sort_values("ds")

# Exibir os primeiros dados filtrados
df_potato.head()


Unnamed: 0,ds,y
3,2013-06-16,15.5
77,2013-06-17,15.5
150,2013-06-18,15.5
223,2013-06-19,15.5
296,2013-06-20,15.5


In [7]:
# Definir tamanho do conjunto de treino (80%)
train_size = int(len(df_potato) * 0.8)

# Criar conjunto de treino e teste
train_data = df_potato.iloc[:train_size]
test_data = df_potato.iloc[train_size:]

print(f"Tamanho do treino: {len(train_data)}")
print(f"Tamanho do teste: {len(test_data)}")


Tamanho do treino: 1904
Tamanho do teste: 477


In [8]:
# Criar e treinar o modelo Prophet
model_prophet = Prophet(yearly_seasonality=True, weekly_seasonality=True, seasonality_mode='multiplicative')
model_prophet.fit(train_data)

# Criar previsões para o período de teste
future = model_prophet.make_future_dataframe(periods=len(test_data))
forecast = model_prophet.predict(future)

# Selecionar apenas as previsões correspondentes ao conjunto de teste
prophet_pred = forecast.iloc[-len(test_data):][['ds', 'yhat']]

# Visualizar previsões
prophet_pred.head()


INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpzwarh6_5/obejmcr8.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpzwarh6_5/dkpzv21l.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=63217', 'data', 'file=/tmp/tmpzwarh6_5/obejmcr8.json', 'init=/tmp/tmpzwarh6_5/dkpzv21l.json', 'output', 'file=/tmp/tmpzwarh6_5/prophet_modelrlr5bkvx/prophet_model-20250331184732.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
18:47:32 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
18:47:33 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


Unnamed: 0,ds,yhat
1904,2019-02-21,22.373953
1905,2019-02-22,22.447422
1906,2019-02-23,22.586553
1907,2019-02-24,22.923834
1908,2019-02-25,22.864301


In [9]:
# Normalizar os dados com MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
dataset_scaled = scaler.fit_transform(df_potato['y'].values.reshape(-1, 1))

# Função para criar sequência de dados para LSTM
def create_dataset(data, time_step=1):
    X, y = [], []
    for i in range(len(data) - time_step):
        X.append(data[i:i + time_step, 0])
        y.append(data[i + time_step, 0])
    return np.array(X), np.array(y)

# Definir o número de passos de tempo
time_step = 10

# Criar datasets para treino e teste
train_scaled = dataset_scaled[:train_size]
test_scaled = dataset_scaled[train_size - time_step:]

X_train, y_train = create_dataset(train_scaled, time_step)
X_test, y_test = create_dataset(test_scaled, time_step)

# Ajustar o formato para o modelo LSTM
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

print(f"Formato de X_train: {X_train.shape}")
print(f"Formato de X_test: {X_test.shape}")


Formato de X_train: (1894, 10, 1)
Formato de X_test: (477, 10, 1)


In [10]:
# Criar modelo LSTM
model_lstm = Sequential([
    LSTM(50, return_sequences=True, input_shape=(time_step, 1)),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(1)
])

# Compilar o modelo
model_lstm.compile(optimizer='adam', loss='mean_squared_error')

# Definir early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Treinar modelo
model_lstm.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=1)


Epoch 1/50
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - loss: 0.0095 - val_loss: 9.7290e-04
Epoch 2/50
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0012 - val_loss: 9.0765e-04
Epoch 3/50
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0012 - val_loss: 0.0010
Epoch 4/50
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0010 - val_loss: 8.7323e-04
Epoch 5/50
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 9.7495e-04 - val_loss: 8.2023e-04
Epoch 6/50
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0010 - val_loss: 0.0010
Epoch 7/50
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0010 - val_loss: 7.2306e-04
Epoch 8/50
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0010 - val_loss: 6.8491e-04
Epoch 9/50
[1m48/48[0m [

<keras.src.callbacks.history.History at 0x7da76e623190>

In [11]:
# Fazer previsões para treino e teste
train_predict = scaler.inverse_transform(model_lstm.predict(X_train))
test_predict = scaler.inverse_transform(model_lstm.predict(X_test))

# Reverter escalonamento dos valores reais
y_train_inv = scaler.inverse_transform(y_train.reshape(-1, 1))
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))

# Exibir previsões
test_predict[:5]


[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


array([[19.143976],
       [19.143976],
       [19.143976],
       [19.143976],
       [19.143976]], dtype=float32)

In [12]:
# Previsões Prophet
prophet_predictions = prophet_pred['yhat'].values[:len(y_test_inv)]

# Previsões LSTM
lstm_predictions = test_predict[:len(y_test_inv)].flatten()

# Cálculo das métricas de avaliação
prophet_mae = mean_absolute_error(y_test_inv, prophet_predictions)
prophet_rmse = math.sqrt(mean_squared_error(y_test_inv, prophet_predictions))
prophet_mape = mean_absolute_percentage_error(y_test_inv, prophet_predictions) * 100
prophet_r2 = r2_score(y_test_inv, prophet_predictions)

lstm_mae = mean_absolute_error(y_test_inv, lstm_predictions)
lstm_rmse = math.sqrt(mean_squared_error(y_test_inv, lstm_predictions))
lstm_mape = mean_absolute_percentage_error(y_test_inv, lstm_predictions) * 100
lstm_r2 = r2_score(y_test_inv, lstm_predictions)

# Exibir métricas
print("\nMétricas de Avaliação - Prophet:")
print(f"MAE: {prophet_mae:.4f}")
print(f"RMSE: {prophet_rmse:.4f}")
print(f"MAPE: {prophet_mape:.4f}%")
print(f"R²: {prophet_r2:.4f}")

print("\nMétricas de Avaliação - LSTM:")
print(f"MAE: {lstm_mae:.4f}")
print(f"RMSE: {lstm_rmse:.4f}")
print(f"MAPE: {lstm_mape:.4f}%")
print(f"R²: {lstm_r2:.4f}")



Métricas de Avaliação - Prophet:
MAE: 11.4068
RMSE: 15.7252
MAPE: 34.8617%
R²: -0.1793

Métricas de Avaliação - LSTM:
MAE: 1.6306
RMSE: 3.4558
MAPE: 4.3020%
R²: 0.9430
