In [1]:
import sys
import os

# Make the parent of the working directory importable, so that project
# packages can be imported from this notebook.
parent_dir = os.path.join(os.getcwd(), os.pardir)
project_dir = os.path.abspath(parent_dir)
if project_dir not in sys.path:
    sys.path.append(project_dir)

In [None]:
# Install the notebook's third-party dependencies.
# `%pip` (instead of `!pip`) installs into the environment of the running
# kernel, which is not necessarily the one the shell's `pip` points at.
# scikit-learn, scipy and seaborn are listed because the import cell of this
# notebook imports them directly.
%pip install pandas openpyxl numpy
%pip install statsmodels arch prophet
%pip install tensorflow
%pip install boto3 requests
%pip install joblib matplotlib tabulate
%pip install scikit-learn scipy seaborn



In [None]:
import pandas as pd
import numpy as np
import boto3
import joblib
import tarfile
from prophet import Prophet
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from scripts.preprocessing import load_data_csv, make_stationary, split_data, save_model_local, upload_model_s3, calculate_metrics
from prophet import Prophet
from scipy import stats
import seaborn as sns
from tabulate import tabulate

In [None]:
# Dataset location (bucket + object key) and the CSV field separator.
# NOTE(review): `load_data_csv` is a project helper; presumably it reads the
# object from S3 into a DataFrame — confirm in scripts/preprocessing.
bucket, file_key, separator = (
    'bk-price-prediction-data',
    'data/PIMIENTO/PIMIENTO_DATASET_1.csv',
    ',',
)
df = load_data_csv(bucket, file_key, separator)
df.info()

In [None]:
# Parse the DATE column into datetimes, then promote it to the frame's index
# (in place, so `df` keeps referring to the same object).
parsed_dates = pd.to_datetime(df['DATE'])
df['DATE'] = parsed_dates
df.set_index(keys='DATE', inplace=True)

In [None]:
# Remove outliers: keep only rows whose PRICE lies within 3 standard
# deviations of the mean.
# nan_policy='omit' computes mean/std from the non-missing prices; with the
# default ('propagate') a single NaN makes every z-score NaN and the filter
# would silently drop *all* rows. Rows with a missing PRICE still get a NaN
# z-score and are therefore excluded by the comparison below.
price_z = stats.zscore(df['PRICE'], nan_policy='omit')
# .copy() detaches the result from the original frame, so the column
# assignment done in the normalisation step does not raise
# SettingWithCopyWarning.
df = df[np.abs(price_z) < 3].copy()
df.info()

In [None]:
# Normalize data: rescale PRICE with a min-max scaler fitted on this frame.
# NOTE(review): the scaler is fitted on every row of `df`, i.e. before
# `split_data` separates train from test — confirm this is intended.
scaler = MinMaxScaler()
price_scaled = scaler.fit_transform(df[['PRICE']])
df['PRICE'] = price_scaled

In [None]:
# Keep only rows with an observed price; this is the frame used for both the
# decomposition below and the Prophet input built from `df_merge` afterwards.
df_merge = df.dropna(subset=['PRICE'])
print(df_merge.index.inferred_freq)

# Additive decomposition with an explicit 52-observation season (one year of
# weekly data). The NaN-free series is passed because seasonal_decompose does
# not accept missing values.
decomposition = seasonal_decompose(df_merge['PRICE'], model='additive', period=52)

# Plot the decomposition
decomposition.plot()
plt.show()

In [None]:
# Prepare data for Prophet: move the DATE index back into a column and rename
# the date/value columns to `ds` / `y`.
prophet_columns = {'DATE': 'ds', 'PRICE': 'y'}
df_prophet = df_merge.reset_index().rename(columns=prophet_columns)

In [None]:
# Split the Prophet-formatted frame into a training part and a hold-out part.
# NOTE(review): the split strategy (ratio, chronological vs. random) lives in
# scripts.preprocessing.split_data and is not visible here — confirm it is
# chronological, as required for a time-series evaluation.
train, test = split_data(df_prophet)

In [None]:
# Initialize and fit Prophet model.
# This notebook treats the series as weekly with an annual cycle (the
# decomposition uses period=52). With one observation per week a day-of-week
# effect cannot be identified, so weekly seasonality is switched off and the
# yearly component is the one that is modelled.
model = Prophet(
    weekly_seasonality=False,
    yearly_seasonality=True,
    changepoint_prior_scale=0.1,   # tunable: flexibility of the trend
    seasonality_prior_scale=10.0,  # tunable: strength of the seasonal component
)
model.fit(train)

In [None]:
# Make predictions: run the fitted model on the hold-out frame `test`.
# The resulting frame's `yhat` column is what the evaluation step reads.
forecast = model.predict(test)

In [None]:
# Evaluate the model: pair the last len(test) forecast values with the
# observed hold-out targets, both as plain arrays.
n_test = len(test)
predictions = forecast['yhat'].iloc[-n_test:].to_numpy()
true_values = test['y'].to_numpy()

In [None]:
# Score the forecast against the held-out values.
# NOTE(review): which metrics are computed, and the structure of `results`,
# are defined in scripts.preprocessing.calculate_metrics — not visible here.
results=calculate_metrics(true_values,predictions)

In [None]:
# Render the metrics as a grid-style text table with "Metric" / "Value" headers.
# presumably `results` is a sequence of (metric name, value) rows — verify
# against calculate_metrics.
print(tabulate(results, headers=["Metric", "Value"], tablefmt="grid"))

In [None]:
# Persist the fitted model under the name 'model_pimiento' via the project helper.
# NOTE(review): file format and destination path are decided inside
# scripts.preprocessing.save_model_local — not visible here.
name_model = 'model_pimiento'
save_model_local(model,name_model)

In [None]:
# Upload the model to S3, into the same bucket the dataset was loaded from.
# presumably this picks up the artifact written by save_model_local under
# `name_model` — confirm the object key in scripts.preprocessing.upload_model_s3.
upload_model_s3(name_model, bucket)