In [2]:
import yfinance as yf
from datetime import datetime, timedelta

def download_stock_data(ticker, years_back):
    end_date = datetime(2023, 12, 31)
    start_date = end_date - timedelta(days=years_back * 365)

    data = yf.download(ticker, start=start_date, end=end_date)
    return data

# Example Usage
ticker = 'AMZN'  # Replace with the desired ticker symbol
years_back = 3
historical_data = download_stock_data(ticker, years_back)
print(historical_data)

[*********************100%%**********************]  1 of 1 completed
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2020-12-31  163.750000  164.145996  162.059998  162.846497  162.846497   
2021-01-04  163.500000  163.600006  157.201004  159.331497  159.331497   
2021-01-05  158.300507  161.169006  158.253006  160.925507  160.925507   
2021-01-06  157.324005  159.875504  156.557999  156.919006  156.919006   
2021-01-07  157.850006  160.427002  157.750000  158.108002  158.108002   
...                ...         ...         ...         ...         ...   
2023-12-22  153.770004  154.350006  152.710007  153.419998  153.419998   
2023-12-26  153.559998  153.979996  153.029999  153.410004  153.410004   
2023-12-27  153.559998  154.779999  153.119995  153.339996  153.339996   
2023-12-28  153.720001  154.080002  152.949997  153.380005  153.380005   
2023-12-29  153.100006  153.889999  151.029

In [3]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller


close_series = historical_data['Close']

# Realiza la prueba de Dickey-Fuller
result = adfuller(close_series)

# Imprime el resultado
print("Estadística de prueba de Dickey-Fuller:", result[0])
print("Valor crítico al 1%:", result[4]['1%'])
print("Valor crítico al 5%:", result[4]['5%'])
print("Valor crítico al 10%:", result[4]['10%'])
print("P-valor:", result[1])

# Comprueba si la serie es estacionaria
if result[1] <= 0.05:
    print("La serie es estacionaria.")
else:
    print("La serie no es estacionaria.")


Estadística de prueba de Dickey-Fuller: -1.6268211732094386
Valor crítico al 1%: -3.4390641198617864
Valor crítico al 5%: -2.8653859408474482
Valor crítico al 10%: -2.5688179819544312
P-valor: 0.4691550717125605
La serie no es estacionaria.


In [4]:
# Diferenciación de primer orden
close_series_diff = close_series.diff().dropna()

# Realiza la prueba de Dickey-Fuller en la serie diferenciada
result_diff = adfuller(close_series_diff)
# Imprime el resultado
print("Estadística de prueba de Dickey-Fuller:", result_diff[0])
print("Valor crítico al 1%:", result_diff[4]['1%'])
print("Valor crítico al 5%:", result_diff[4]['5%'])
print("Valor crítico al 10%:", result_diff[4]['10%'])
print("P-valor:", result_diff[1])

Estadística de prueba de Dickey-Fuller: -27.606479555080753
Valor crítico al 1%: -3.439075747702915
Valor crítico al 5%: -2.8653910653234655
Valor crítico al 10%: -2.568820711931304
P-valor: 0.0


In [5]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

# Asegúrate de que la fecha sea el índice
historical_data.index = pd.to_datetime(historical_data.index)

# Dividir los datos en entrenamiento y prueba
train_data = historical_data['Close'][:-60]
test_data = historical_data['Close'][-60:]






In [6]:
from pmdarima import auto_arima

model_autoARIMA = auto_arima(train_data, star_p=0, start_q=0,
                             test='adf',
                             max_p=7, max_q=7,
                             m=1,
                             d=None,
                             seasonal=False,
                             start_P=0,
                             D=0,
                             trace=True,
                             error_action='ignore',
                             suppress_warnings=True,
                             stepwise=True                             )

best_p = model_autoARIMA.order[0]
best_d = model_autoARIMA.order[1]
best_q = model_autoARIMA.order[2]

Performing stepwise search to minimize aic
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=3554.449, Time=0.07 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=3550.535, Time=0.01 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=3552.514, Time=0.05 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=3552.514, Time=0.05 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=3548.725, Time=0.02 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=3553.797, Time=0.25 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 0.451 seconds


In [7]:
import pandas as pd
import numpy as np
import statsmodels.api as sm


historical_data.index = pd.to_datetime(historical_data.index)


historical_data['Close'] = pd.to_numeric(historical_data['Close'], errors='coerce').fillna(0)

# División Train/Test
train_data = historical_data['Close'].iloc[:-30]  # Por ejemplo, deja los últimos 30 días para prueba
test_data = historical_data['Close'].iloc[-30:]

# Crea y entrena el modelo ARIMA con los parámetros mencionados
model = sm.tsa.ARIMA(train_data, order=(best_p, best_d, best_q))
results = model.fit()

# Realiza predicciones en el conjunto de prueba
predictions = results.predict(start=len(train_data), end=len(train_data) + len(test_data) - 1, typ='levels')


print(predictions)



724    143.199997
725    143.199997
726    143.199997
727    143.199997
728    143.199997
729    143.199997
730    143.199997
731    143.199997
732    143.199997
733    143.199997
734    143.199997
735    143.199997
736    143.199997
737    143.199997
738    143.199997
739    143.199997
740    143.199997
741    143.199997
742    143.199997
743    143.199997
744    143.199997
745    143.199997
746    143.199997
747    143.199997
748    143.199997
749    143.199997
750    143.199997
751    143.199997
752    143.199997
753    143.199997
Name: predicted_mean, dtype: float64


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


In [8]:
from sklearn.metrics import mean_squared_error, mean_absolute_error,mean_absolute_percentage_error

# Calcula el error cuadrático medio (MSE)
mse = mean_squared_error(test_data, predictions)
print(f'Mean Squared Error (MSE): {mse}')

# Calcula el error absoluto medio (MAE)
mae = mean_absolute_error(test_data, predictions)
print(f'Mean Absolute Error (MAE): {mae}')

# Calcula el MAPE
mape = mean_absolute_percentage_error(test_data, predictions)
# Imprime el resultado
print(f'Mean Absolute Percentage Error (MAPE): {mape*100:.2f}%')

Mean Squared Error (MSE): 39.6957915645636
Mean Absolute Error (MAE): 5.32933603922526
Mean Absolute Percentage Error (MAPE): 3.54%
