In [None]:
"""Untitled26.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/10PRv9P3TfvCD09dL-LFKaPCRn19mmj7O
"""

In [None]:
!pip install pytrends
!pip install prophet
!pip install statsmodels
!pip install joblib
!pip install pmdarima
!pip install streamlit

Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pytrends.request import TrendReq
from prophet import Prophet
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.stats.diagnostic import acorr_ljungbox
import itertools
import joblib

Fetch Google Trends Data

In [None]:
# Set up the connection to Google Trends
pytrends = TrendReq(hl='en-US', tz=360)

In [None]:
# Define the search terms
keywords = ["fever", "cough", "shortness of breath", "loss of taste"]

In [None]:
# Pull interest over time for the last 12 months
pytrends.build_payload(keywords, cat=0, timeframe='today 12-m', geo='', gprop='')
trend_data = pytrends.interest_over_time()

In [None]:
# Preview the data
print(trend_data.head())

In [None]:
# Save data to CSV
trend_data.to_csv("google_trends_health_data.csv")

In [None]:
# Check for missing values and data types.
# DataFrame.info() writes its report itself and returns None, so it must not be
# wrapped in print() (doing so appends a stray "None" line to the output).
trend_data.info()
print(trend_data.isnull().sum())

Clean and Process Data

In [None]:
# Forward-fill any missing values.
# `fillna(method='ffill')` is deprecated in recent pandas releases; `ffill()` is the
# supported spelling. Rebinding the name (rather than inplace=True) keeps this cell
# safe to re-run.
trend_data = trend_data.ffill()

In [None]:
# Build the frame Prophet consumes: the index becomes column 'ds', 'fever' becomes 'y',
# and any row that still holds a NaN is removed.
fever_data = (
    trend_data[['fever']]
    .reset_index()
    .rename(columns={'date': 'ds', 'fever': 'y'})
    .dropna()
)

Fit Prophet Model

In [None]:
# Initialize and fit Prophet model
prophet_model = Prophet()
prophet_model.fit(fever_data)

In [None]:
# Create future dataframe and make forecast
future = prophet_model.make_future_dataframe(periods=60)
forecast = prophet_model.predict(future)

In [None]:
# Plot forecast
prophet_model.plot(forecast)
plt.title("Fever Search Trend Forecast with Prophet")
plt.show()

In [None]:
# Plot forecast components
prophet_model.plot_components(forecast)
plt.show()

Fit ARIMA Model

In [None]:
# Check and handle NaNs in ARIMA data
arima_data = trend_data['fever'].dropna()

In [None]:
# Fit ARIMA model
arima_model = ARIMA(arima_data, order=(5, 1, 0))
arima_result = arima_model.fit()

In [None]:
# Forecast for the next 60 periods (days)
arima_forecast = arima_result.forecast(steps=60)

In [None]:
# Plot the history together with the ARIMA forecast.
# The forecast timestamps must (a) start one step AFTER the last observation and
# (b) use the same spacing as the history. Hard-coding daily steps that begin on the
# last observed date mislabels the forecast whenever the series is not sampled daily.
history_index = trend_data.index
step = pd.infer_freq(history_index) or 'D'  # fall back to daily if spacing is irregular
forecast_index = pd.date_range(
    history_index[-1], periods=len(arima_forecast) + 1, freq=step
)[1:]  # drop the first stamp: it is the last observed date, not a forecast

plt.figure(figsize=(10, 6))
plt.plot(history_index, trend_data['fever'], label='Historical Fever Data')
plt.plot(forecast_index, np.asarray(arima_forecast), label='ARIMA Forecasted Data', color='red')
plt.legend()
plt.title("Fever Search Forecast with ARIMA")
plt.grid(True)
plt.show()

Model Evaluation

In [None]:
# Split data into training and test sets
train_data = fever_data[:-20]
test_data = fever_data[-20:]

In [None]:
# Train Prophet model on training data
prophet_model = Prophet()
prophet_model.fit(train_data)

In [None]:
# Forecast on test set.
# Predict on the exact timestamps present in fever_data (training rows followed by the
# held-out rows). Generating future rows with make_future_dataframe(periods=20) uses
# Prophet's default step, so the last 20 predictions are only comparable to test_data
# when the series happens to be sampled at that same step.
future = fever_data[['ds']].reset_index(drop=True)
forecast = prophet_model.predict(future)
predicted = forecast['yhat'].iloc[-len(test_data):]  # predictions for the held-out rows
actual = test_data['y']

In [None]:
# Hold-out error for Prophet: MAE, MSE, and RMSE (square root of the MSE)
prophet_mse = mean_squared_error(y_true=actual, y_pred=predicted)
prophet_mae = mean_absolute_error(y_true=actual, y_pred=predicted)
prophet_rmse = np.sqrt(prophet_mse)
print(f"Prophet MAE: {prophet_mae}, Prophet RMSE: {prophet_rmse}")

In [None]:
# Forecast using ARIMA.
# Refit on the training rows only. `arima_result` was estimated on the complete series
# (held-out rows included), so its forecast begins AFTER the test window and cannot be
# scored against test_data; it would also leak the test rows into the fit.
arima_eval_result = ARIMA(train_data.set_index('ds')['y'], order=(5, 1, 0)).fit()
arima_forecast = arima_eval_result.forecast(steps=len(test_data))

In [None]:
# Hold-out error for ARIMA: MAE, MSE, and RMSE (square root of the MSE)
arima_mse = mean_squared_error(y_true=test_data['y'], y_pred=arima_forecast)
arima_mae = mean_absolute_error(y_true=test_data['y'], y_pred=arima_forecast)
arima_rmse = np.sqrt(arima_mse)
print(f"ARIMA MAE: {arima_mae}, ARIMA RMSE: {arima_rmse}")

Optimize ARIMA Parameters

In [None]:
# Search space: p, d and q each take the values 0, 1, 2;
# pdq_combinations lists every (p, d, q) triple, last component varying fastest.
p = d = q = range(3)
pdq_combinations = [(i, j, k) for i in p for j in d for k in q]

In [None]:
# Running best for the grid search: start from "nothing found yet",
# i.e. an infinite AIC and no order selected.
best_pdq, best_aic = None, float("inf")

In [None]:
for param in pdq_combinations:
    try:
        model = ARIMA(trend_data['fever'], order=param)
        result = model.fit()
        if result.aic < best_aic:
            best_aic = result.aic
            best_pdq = param
    except:
        continue

In [None]:
print(f"Best ARIMA parameters: {best_pdq}, with AIC: {best_aic}")

In [None]:
# Check Residuals
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox  # Correct import for older versions

In [None]:
# Extract residuals
residuals = arima_result.resid

In [None]:
# Plot residuals
plt.figure(figsize=(10, 6))
plt.plot(residuals)
plt.title("Residuals of ARIMA Model")
plt.xlabel("Date")
plt.ylabel("Residuals")
plt.grid(True)
plt.show()

In [None]:
# Determine the sample size
sample_size = len(residuals)

In [None]:
# Calculate max_lags, ensure it's strictly less than half the sample size
max_lags = min((sample_size // 2) - 1, 25)  # Ensure max_lags is less than 26

In [None]:
# Plot ACF and PACF of residuals side by side, using the adjusted max_lags.
# The figure is created and drawn in ONE cell: with the notebook inline backend a
# figure is rendered when the cell that created it finishes, so creating the axes in
# one cell and plotting in the next shows empty axes first and nothing afterwards.
fig, ax = plt.subplots(1, 2, figsize=(15, 6))
plot_acf(residuals, lags=max_lags, ax=ax[0])
plot_pacf(residuals, lags=max_lags, ax=ax[1])
plt.show()

In [None]:
# Ljung-Box test on the residuals at lag 10; the result is requested as a DataFrame
lb_test = acorr_ljungbox(x=residuals, lags=[10], return_df=True)
print(lb_test)

Compare with Other Models

In [None]:
# Candidate orders: each of p, d, q takes the values 0, 1, 2;
# pdq_combinations enumerates all (p, d, q) triples, last component varying fastest.
p = d = q = range(3)
pdq_combinations = [(i, j, k) for i in p for j in d for k in q]

In [None]:
# Running best for the model comparison: infinite AIC, no fitted model,
# and no order selected until a candidate fits.
best_model = best_pdq = None
best_aic = float("inf")

In [None]:
for param in pdq_combinations:
    try:
        model = ARIMA(trend_data['fever'], order=param)
        result = model.fit()
        if result.aic < best_aic:
            best_aic = result.aic
            best_model = result
            best_pdq = param
    except:
        continue

In [None]:
print(f"Best ARIMA parameters: {best_pdq}, with AIC: {best_aic}")

Cross-Validation

In [None]:
from sklearn.model_selection import TimeSeriesSplit

In [None]:
tscv = TimeSeriesSplit(n_splits=5)

In [None]:
for train_index, test_index in tscv.split(trend_data):
    train, test = trend_data.iloc[train_index], trend_data.iloc[test_index]
    model = ARIMA(train['fever'], order=(0, 2, 1))
    result = model.fit()
    forecast = result.forecast(steps=len(test))
    # Evaluate forecast accuracy
    print(f"Train period: {train.index[-1]} to {test.index[0]}")
    print(f"Test period: {test.index[0]} to {test.index[-1]}")

Prophet Model Adjustments

In [None]:
# Initialize and fit Prophet model with custom seasonality
prophet_model = Prophet(changepoint_prior_scale=0.1)  # Lower value for less sensitivity to changepoints
prophet_model.add_seasonality(name='monthly', period=30.5, fourier_order=8)
prophet_model.fit(fever_data)

In [None]:
# Create future dataframe and make forecast
future = prophet_model.make_future_dataframe(periods=60)
forecast = prophet_model.predict(future)

In [None]:
# Plot forecast
prophet_model.plot(forecast)
plt.title("Fever Search Trend Forecast with Prophet")
plt.show()

In [None]:
# Plot forecast components
prophet_model.plot_components(forecast)
plt.show()

In [None]:
# Prophet's 'yhat' and the observed values, both indexed by date and restricted to
# the last 20 timestamps of trend_data
prophet_predictions = forecast.set_index('ds')[['yhat']].loc[trend_data.index[-20:]]
actual_values = fever_data.set_index('ds').loc[trend_data.index[-20:]]

Save and Load Models

In [None]:
# Save Prophet model
joblib.dump(prophet_model, 'prophet_model.pkl')

In [None]:
# Save ARIMA model
joblib.dump(arima_result, 'arima_model.pkl')

In [None]:
# Load Prophet model
loaded_prophet_model = joblib.load('prophet_model.pkl')

In [None]:
# Load ARIMA model
loaded_arima_model = joblib.load('arima_model.pkl')

In [None]:
# Predict with the loaded model
future = loaded_prophet_model.make_future_dataframe(periods=60)
forecast = loaded_prophet_model.predict(future)

In [None]:
# 1. Incorporate External Data (Example: Weather Data)
import requests
def fetch_weather_data(start_date, end_date, location='YOUR_LOCATION', api_key=None, timeout=30):
    """Fetch historical weather from the weatherapi.com history endpoint.

    Args:
        start_date: First date of the range, sent as the `dt` query parameter.
        end_date: Last date of the range, sent as the `end_dt` query parameter.
        location: Location query, sent as the `q` query parameter.
        api_key: API key. When None, the WEATHER_API_KEY environment variable is
            used, falling back to the original placeholder string.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status (for example
            when the placeholder key or location has not been replaced).
    """
    import os  # local import so the function does not depend on another cell

    if api_key is None:
        # Prefer a key from the environment so credentials stay out of the notebook.
        api_key = os.environ.get('WEATHER_API_KEY', 'YOUR_WEATHER_API_KEY')

    # Passing `params` lets requests URL-encode the values (locations may contain
    # spaces or commas); a timeout prevents the cell from hanging indefinitely.
    response = requests.get(
        'https://api.weatherapi.com/v1/history.json',
        params={'key': api_key, 'q': location, 'dt': start_date, 'end_dt': end_date},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of returning an error payload as if it were data.
    response.raise_for_status()
    return response.json()

In [None]:
# Request weather history for 2023-01-01 through 2023-09-01
weather_data = fetch_weather_data(start_date='2023-01-01', end_date='2023-09-01')

In [None]:
# 2. ARIMA Model Selection using `pmdarima`
from pmdarima import auto_arima

In [None]:
# Let pmdarima select a seasonal ARIMA order for the fever series, then print its summary.
# NOTE(review): m=12 is the seasonal period in observations -- confirm it matches the
# sampling interval of trend_data.
auto_model = auto_arima(y=trend_data['fever'], m=12, seasonal=True)
print(auto_model.summary())

In [None]:
# 3. Hybrid Model Example
from prophet import Prophet
from statsmodels.tsa.arima.model import ARIMA

In [None]:
# Fit Prophet model
prophet_model = Prophet()
prophet_model.fit(fever_data)
future = prophet_model.make_future_dataframe(periods=60)
prophet_forecast = prophet_model.predict(future)

In [None]:
# Fit ARIMA model
arima_model = auto_arima(trend_data['fever'], seasonal=True, m=12)
arima_forecast = arima_model.predict(n_periods=60)

In [None]:
# 4. Interactive Dashboard (Streamlit example)
# NOTE(review): Streamlit elements are meant to be rendered by `streamlit run <script>`;
# confirm whether this cell is intended to live in a separate app file.
import streamlit as st

# Timestamps for the ARIMA forecast: continue from the step AFTER the last observation,
# using the spacing of the history (daily only as a fallback for irregular spacing).
history_index = trend_data.index
step = pd.infer_freq(history_index) or 'D'
arima_index = pd.date_range(
    history_index[-1], periods=len(arima_forecast) + 1, freq=step
)[1:]  # drop the first stamp: it is the last observed date, not a forecast

st.title('Google Trends Forecast Dashboard')
st.line_chart(trend_data['fever'])
st.line_chart(prophet_forecast[['ds', 'yhat']].set_index('ds'))
# np.asarray() strips any index the forecast already carries. Passing a Series together
# with `index=` makes pandas re-align by label, which yields NaN for every label that
# does not already exist in the forecast's own index.
st.line_chart(pd.Series(np.asarray(arima_forecast), index=arima_index))