In [9]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from prophet import Prophet
from sklearn.metrics import mean_squared_error

# Load the data
df = pd.read_csv('/content/Patna.csv', parse_dates=['Date'], index_col='Date')

# Split the data into train and test sets
train_data = df[:-12]
test_data = df[-12:]

# Define the forecasting functions
def arima_forecast(train, test):
    model = ARIMA(train, order=(1,1,1))
    model_fit = model.fit()
    predictions = model_fit.forecast(steps=12)
    return predictions

def sarima_forecast(train, test):
    model = SARIMAX(train, order=(1,1,1), seasonal_order=(0,1,1,12))
    model_fit = model.fit(disp=False)
    predictions = model_fit.forecast(steps=12)
    return predictions

def prophet_forecast(train, test):
    train = train.reset_index()
    train.columns = ['ds', 'y']
    model = Prophet()
    model.fit(train)
    future = model.make_future_dataframe(periods=12, freq='MS')
    forecast = model.predict(future)
    return forecast['yhat'][-12:]

# Apply each forecasting function
arima_predictions = arima_forecast(train_data, test_data)
sarima_predictions = sarima_forecast(train_data, test_data)
prophet_predictions = prophet_forecast(train_data, test_data)

# Create separate DataFrames for each model's predictions
arima_pred_df = pd.DataFrame({'Date': pd.date_range(start='2023-06-24', periods=len(arima_predictions), freq='D'), 'ARIMA': arima_predictions})
sarima_pred_df = pd.DataFrame({'Date': pd.date_range(start='2023-06-24', periods=len(sarima_predictions), freq='D'), 'SARIMA': sarima_predictions})
prophet_pred_df = pd.DataFrame({'Date': pd.date_range(start='2023-08-01', periods=len(prophet_predictions), freq='MS'), 'Prophet': prophet_predictions})

# Resample ARIMA and SARIMA predictions to monthly frequency
arima_pred_monthly = arima_pred_df.resample('MS', on='Date').mean()
sarima_pred_monthly = sarima_pred_df.resample('MS', on='Date').mean()

# Concatenate the DataFrames together
comparison_df = pd.concat([arima_pred_monthly, sarima_pred_monthly, prophet_pred_df], axis=1)
comparison_df = comparison_df.loc[:, ['ARIMA', 'SARIMA', 'Prophet']]
print("Comparison of Model Predictions:")
print(comparison_df)

# Calculate MSE for each model
arima_mse = mean_squared_error(test_data, arima_predictions)
sarima_mse = mean_squared_error(test_data, sarima_predictions)
prophet_mse = mean_squared_error(test_data, prophet_predictions)

print("\nMean Squared Error (MSE):")
print(f"ARIMA: {arima_mse:.10f}")
print(f"SARIMA: {sarima_mse:.10f}")
print(f"Prophet: {prophet_mse:.10f}")

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpi3gqurbx/pet9ifcx.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpi3gqurbx/kt8x0uae.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=52906', 'data', 'file=/tmp/tmpi3gqurbx/pet9ifcx.json', 'init=/tmp/tmpi3gqurbx/kt8x0uae.json', 'output', 'file=/tmp/tmpi3gqurbx/prophet_modeleb54mgrv/prophet_model-20240508173529.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:35:29 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanp

Comparison of Model Predictions:
                        ARIMA    SARIMA   Prophet
2023-06-01 00:00:00  0.000074 -0.000010       NaN
2023-07-01 00:00:00  0.000017 -0.000156       NaN
1675                      NaN       NaN  0.000158
1676                      NaN       NaN  0.000119
1677                      NaN       NaN  0.000144
1678                      NaN       NaN  0.000124
1679                      NaN       NaN  0.000177
1680                      NaN       NaN  0.000172
1681                      NaN       NaN  0.000135
1682                      NaN       NaN  0.000159
1683                      NaN       NaN  0.000140
1684                      NaN       NaN  0.000169
1685                      NaN       NaN  0.000159
1686                      NaN       NaN  0.000153

Mean Squared Error (MSE):
ARIMA: 0.0000000105
SARIMA: 0.0000000542
Prophet: 0.0000000009


In [10]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from prophet import Prophet
from sklearn.metrics import mean_squared_error

# Load the data
df = pd.read_csv('/content/Patna.csv', parse_dates=['Date'], index_col='Date')

# Split the data into train and test sets
train_data = df[:-12]
test_data = df[-12:]

# Define the forecasting functions
def arima_forecast(train, test, steps):
    model = ARIMA(train, order=(1,1,1))
    model_fit = model.fit()
    predictions = model_fit.forecast(steps=steps)
    return predictions

def sarima_forecast(train, test, steps):
    model = SARIMAX(train, order=(1,1,1), seasonal_order=(0,1,1,12))
    model_fit = model.fit(disp=False)
    predictions = model_fit.forecast(steps=steps)
    return predictions

def prophet_forecast(train, test, steps):
    train = train.reset_index()
    train.columns = ['ds', 'y']
    model = Prophet()
    model.fit(train)
    future = model.make_future_dataframe(periods=steps, freq='MS')
    forecast = model.predict(future)
    return forecast['yhat'][-steps:]

# Apply each forecasting function
arima_predictions = arima_forecast(train_data, test_data, steps=12)
sarima_predictions = sarima_forecast(train_data, test_data, steps=12)
prophet_predictions = prophet_forecast(train_data, test_data, steps=12)

# Calculate MSE for each model
arima_mse = mean_squared_error(test_data, arima_predictions)
sarima_mse = mean_squared_error(test_data, sarima_predictions)
prophet_mse = mean_squared_error(test_data, prophet_predictions)

# Select the best model based on MSE
best_model = min(arima_mse, sarima_mse, prophet_mse)

if best_model == arima_mse:
    print("ARIMA is the best model.")
    best_predictions = arima_forecast(df, None, steps=12)
elif best_model == sarima_mse:
    print("SARIMA is the best model.")
    best_predictions = sarima_forecast(df, None, steps=12)
else:
    print("Prophet is the best model.")
    best_predictions = prophet_forecast(df, None, steps=12)

# Create a DataFrame with the best model's predictions for the next 1 year
next_year_predictions = pd.DataFrame(
    {'Date': pd.date_range(start=df.index[-1], periods=13, freq='MS')[1:],
     'Predicted_Value': best_predictions})

print("\nNext 1 Year Predictions:")
print(next_year_predictions)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpi3gqurbx/27bxhx0k.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpi3gqurbx/6wuvucvs.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=99805', 'data', 'file=/tmp/tmpi3gqurbx/27bxhx0k.json', 'init=/tmp/tmpi3gqurbx/6wuvucvs.json', 'output', 'file=/tmp/tmpi3gqurbx/prophet_model3k3t1yli/prophet_model-20240508173804.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:38:04 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanp

Prophet is the best model.


DEBUG:cmdstanpy:input tempfile: /tmp/tmpi3gqurbx/zd7djzze.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=64450', 'data', 'file=/tmp/tmpi3gqurbx/12a3tjw2.json', 'init=/tmp/tmpi3gqurbx/zd7djzze.json', 'output', 'file=/tmp/tmpi3gqurbx/prophet_modelatqfnp01/prophet_model-20240508173806.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:38:06 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:38:06 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing



Next 1 Year Predictions:
           Date  Predicted_Value
1687 2023-09-01         0.000118
1688 2023-10-01         0.000143
1689 2023-11-01         0.000123
1690 2023-12-01         0.000176
1691 2024-01-01         0.000171
1692 2024-02-01         0.000134
1693 2024-03-01         0.000158
1694 2024-04-01         0.000139
1695 2024-05-01         0.000168
1696 2024-06-01         0.000158
1697 2024-07-01         0.000152
1698 2024-08-01         0.000163


In [11]:
#Daly data
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from prophet import Prophet
from sklearn.metrics import mean_squared_error

# Load the data
df = pd.read_csv('/content/Patna.csv', parse_dates=['Date'], index_col='Date')

# Resample the data to daily frequency
df_daily = df.resample('D').asfreq()

# Fill missing values with the previous day's value
df_daily.fillna(method='ffill', inplace=True)

# Split the data into train and test sets
train_data = df_daily[:-365]
test_data = df_daily[-365:]

# Define the forecasting functions
def arima_forecast(train, test, steps):
    model = ARIMA(train, order=(1,1,1))
    model_fit = model.fit()
    predictions = model_fit.forecast(steps=steps)
    return predictions

def sarima_forecast(train, test, steps):
    model = SARIMAX(train, order=(1,1,1), seasonal_order=(0,1,1,7))
    model_fit = model.fit(disp=False)
    predictions = model_fit.forecast(steps=steps)
    return predictions

def prophet_forecast(train, test, steps):
    train = train.reset_index()
    train.columns = ['ds', 'y']
    model = Prophet(daily_seasonality=True)
    model.fit(train)
    future = model.make_future_dataframe(periods=steps, freq='D')
    forecast = model.predict(future)
    return forecast['yhat'][-steps:]

# Apply each forecasting function
arima_predictions = arima_forecast(train_data, test_data, steps=365)
sarima_predictions = sarima_forecast(train_data, test_data, steps=365)
prophet_predictions = prophet_forecast(train_data, test_data, steps=365)

# Calculate MSE for each model
arima_mse = mean_squared_error(test_data, arima_predictions)
sarima_mse = mean_squared_error(test_data, sarima_predictions)
prophet_mse = mean_squared_error(test_data, prophet_predictions)

# Select the best model based on MSE
best_model = min(arima_mse, sarima_mse, prophet_mse)

if best_model == arima_mse:
    print("ARIMA is the best model.")
    best_predictions = arima_forecast(df_daily, None, steps=365)
elif best_model == sarima_mse:
    print("SARIMA is the best model.")
    best_predictions = sarima_forecast(df_daily, None, steps=365)
else:
    print("Prophet is the best model.")
    best_predictions = prophet_forecast(df_daily, None, steps=365)

# Create a DataFrame with the best model's predictions for the next 1 year
next_year_predictions = pd.DataFrame(
    {'Date': pd.date_range(start=df_daily.index[-1], periods=366, freq='D')[1:],
     'Predicted_Value': best_predictions})

print("\nNext 1 Year Predictions:")
print(next_year_predictions)

  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting MA parameters found.'
DEBUG:cmdstanpy:input tempfile: /tmp/tmpi3gqurbx/61mwxcua.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpi3gqurbx/0e257j1g.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=99466', 'data', 'file=/tmp/tmpi3gqurbx/61mwxcua.json', 'init=/tmp/tmpi3gqurbx/0e257j1g.json', 'output', 'file=/tmp/tmpi3gqurbx/prophet_modellzq_nymm/prophet_model-20240508174005.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:40:05 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:40:05 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpi3gqurbx/2ik_gwcm.json


Prophet is the best model.


DEBUG:cmdstanpy:input tempfile: /tmp/tmpi3gqurbx/7lsypsr6.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=6675', 'data', 'file=/tmp/tmpi3gqurbx/2ik_gwcm.json', 'init=/tmp/tmpi3gqurbx/7lsypsr6.json', 'output', 'file=/tmp/tmpi3gqurbx/prophet_model0fmbkbh0/prophet_model-20240508174006.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:40:06 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:40:06 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing



Next 1 Year Predictions:
           Date  Predicted_Value
1687 2023-07-06         0.000154
1688 2023-07-07         0.000154
1689 2023-07-08         0.000153
1690 2023-07-09         0.000150
1691 2023-07-10         0.000150
...         ...              ...
2047 2024-06-30         0.000162
2048 2024-07-01         0.000162
2049 2024-07-02         0.000163
2050 2024-07-03         0.000163
2051 2024-07-04         0.000162

[365 rows x 2 columns]


In [12]:
#whole data using prophet without using train and test

In [13]:
import pandas as pd
from prophet import Prophet

# Load the data
df = pd.read_csv('/content/Patna.csv', parse_dates=['Date'], index_col='Date')

# Resample the data to daily frequency
df_daily = df.resample('D').asfreq()

# Fill missing values with the previous day's value
df_daily.fillna(method='ffill', inplace=True)

# Prepare the data for Prophet
df_prophet = df_daily.reset_index()
df_prophet.columns = ['ds', 'y']

# Create and train the Prophet model
model = Prophet(daily_seasonality=True)
model.fit(df_prophet)

# Generate future dates for the next year
future_dates = model.make_future_dataframe(periods=365, freq='D')

# Make predictions for the next year
forecast = model.predict(future_dates)

# Extract the forecasted values for the next year
next_year_predictions = forecast['yhat'][-365:]

# Create a DataFrame with the next year's predictions
next_year_df = pd.DataFrame({
    'Date': pd.date_range(start=df_daily.index[-1], periods=366, freq='D')[1:],
    'Predicted_Value': next_year_predictions
})

print("Next Year Predictions:")
print(next_year_df)

DEBUG:cmdstanpy:input tempfile: /tmp/tmpi3gqurbx/r2cc6kic.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpi3gqurbx/c3mo2trb.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=75569', 'data', 'file=/tmp/tmpi3gqurbx/r2cc6kic.json', 'init=/tmp/tmpi3gqurbx/c3mo2trb.json', 'output', 'file=/tmp/tmpi3gqurbx/prophet_modelga0e8aag/prophet_model-20240508174407.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:44:07 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:44:07 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


Next Year Predictions:
           Date  Predicted_Value
1687 2023-07-06         0.000154
1688 2023-07-07         0.000154
1689 2023-07-08         0.000153
1690 2023-07-09         0.000150
1691 2023-07-10         0.000150
...         ...              ...
2047 2024-06-30         0.000162
2048 2024-07-01         0.000162
2049 2024-07-02         0.000163
2050 2024-07-03         0.000163
2051 2024-07-04         0.000162

[365 rows x 2 columns]
