1st

In [1]:
# Required Libraries
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.metrics import mean_squared_error
import numpy as np

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Aggregating the Footfall at Different Levels
daily_footfall = df.resample('D').count()  # Daily footfall
weekly_footfall = df.resample('W').count()  # Weekly footfall
monthly_footfall = df.resample('M').count()  # Monthly footfall

# Exploratory Data Analysis (EDA) - Visualizing Trends
plt.figure(figsize=(10, 6))
plt.plot(daily_footfall, label='Daily Footfall', color='blue')
plt.title('Daily Metro Footfall Over Time')
plt.xlabel('Date')
plt.ylabel('Footfall')
plt.legend()  # Adding legend
plt.show()

# Stationarity Check using Augmented Dickey-Fuller Test
result = adfuller(daily_footfall['transaction_id'])
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')

# Time-Series Decomposition
decomposition = seasonal_decompose(daily_footfall['transaction_id'], model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# ARIMA Model - Time-Series Forecasting
model = ARIMA(daily_footfall['transaction_id'], order=(5,1,0))  # Adjust order based on ACF/PACF analysis
model_fit = model.fit()
forecast = model_fit.forecast(steps=30)  # Forecast the next 30 days

# Plotting ARIMA Forecast
plt.figure(figsize=(10, 6))
plt.plot(daily_footfall.index, daily_footfall['transaction_id'], label='Observed', color='blue')
plt.plot(pd.date_range(daily_footfall.index[-1], periods=30, freq='D'), forecast, label='Forecast', color='orange')
plt.title('ARIMA Model: Observed vs Forecasted Footfall')
plt.xlabel('Date')
plt.ylabel('Footfall')
plt.legend()  # Adding legend
plt.show()

# Prophet Model - Alternative Time-Series Forecasting
daily_footfall_prophet = daily_footfall.reset_index()
daily_footfall_prophet.columns = ['ds', 'y']

prophet_model = Prophet()
prophet_model.fit(daily_footfall_prophet)
future = prophet_model.make_future_dataframe(periods=365)  # Forecast 1 year into the future
forecast_prophet = prophet_model.predict(future)

# Plotting Prophet Forecast
fig = prophet_model.plot(forecast_prophet)
plt.title('Prophet Model Forecast')
plt.xlabel('Date')
plt.ylabel('Footfall')
plt.legend(['Observed', 'Forecast'])  # Adding legend for Prophet model
plt.show()

# Evaluating the ARIMA Model
# Assuming you have true values for evaluation
# true_values = [place the actual values if available]
# rmse = np.sqrt(mean_squared_error(true_values, forecast))
# print(f'RMSE: {rmse}')


KeyError: 'timestamp'

2nd

In [None]:
# Required Libraries
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima  # Use auto_arima for automatic ARIMA order selection
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Aggregating the Footfall at Different Levels
daily_footfall = df.resample('D').count()  # Daily footfall

# Exploratory Data Analysis (EDA) - Visualizing Trends
plt.figure(figsize=(10, 6))
plt.plot(daily_footfall, label='Daily Footfall', color='blue')
plt.title('Daily Metro Footfall Over Time')
plt.xlabel('Date')
plt.ylabel('Footfall')
plt.legend()  # Adding legend
plt.show()

# Stationarity Check using Augmented Dickey-Fuller Test
result = adfuller(daily_footfall['transaction_id'])
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')

# Time-Series Decomposition
decomposition = seasonal_decompose(daily_footfall['transaction_id'], model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# Check the size of the dataset
print(f"Total dataset length: {len(daily_footfall)}")

# Splitting data into training and testing sets
test_size = 30
if len(daily_footfall) > test_size:
    train_size = len(daily_footfall) - test_size
    train, test = daily_footfall.iloc[:train_size], daily_footfall.iloc[train_size:]
else:
    train = daily_footfall
    test = pd.DataFrame()  # Empty test set if not enough data

# Check if train and test datasets are not empty
print(f"Train dataset length: {len(train)}")
print(f"Test dataset length: {len(test)}")

if len(train) > 0 and len(test) > 0:
    try:
        # Automatically determine ARIMA order
        auto_model = auto_arima(train['transaction_id'].dropna(), seasonal=False, stepwise=True)
        print("Auto ARIMA Model Summary:")
        print(auto_model.summary())
        
        # Fit ARIMA model with the best parameters
        arima_model = ARIMA(train['transaction_id'], order=auto_model.order)
        arima_model_fit = arima_model.fit()
        arima_forecast = arima_model_fit.forecast(steps=30)

        # Evaluate ARIMA model
        arima_rmse = np.sqrt(mean_squared_error(test['transaction_id'], arima_forecast))
        arima_mae = mean_absolute_error(test['transaction_id'], arima_forecast)

        # Display ARIMA Forecasted Data and Evaluation Metrics
        print("ARIMA Model - 30 Day Forecasted Footfall:")
        print(arima_forecast)
        print(f'ARIMA Model - RMSE: {arima_rmse}')
        print(f'ARIMA Model - MAE: {arima_mae}')

        # Plotting ARIMA Forecast
        plt.figure(figsize=(10, 6))
        plt.plot(daily_footfall.index, daily_footfall['transaction_id'], label='Observed', color='blue')
        plt.plot(pd.date_range(daily_footfall.index[-30], periods=30, freq='D'), arima_forecast, label='Forecast', color='orange')
        plt.title('ARIMA Model: Observed vs Forecasted Footfall')
        plt.xlabel('Date')
        plt.ylabel('Footfall')
        plt.legend()  # Adding legend
        plt.show()
    except Exception as e:
        print(f"An error occurred with ARIMA model: {e}")
else:
    print("Not enough data for ARIMA model. Please check the data.")

# Prophet Model - Alternative Time-Series Forecasting
daily_footfall_prophet = daily_footfall.reset_index()

# Print columns to debug
print("Columns before renaming:", daily_footfall_prophet.columns)

# Drop any additional columns and rename correctly
if len(daily_footfall_prophet.columns) == 2:
    daily_footfall_prophet.columns = ['ds', 'y']  # Rename for Prophet
else:
    raise ValueError(f"Unexpected number of columns: {len(daily_footfall_prophet.columns)}")

# Ensure 'ds' is datetime and 'y' is numeric
daily_footfall_prophet['ds'] = pd.to_datetime(daily_footfall_prophet['ds'])
daily_footfall_prophet['y'] = pd.to_numeric(daily_footfall_prophet['y'], errors='coerce')

# Fit Prophet model
prophet_model = Prophet()
prophet_model.fit(daily_footfall_prophet)
future = prophet_model.make_future_dataframe(periods=365)  # Forecast 1 year into the future
forecast_prophet = prophet_model.predict(future)

# Evaluate Prophet model
try:
    # Extracting the last 30 days of forecast data for comparison
    prophet_forecast_last_30 = forecast_prophet[['ds', 'yhat']].tail(30)
    prophet_test = daily_footfall_prophet.tail(30).set_index('ds')

    prophet_rmse = np.sqrt(mean_squared_error(prophet_test['y'], prophet_forecast_last_30['yhat']))
    prophet_mae = mean_absolute_error(prophet_test['y'], prophet_forecast_last_30['yhat'])

    # Display Prophet Forecasted Data and Evaluation Metrics
    print("Prophet Model - 365 Day Forecasted Footfall:")
    print(forecast_prophet[['ds', 'yhat']].tail(30))  # Display last 30 days forecast
    print(f'Prophet Model - RMSE: {prophet_rmse}')
    print(f'Prophet Model - MAE: {prophet_mae}')

    # Plotting Prophet Forecast
    fig = prophet_model.plot(forecast_prophet)
    plt.title('Prophet Model Forecast')
    plt.xlabel('Date')
    plt.ylabel('Footfall')
    plt.legend(['Observed', 'Forecast'])  # Adding legend for Prophet model
    plt.show()
except Exception as e:
    print(f"An error occurred with Prophet model: {e}")


3rd 

In [None]:
# Required Libraries
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Aggregating the Footfall at Different Levels
daily_footfall = df.resample('D').count()  # Daily footfall

# Exploratory Data Analysis (EDA) - Visualizing Trends
plt.figure(figsize=(10, 6))
plt.plot(daily_footfall, label='Daily Footfall', color='blue')
plt.title('Daily Metro Footfall Over Time')
plt.xlabel('Date')
plt.ylabel('Footfall')
plt.legend()
plt.show()

# Stationarity Check using Augmented Dickey-Fuller Test
result = adfuller(daily_footfall['transaction_id'].dropna())
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')
if result[1] > 0.05:
    print("The series is not stationary. Differencing may be required.")

# Time-Series Decomposition
decomposition = seasonal_decompose(daily_footfall['transaction_id'].dropna(), model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# Splitting data into training and testing sets
train_size = len(daily_footfall) - 30
train, test = daily_footfall.iloc[:train_size], daily_footfall.iloc[train_size:]

# ARIMA Model - Time-Series Forecasting
try:
    arima_model = ARIMA(train['transaction_id'].dropna(), order=(5,1,0))  # Adjust order based on ACF/PACF analysis
    arima_model_fit = arima_model.fit()
    arima_forecast = arima_model_fit.forecast(steps=30)

    # Evaluate ARIMA model
    arima_rmse = np.sqrt(mean_squared_error(test['transaction_id'], arima_forecast))
    arima_mae = mean_absolute_error(test['transaction_id'], arima_forecast)

    # Display ARIMA Forecasted Data and Evaluation Metrics
    print("ARIMA Model - 30 Day Forecasted Footfall:")
    print(arima_forecast)
    print(f'ARIMA Model - RMSE: {arima_rmse}')
    print(f'ARIMA Model - MAE: {arima_mae}')

    # Plotting ARIMA Forecast
    plt.figure(figsize=(10, 6))
    plt.plot(daily_footfall.index, daily_footfall['transaction_id'], label='Observed', color='blue')
    plt.plot(pd.date_range(daily_footfall.index[-30], periods=30, freq='D'), arima_forecast, label='Forecast', color='orange')
    plt.title('ARIMA Model: Observed vs Forecasted Footfall')
    plt.xlabel('Date')
    plt.ylabel('Footfall')
    plt.legend()
    plt.show()
except Exception as e:
    print(f"An error occurred with ARIMA model: {e}")

# Prophet Model - Alternative Time-Series Forecasting
daily_footfall_prophet = daily_footfall.reset_index()
daily_footfall_prophet.columns = ['ds', 'y']

prophet_model = Prophet()
prophet_model.fit(daily_footfall_prophet)
future = prophet_model.make_future_dataframe(periods=30)  # Match the length of the test set
forecast_prophet = prophet_model.predict(future)

# Evaluate Prophet model
try:
    # Extracting the last 30 days of forecast data for comparison
    prophet_forecast_last_30 = forecast_prophet[['ds', 'yhat']].tail(30)
    prophet_test = daily_footfall_prophet.tail(30).set_index('ds')

    prophet_rmse = np.sqrt(mean_squared_error(prophet_test['y'], prophet_forecast_last_30['yhat']))
    prophet_mae = mean_absolute_error(prophet_test['y'], prophet_forecast_last_30['yhat'])

    # Display Prophet Forecasted Data and Evaluation Metrics
    print("Prophet Model - 30 Day Forecasted Footfall:")
    print(prophet_forecast_last_30)
    print(f'Prophet Model - RMSE: {prophet_rmse}')
    print(f'Prophet Model - MAE: {prophet_mae}')

    # Plotting Prophet Forecast
    fig = prophet_model.plot(forecast_prophet)
    plt.title('Prophet Model Forecast')
    plt.xlabel('Date')
    plt.ylabel('Footfall')
    plt.show()
except Exception as e:
    print(f"An error occurred with Prophet model: {e}")


DAILY FORECASTING DATA

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima  # Use auto_arima for automatic ARIMA order selection
from prophet import Prophet
import numpy as np

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Aggregating the Footfall at Different Levels
daily_footfall = df.resample('D').count()  # Daily footfall

# Exploratory Data Analysis (EDA) - Visualizing Trends
plt.figure(figsize=(10, 6))
plt.plot(daily_footfall, label='Daily Footfall', color='blue')
plt.title('Daily Metro Footfall Over Time')
plt.xlabel('Date')
plt.ylabel('Footfall')
plt.legend()  # Adding legend
plt.show()

# Stationarity Check using Augmented Dickey-Fuller Test
result = adfuller(daily_footfall['transaction_id'])
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')

# Time-Series Decomposition
decomposition = seasonal_decompose(daily_footfall['transaction_id'], model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# Use the entire dataset for training the ARIMA model
try:
    # Automatically determine ARIMA order
    auto_model = auto_arima(daily_footfall['transaction_id'].dropna(), seasonal=False, stepwise=True)
    print("Auto ARIMA Model Summary:")
    print(auto_model.summary())

    # Fit ARIMA model with the best parameters
    arima_model = ARIMA(daily_footfall['transaction_id'], order=auto_model.order)
    arima_model_fit = arima_model.fit()
    
    # Forecasting the next 30 days
    arima_forecast = arima_model_fit.forecast(steps=365)
    
    # Creating a DataFrame for ARIMA Forecast
    arima_forecast_dates = pd.date_range(daily_footfall.index[-1] + pd.Timedelta(days=1), periods=365)
    arima_forecast_df = pd.DataFrame({'timestamp': arima_forecast_dates, 'ARIMA_Forecast': arima_forecast})

    # Display ARIMA Forecasted Data
    print("ARIMA Model - 30 Day Forecasted Footfall:")
    print(arima_forecast_df)

    # Plotting ARIMA Forecast
    plt.figure(figsize=(10, 6))
    plt.plot(daily_footfall.index, daily_footfall['transaction_id'], label='Observed', color='blue')
    plt.plot(arima_forecast_df['timestamp'], arima_forecast_df['ARIMA_Forecast'], label='Forecast', color='orange')
    plt.title('ARIMA Model: Observed vs Forecasted Footfall')
    plt.xlabel('Date')
    plt.ylabel('Footfall')
    plt.legend()  # Adding legend
    plt.show()
except Exception as e:
    print(f"An error occurred with ARIMA model: {e}")

# Prophet Model - Alternative Time-Series Forecasting
daily_footfall_prophet = daily_footfall.reset_index()

# Drop any additional columns and rename correctly
daily_footfall_prophet.columns = ['ds', 'y']  # Rename for Prophet

# Ensure 'ds' is datetime and 'y' is numeric
daily_footfall_prophet['ds'] = pd.to_datetime(daily_footfall_prophet['ds'])
daily_footfall_prophet['y'] = pd.to_numeric(daily_footfall_prophet['y'], errors='coerce')

# Fit Prophet model
prophet_model = Prophet()
prophet_model.fit(daily_footfall_prophet)
future = prophet_model.make_future_dataframe(periods=365)  # Forecast 30 days into the future
forecast_prophet = prophet_model.predict(future)

# Creating a DataFrame for Prophet Forecast
prophet_forecast_df = forecast_prophet[['ds', 'yhat']].tail(365)
prophet_forecast_df.columns = ['timestamp', 'Prophet_Forecast']

# Merging ARIMA and Prophet Forecasts
forecast_df = pd.merge(arima_forecast_df, prophet_forecast_df, on='timestamp', how='outer')

# Save to CSV file
forecast_df.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\daily_forecasted_data.csv', index=False)

# Display the merged forecast DataFrame
print(forecast_df.head())


forecasting daily , weekly , monthly , yearly , weekend and weekday txns

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima  # Use auto_arima for automatic ARIMA order selection
from prophet import Prophet
import numpy as np

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Aggregating the Footfall at Different Levels
daily_footfall = df.resample('D').count()  # Daily footfall

# Calculate Weekly, Monthly, Yearly, Weekend, and Weekday Footfall
weekly_footfall = df.resample('W').count()  # Weekly footfall
monthly_footfall = df.resample('M').count()  # Monthly footfall
yearly_footfall = df.resample('Y').count()  # Yearly footfall

# Weekend and Weekday Footfall
# Create a 'day_of_week' column for easier filtering
df['day_of_week'] = df.index.dayofweek  # Monday=0, Sunday=6
weekend_footfall = df[df['day_of_week'] >= 5].resample('D').count()  # Weekend (Saturday=5, Sunday=6)
weekday_footfall = df[df['day_of_week'] < 5].resample('D').count()  # Weekday (Monday=0 to Friday=4)

# Drop the 'day_of_week' column from aggregated data
df.drop(columns=['day_of_week'], inplace=True)

# Exploratory Data Analysis (EDA) - Visualizing Trends
plt.figure(figsize=(10, 6))
plt.plot(daily_footfall, label='Daily Footfall', color='blue')
plt.title('Daily Metro Footfall Over Time')
plt.xlabel('Date')
plt.ylabel('Footfall')
plt.legend()  # Adding legend
plt.show()

# Stationarity Check using Augmented Dickey-Fuller Test
result = adfuller(daily_footfall['transaction_id'])
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')

# Time-Series Decomposition
decomposition = seasonal_decompose(daily_footfall['transaction_id'], model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# Use the entire dataset for training the ARIMA model
try:
    # Automatically determine ARIMA order
    auto_model = auto_arima(daily_footfall['transaction_id'].dropna(), seasonal=False, stepwise=True)
    print("Auto ARIMA Model Summary:")
    print(auto_model.summary())

    # Fit ARIMA model with the best parameters
    arima_model = ARIMA(daily_footfall['transaction_id'], order=auto_model.order)
    arima_model_fit = arima_model.fit()
    
    # Forecasting the next 365 days
    arima_forecast = arima_model_fit.forecast(steps=365)
    
    # Creating a DataFrame for ARIMA Forecast
    arima_forecast_dates = pd.date_range(daily_footfall.index[-1] + pd.Timedelta(days=1), periods=365)
    arima_forecast_df = pd.DataFrame({'timestamp': arima_forecast_dates, 'ARIMA_Forecast': arima_forecast})

    # Display ARIMA Forecasted Data
    print("ARIMA Model - 365 Day Forecasted Footfall:")
    print(arima_forecast_df)

    # Plotting ARIMA Forecast
    plt.figure(figsize=(10, 6))
    plt.plot(daily_footfall.index, daily_footfall['transaction_id'], label='Observed', color='blue')
    plt.plot(arima_forecast_df['timestamp'], arima_forecast_df['ARIMA_Forecast'], label='Forecast', color='orange')
    plt.title('ARIMA Model: Observed vs Forecasted Footfall')
    plt.xlabel('Date')
    plt.ylabel('Footfall')
    plt.legend()  # Adding legend
    plt.show()
except Exception as e:
    print(f"An error occurred with ARIMA model: {e}")

# Prophet Model - Alternative Time-Series Forecasting
daily_footfall_prophet = daily_footfall.reset_index()

# Drop any additional columns and rename correctly
daily_footfall_prophet.columns = ['ds', 'y']  # Rename for Prophet

# Ensure 'ds' is datetime and 'y' is numeric
daily_footfall_prophet['ds'] = pd.to_datetime(daily_footfall_prophet['ds'])
daily_footfall_prophet['y'] = pd.to_numeric(daily_footfall_prophet['y'], errors='coerce')

# Fit Prophet model
prophet_model = Prophet()
prophet_model.fit(daily_footfall_prophet)
future = prophet_model.make_future_dataframe(periods=365)  # Forecast 365 days into the future
forecast_prophet = prophet_model.predict(future)

# Creating a DataFrame for Prophet Forecast
prophet_forecast_df = forecast_prophet[['ds', 'yhat']].tail(365)
prophet_forecast_df.columns = ['timestamp', 'transaction_id']

# Save Prophet Forecast to CSV
prophet_forecast_df.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\daily_footfall.csv', index=False)

# Display the Prophet Forecast DataFrame
print(prophet_forecast_df.head())

# The following lines are commented out or removed to focus on saving only the Prophet forecast
# Save Aggregated Data to CSV
weekly_footfall.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekly_footfall.csv')
monthly_footfall.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\monthly_footfall.csv')
yearly_footfall.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\yearly_footfall.csv')
weekend_footfall.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekend_footfall.csv')
weekday_footfall.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekday_footfall.csv')


GRAPH PLOTTING FOR EVERY ASPECT

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def plot_raw_data(data_type, file_paths):
    """
    Plots raw data based on the specified data type from the given file paths.
    Additionally plots weekday vs. weekend data if 'weekday_vs_weekend' is specified.

    Parameters:
    - data_type (str): Type of data to plot ('daily', 'weekly', 'monthly', 'yearly', 'weekend', 'weekday', 'weekday_vs_weekend').
    - file_paths (dict): Dictionary containing file paths for each data type.
    """
    if data_type == 'weekday_vs_weekend':
        # Load Weekend Dataset
        weekend_file_path = file_paths['weekend']
        weekend_df = pd.read_csv(weekend_file_path)
        weekend_df['timestamp'] = pd.to_datetime(weekend_df['timestamp'])
        weekend_df.set_index('timestamp', inplace=True)
        
        # Load Weekday Dataset
        weekday_file_path = file_paths['weekday']
        weekday_df = pd.read_csv(weekday_file_path)
        weekday_df['timestamp'] = pd.to_datetime(weekday_df['timestamp'])
        weekday_df.set_index('timestamp', inplace=True)
        
        # Print the raw data to check its structure
        print("Raw weekend data:")
        print(weekend_df.head())
        print("Raw weekday data:")
        print(weekday_df.head())
        
        # Plotting Weekday vs Weekend
        plt.figure(figsize=(12, 6))
        plt.plot(weekend_df.index, weekend_df['transaction_id'], label='Weekend Footfall', color='purple')
        plt.plot(weekday_df.index, weekday_df['transaction_id'], label='Weekday Footfall', color='cyan')
        plt.title('Weekday vs Weekend Footfall Over Time')
        plt.xlabel('Timestamp')
        plt.ylabel('Transaction ID')
        plt.legend()
        plt.grid(True)
        plt.show()
    
    elif data_type in file_paths:
        # Load Dataset
        file_path = file_paths[data_type]
        df = pd.read_csv(file_path)
        
        # Data Preprocessing
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df.set_index('timestamp', inplace=True)
        
        # Print the raw data to check its structure
        print(f"Raw data for {data_type}:")
        print(df.head())
        
        # Plotting the raw data
        plt.figure(figsize=(12, 6))
        plt.plot(df.index, df['transaction_id'], label=f'Raw {data_type.capitalize()} Footfall', color='blue')
        plt.title(f'Raw {data_type.capitalize()} Footfall Over Time')
        plt.xlabel('Timestamp')
        plt.ylabel('Transaction ID')
        plt.legend()
        plt.grid(True)
        plt.show()
    
    else:
        print(f"File path for '{data_type}' data type is not provided.")

# Example usage
file_paths = {
    'daily': r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\daily_footfall.csv',
    'weekly': r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekly_footfall.csv',
    'monthly': r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\monthly_footfall.csv',
    'yearly': r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\yearly_footfall.csv',
    'weekend': r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekend_footfall.csv',
    'weekday': r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekday_footfall.csv'
}

data_type = 'weekly'  # Replace with desired data type ('daily', 'weekly', 'monthly', 'yearly', 'weekend', 'weekday', 'weekday_vs_weekend')
plot_raw_data(data_type, file_paths)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def plot_raw_data(data_type, file_paths):
    """
    Plots raw data based on the specified data type from the given file paths.
    Additionally plots weekday vs. weekend data if 'weekday_vs_weekend' is specified.
    Aggregates daily data into monthly totals if 'monthly_aggregation' is specified.

    Parameters:
    - data_type (str): Type of data to plot ('daily', 'weekly', 'monthly', 'yearly', 'weekend', 'weekday', 'weekday_vs_weekend', 'monthly_aggregation').
    - file_paths (dict): Dictionary containing file paths for each data type.
    """
    if data_type == 'weekday_vs_weekend':
        # Load Weekend Dataset
        weekend_file_path = file_paths['weekend']
        weekend_df = pd.read_csv(weekend_file_path)
        weekend_df['timestamp'] = pd.to_datetime(weekend_df['timestamp'])
        weekend_df.set_index('timestamp', inplace=True)
        
        # Load Weekday Dataset
        weekday_file_path = file_paths['weekday']
        weekday_df = pd.read_csv(weekday_file_path)
        weekday_df['timestamp'] = pd.to_datetime(weekday_df['timestamp'])
        weekday_df.set_index('timestamp', inplace=True)
        
        # Print the raw data to check its structure
        print("Raw weekend data:")
        print(weekend_df.head())
        print("Raw weekday data:")
        print(weekday_df.head())
        
        # Plotting Weekday vs Weekend
        plt.figure(figsize=(12, 6))
        plt.plot(weekend_df.index, weekend_df['transaction_id'], label='Weekend Footfall', color='purple')
        plt.plot(weekday_df.index, weekday_df['transaction_id'], label='Weekday Footfall', color='cyan')
        plt.title('Weekday vs Weekend Footfall Over Time')
        plt.xlabel('Timestamp')
        plt.ylabel('Transaction ID')
        plt.legend()
        plt.grid(True)
        plt.show()
    
    elif data_type == 'monthly_aggregation':
        # Load Daily Dataset
        daily_file_path = file_paths['daily']
        daily_df = pd.read_csv(daily_file_path)
        daily_df['timestamp'] = pd.to_datetime(daily_df['timestamp'])
        daily_df.set_index('timestamp', inplace=True)
        
        # Aggregate Data to Monthly Totals
        monthly_footfall = daily_df.resample('M').sum()  # Aggregating daily data into monthly totals
        
        # Print the aggregated data
        print("Monthly Aggregated Data:")
        print(monthly_footfall.head())
        
        # Optionally, save the aggregated data to a new CSV file
        aggregated_file_path = r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\monthly_aggregated_data.csv'
        monthly_footfall.to_csv(aggregated_file_path)
        print(f"Aggregated data saved to {aggregated_file_path}")
        
        # Plotting the aggregated monthly data
        plt.figure(figsize=(12, 6))
        plt.plot(monthly_footfall.index, monthly_footfall['transaction_id'], label='Monthly Footfall', color='orange')
        plt.title('Monthly Footfall Aggregated from Daily Data')
        plt.xlabel('Timestamp')
        plt.ylabel('Transaction ID')
        plt.legend()
        plt.grid(True)
        plt.show()
    
    elif data_type in file_paths:
        # Load Dataset
        file_path = file_paths[data_type]
        df = pd.read_csv(file_path)
        
        # Data Preprocessing
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df.set_index('timestamp', inplace=True)
        
        # Print the raw data to check its structure
        print(f"Raw data for {data_type}:")
        print(df.head())
        
        # Plotting the raw data
        plt.figure(figsize=(12, 6))
        plt.plot(df.index, df['transaction_id'], label=f'Raw {data_type.capitalize()} Footfall', color='blue')
        plt.title(f'Raw {data_type.capitalize()} Footfall Over Time')
        plt.xlabel('Timestamp')
        plt.ylabel('Transaction ID')
        plt.legend()
        plt.grid(True)
        plt.show()
    
    else:
        print(f"File path for '{data_type}' data type is not provided.")

# Example usage
file_paths = {
    'daily': r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\daily_footfall.csv',
    'weekly': r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekly_footfall.csv',
    'monthly': r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\monthly_footfall.csv',
    'yearly': r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\yearly_footfall.csv',
    'weekend': r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekend_footfall.csv',
    'weekday': r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekday_footfall.csv'
}

data_type = 'weekday_vs_weekend'  # Replace with desired data type ('daily', 'weekly', 'monthly', 'yearly', 'weekend', 'weekday', 'weekday_vs_weekend', 'monthly_aggregation')
plot_raw_data(data_type, file_paths)


new forecast 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from prophet import Prophet
import numpy as np

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Aggregating the Footfall at Different Levels
daily_footfall = df.resample('D').count()  # Daily footfall

# Time-Series Decomposition
decomposition = seasonal_decompose(daily_footfall['transaction_id'], model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# ARIMA Forecasting
try:
    # Automatically determine ARIMA order
    auto_model = auto_arima(daily_footfall['transaction_id'].dropna(), seasonal=False, stepwise=True)
    print("Auto ARIMA Model Summary:")
    print(auto_model.summary())

    # Fit ARIMA model with the best parameters
    arima_model = ARIMA(daily_footfall['transaction_id'], order=auto_model.order)
    arima_model_fit = arima_model.fit()
    
    # Forecasting the next 365 days
    arima_forecast = arima_model_fit.forecast(steps=365)
    
    # Creating a DataFrame for ARIMA Forecast
    arima_forecast_dates = pd.date_range(daily_footfall.index[-1] + pd.Timedelta(days=1), periods=365)
    arima_forecast_df = pd.DataFrame({'timestamp': arima_forecast_dates, 'ARIMA_Forecast': arima_forecast})

    # Display ARIMA Forecasted Data
    print("ARIMA Model - 365 Day Forecasted Footfall:")
    print(arima_forecast_df)

    # Aggregation of ARIMA Forecast
    daily_forecast = arima_forecast_df.copy()
    daily_forecast.set_index('timestamp', inplace=True)
    weekly_forecast = daily_forecast.resample('W').sum()
    monthly_forecast = daily_forecast.resample('M').sum()
    yearly_forecast = daily_forecast.resample('Y').sum()
    weekend_forecast = daily_forecast[daily_forecast.index.dayofweek >= 5].resample('D').sum()
    weekday_forecast = daily_forecast[daily_forecast.index.dayofweek < 5].resample('D').sum()

    # Plot ARIMA Forecast
    plt.figure(figsize=(10, 6))
    plt.plot(daily_footfall.index, daily_footfall['transaction_id'], label='Observed', color='blue')
    plt.plot(arima_forecast_df['timestamp'], arima_forecast_df['ARIMA_Forecast'], label='ARIMA Forecast', color='orange')
    plt.title('ARIMA Model: Observed vs Forecasted Footfall')
    plt.xlabel('Date')
    plt.ylabel('Footfall')
    plt.legend()
    plt.show()

except Exception as e:
    print(f"An error occurred with ARIMA model: {e}")

# Prophet Model - Alternative Time-Series Forecasting
daily_footfall_prophet = daily_footfall.reset_index()
daily_footfall_prophet.columns = ['ds', 'y']
daily_footfall_prophet['ds'] = pd.to_datetime(daily_footfall_prophet['ds'])
daily_footfall_prophet['y'] = pd.to_numeric(daily_footfall_prophet['y'], errors='coerce')

# Fit Prophet model
prophet_model = Prophet()
prophet_model.fit(daily_footfall_prophet)
future = prophet_model.make_future_dataframe(periods=365)  # Forecast 365 days into the future
forecast_prophet = prophet_model.predict(future)

# Creating a DataFrame for Prophet Forecast
prophet_forecast_df = forecast_prophet[['ds', 'yhat']].tail(365)
prophet_forecast_df.columns = ['timestamp', 'transaction_id']

# Save Prophet Forecast to CSV
prophet_forecast_df.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\daily_footfall_forecast_prophet.csv', index=False)

# Aggregation of Prophet Forecast
daily_forecast_prophet = prophet_forecast_df.copy()
daily_forecast_prophet.set_index('timestamp', inplace=True)
weekly_forecast_prophet = daily_forecast_prophet.resample('W').sum()
monthly_forecast_prophet = daily_forecast_prophet.resample('M').sum()
yearly_forecast_prophet = daily_forecast_prophet.resample('Y').sum()
weekend_forecast_prophet = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek >= 5].resample('D').sum()
weekday_forecast_prophet = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek < 5].resample('D').sum()

# Save Aggregated Forecast Data to CSV
weekly_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekly_forecast_prophet.csv')
monthly_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\monthly_forecast_prophet.csv')
yearly_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\yearly_forecast_prophet.csv')
weekend_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekend_forecast_prophet.csv')
weekday_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekday_forecast_prophet.csv')

# Display the Prophet Forecast DataFrame
print(prophet_forecast_df.head())


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from prophet import Prophet
import numpy as np

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Aggregating the Footfall at Different Levels
daily_footfall = df.resample('D').count()  # Daily footfall

# Time-Series Decomposition
decomposition = seasonal_decompose(daily_footfall['transaction_id'], model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# ARIMA Forecasting
try:
    # Automatically determine ARIMA order
    auto_model = auto_arima(daily_footfall['transaction_id'].dropna(), seasonal=False, stepwise=True)
    print("Auto ARIMA Model Summary:")
    print(auto_model.summary())

    # Fit ARIMA model with the best parameters
    arima_model = ARIMA(daily_footfall['transaction_id'], order=auto_model.order)
    arima_model_fit = arima_model.fit()
    
    # Forecasting the next 365 days
    arima_forecast = arima_model_fit.forecast(steps=365)
    
    # Creating a DataFrame for ARIMA Forecast
    arima_forecast_dates = pd.date_range(daily_footfall.index[-1] + pd.Timedelta(days=1), periods=365)
    arima_forecast_df = pd.DataFrame({'timestamp': arima_forecast_dates, 'ARIMA_Forecast': arima_forecast})

    # Display ARIMA Forecasted Data
    print("ARIMA Model - 365 Day Forecasted Footfall:")
    print(arima_forecast_df)

    # Aggregation of ARIMA Forecast
    daily_forecast = arima_forecast_df.copy()
    daily_forecast.set_index('timestamp', inplace=True)
    weekly_forecast = daily_forecast.resample('W').sum()
    monthly_forecast = daily_forecast.resample('M').sum()
    yearly_forecast = daily_forecast.resample('Y').sum()
    weekend_forecast = daily_forecast[daily_forecast.index.dayofweek >= 5].resample('D').sum()
    weekday_forecast = daily_forecast[daily_forecast.index.dayofweek < 5].resample('D').sum()

    # Add week of year to weekly forecast
    weekly_forecast['week_of_year'] = weekly_forecast.index.isocalendar().week

    # Add month name to monthly forecast
    monthly_forecast['month_name'] = monthly_forecast.index.month_name()

    # Plot ARIMA Forecast
    plt.figure(figsize=(10, 6))
    plt.plot(daily_footfall.index, daily_footfall['transaction_id'], label='Observed', color='blue')
    plt.plot(arima_forecast_df['timestamp'], arima_forecast_df['ARIMA_Forecast'], label='ARIMA Forecast', color='orange')
    plt.title('ARIMA Model: Observed vs Forecasted Footfall')
    plt.xlabel('Date')
    plt.ylabel('Footfall')
    plt.legend()
    plt.show()

except Exception as e:
    print(f"An error occurred with ARIMA model: {e}")

# Prophet Model - Alternative Time-Series Forecasting
daily_footfall_prophet = daily_footfall.reset_index()
daily_footfall_prophet.columns = ['ds', 'y']
daily_footfall_prophet['ds'] = pd.to_datetime(daily_footfall_prophet['ds'])
daily_footfall_prophet['y'] = pd.to_numeric(daily_footfall_prophet['y'], errors='coerce')

# Fit Prophet model
prophet_model = Prophet()
prophet_model.fit(daily_footfall_prophet)
future = prophet_model.make_future_dataframe(periods=365)  # Forecast 365 days into the future
forecast_prophet = prophet_model.predict(future)

# Creating a DataFrame for Prophet Forecast
prophet_forecast_df = forecast_prophet[['ds', 'yhat']].tail(365)
prophet_forecast_df.columns = ['timestamp', 'transaction_id']

# Save Prophet Forecast to CSV
prophet_forecast_df.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\daily_footfall_forecast_prophet.csv', index=False)

# Aggregation of Prophet Forecast
daily_forecast_prophet = prophet_forecast_df.copy()
daily_forecast_prophet.set_index('timestamp', inplace=True)
weekly_forecast_prophet = daily_forecast_prophet.resample('W').sum()
monthly_forecast_prophet = daily_forecast_prophet.resample('M').sum()
yearly_forecast_prophet = daily_forecast_prophet.resample('Y').sum()
weekend_forecast_prophet = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek >= 5].resample('D').sum()
weekday_forecast_prophet = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek < 5].resample('D').sum()

# Add week of year to weekly forecast
weekly_forecast_prophet['week_of_year'] = weekly_forecast_prophet.index.isocalendar().week

# Add month name to monthly forecast
monthly_forecast_prophet['month_name'] = monthly_forecast_prophet.index.month_name()

# Save Aggregated Forecast Data to CSV
weekly_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekly_forecast_prophet.csv')
monthly_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\monthly_forecast_prophet.csv')
yearly_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\yearly_forecast_prophet.csv')
weekend_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekend_forecast_prophet.csv')
weekday_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekday_forecast_prophet.csv')

# Display the Prophet Forecast DataFrame
print(prophet_forecast_df.head())


user input forecasting

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from prophet import Prophet
import numpy as np

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Aggregating the Footfall at Different Levels
daily_footfall = df.resample('D').count()  # Daily footfall

# Input start and end date for forecasting
start_date = input("Enter the start date for forecasting (YYYY-MM-DD): ")
end_date = input("Enter the end date for forecasting (YYYY-MM-DD): ")

# Convert input dates to pandas datetime
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)

# Number of days for forecasting based on the entered dates
forecast_period = (end_date - daily_footfall.index[-1]).days

if forecast_period <= 0:
    print("Error: The end date must be after the last available date in the dataset.")
else:
    # ARIMA Forecasting
    try:
        # Automatically determine ARIMA order
        auto_model = auto_arima(daily_footfall['transaction_id'].dropna(), seasonal=False, stepwise=True)
        print("Auto ARIMA Model Summary:")
        print(auto_model.summary())

        # Fit ARIMA model with the best parameters
        arima_model = ARIMA(daily_footfall['transaction_id'], order=auto_model.order)
        arima_model_fit = arima_model.fit()

        # Forecast for the specified period
        arima_forecast = arima_model_fit.forecast(steps=forecast_period)

        # Creating a DataFrame for ARIMA Forecast
        arima_forecast_dates = pd.date_range(daily_footfall.index[-1] + pd.Timedelta(days=1), periods=forecast_period)
        arima_forecast_df = pd.DataFrame({'timestamp': arima_forecast_dates, 'ARIMA_Forecast': arima_forecast})

        # Display ARIMA Forecasted Data
        print("ARIMA Model - Forecasted Footfall:")
        print(arima_forecast_df)

        # Aggregation of ARIMA Forecast
        daily_forecast = arima_forecast_df.copy()
        daily_forecast.set_index('timestamp', inplace=True)
        weekly_forecast = daily_forecast.resample('W').sum()
        monthly_forecast = daily_forecast.resample('M').sum()
        yearly_forecast = daily_forecast.resample('Y').sum()
        weekend_forecast = daily_forecast[daily_forecast.index.dayofweek >= 5].resample('D').sum()
        weekday_forecast = daily_forecast[daily_forecast.index.dayofweek < 5].resample('D').sum()

        # Plot ARIMA Forecast
        plt.figure(figsize=(10, 6))
        plt.plot(daily_footfall.index, daily_footfall['transaction_id'], label='Observed', color='blue')
        plt.plot(arima_forecast_df['timestamp'], arima_forecast_df['ARIMA_Forecast'], label='ARIMA Forecast', color='orange')
        plt.title(f'ARIMA Model: Observed vs Forecasted Footfall ({start_date} to {end_date})')
        plt.xlabel('Date')
        plt.ylabel('Footfall')
        plt.legend()
        plt.show()

    except Exception as e:
        print(f"An error occurred with ARIMA model: {e}")

    # Prophet Model - Alternative Time-Series Forecasting
    daily_footfall_prophet = daily_footfall.reset_index()
    daily_footfall_prophet.columns = ['ds', 'y']
    daily_footfall_prophet['ds'] = pd.to_datetime(daily_footfall_prophet['ds'])
    daily_footfall_prophet['y'] = pd.to_numeric(daily_footfall_prophet['y'], errors='coerce')

    # Fit Prophet model
    prophet_model = Prophet()
    prophet_model.fit(daily_footfall_prophet)

    # Generate future dataframe for the specified forecasting period
    future_dates = pd.date_range(daily_footfall.index[-1] + pd.Timedelta(days=1), end=end_date, freq='D')
    future_df = pd.DataFrame({'ds': future_dates})

    # Predict using Prophet model
    forecast_prophet = prophet_model.predict(future_df)

    # Creating a DataFrame for Prophet Forecast
    prophet_forecast_df = forecast_prophet[['ds', 'yhat']]
    prophet_forecast_df.columns = ['timestamp', 'transaction_id']

    # Display the Prophet Forecast DataFrame
    print(prophet_forecast_df.head())

    # Save Prophet Forecast to CSV
    prophet_forecast_df.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\daily_footfall_forecast_prophet.csv', index=False)

    # Plot Prophet Forecast
    plt.figure(figsize=(10, 6))
    plt.plot(daily_footfall.index, daily_footfall['transaction_id'], label='Observed', color='blue')
    plt.plot(prophet_forecast_df['timestamp'], prophet_forecast_df['transaction_id'], label='Prophet Forecast', color='green')
    plt.title(f'Prophet Model: Observed vs Forecasted Footfall ({start_date} to {end_date})')
    plt.xlabel('Date')
    plt.ylabel('Footfall')
    plt.legend()
    plt.show()

print("Forecasting completed successfully!")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from prophet import Prophet
import numpy as np

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Aggregating the Footfall at Different Levels
daily_footfall = df.resample('D').count()  # Daily footfall

# Time-Series Decomposition
decomposition = seasonal_decompose(daily_footfall['transaction_id'], model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# Specify Forecast Period
start_date = pd.to_datetime(input("Enter forecast start date (YYYY-MM-DD): "))
end_date = pd.to_datetime(input("Enter forecast end date (YYYY-MM-DD): "))
forecast_period = (end_date - start_date).days

# ARIMA Forecasting
try:
    # Automatically determine ARIMA order
    auto_model = auto_arima(daily_footfall['transaction_id'].dropna(), seasonal=False, stepwise=True)
    print("Auto ARIMA Model Summary:")
    print(auto_model.summary())

    # Fit ARIMA model with the best parameters
    arima_model = ARIMA(daily_footfall['transaction_id'], order=auto_model.order)
    arima_model_fit = arima_model.fit()

    # Adjust the forecast range based on the start and end dates provided
    forecast_start_index = (start_date - daily_footfall.index[-1]).days  # Days from last data point to start date
    arima_forecast = arima_model_fit.forecast(steps=(forecast_period + forecast_start_index))

    # Creating a DataFrame for ARIMA Forecast
    arima_forecast_dates = pd.date_range(daily_footfall.index[-1] + pd.Timedelta(days=1), periods=len(arima_forecast))
    arima_forecast_df = pd.DataFrame({'timestamp': arima_forecast_dates, 'ARIMA_Forecast': arima_forecast})

    # Filter the forecast to start exactly from the user-specified start date
    arima_forecast_df = arima_forecast_df[arima_forecast_df['timestamp'] >= start_date]

    # Display ARIMA Forecasted Data
    print(f"ARIMA Model - Forecasted Footfall from {start_date.date()} to {end_date.date()}:")
    print(arima_forecast_df)

    # Aggregation of ARIMA Forecast
    daily_forecast = arima_forecast_df.copy()
    daily_forecast.set_index('timestamp', inplace=True)
    weekly_forecast = daily_forecast.resample('W').sum()
    monthly_forecast = daily_forecast.resample('M').sum()
    yearly_forecast = daily_forecast.resample('Y').sum()
    weekend_forecast = daily_forecast[daily_forecast.index.dayofweek >= 5].resample('D').sum()
    weekday_forecast = daily_forecast[daily_forecast.index.dayofweek < 5].resample('D').sum()

    # Add week of year and day of week to weekly forecast
    weekly_forecast['week_of_year'] = weekly_forecast.index.isocalendar().week
    weekly_forecast['day_of_week'] = weekly_forecast.index.day_name()

    # Add month name to monthly forecast
    monthly_forecast['month_name'] = monthly_forecast.index.month_name()

    # Add day of week to weekend and weekday forecasts
    weekend_forecast['day_of_week'] = weekend_forecast.index.day_name()
    weekday_forecast['day_of_week'] = weekday_forecast.index.day_name()

    # Plot ARIMA Forecast
    plt.figure(figsize=(10, 6))
    plt.plot(daily_footfall.index, daily_footfall['transaction_id'], label='Observed', color='blue')
    plt.plot(arima_forecast_df['timestamp'], arima_forecast_df['ARIMA_Forecast'], label='ARIMA Forecast', color='orange')
    plt.title(f'ARIMA Model: Observed vs Forecasted Footfall ({start_date.date()} to {end_date.date()})')
    plt.xlabel('Date')
    plt.ylabel('Footfall')
    plt.legend()
    plt.show()

except Exception as e:
    print(f"An error occurred with ARIMA model: {e}")

# Prophet Model - Alternative Time-Series Forecasting
daily_footfall_prophet = daily_footfall.reset_index()
daily_footfall_prophet.columns = ['ds', 'y']
daily_footfall_prophet['ds'] = pd.to_datetime(daily_footfall_prophet['ds'])
daily_footfall_prophet['y'] = pd.to_numeric(daily_footfall_prophet['y'], errors='coerce')

# Fit Prophet model
prophet_model = Prophet()
prophet_model.fit(daily_footfall_prophet)

# Create future dataframe starting from the user-specified start_date
future_df = pd.date_range(start=start_date, end=end_date)
future_prophet = pd.DataFrame({'ds': future_df})
forecast_prophet = prophet_model.predict(future_prophet)

# Creating a DataFrame for Prophet Forecast
prophet_forecast_df = forecast_prophet[['ds', 'yhat']]
prophet_forecast_df.columns = ['timestamp', 'transaction_id']

# Save Prophet Forecast to CSV
prophet_forecast_df.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\daily_footfall_forecast_prophet.csv', index=False)

# Aggregation of Prophet Forecast
daily_forecast_prophet = prophet_forecast_df.copy()
daily_forecast_prophet.set_index('timestamp', inplace=True)
weekly_forecast_prophet = daily_forecast_prophet.resample('W').sum()
monthly_forecast_prophet = daily_forecast_prophet.resample('M').sum()
yearly_forecast_prophet = daily_forecast_prophet.resample('Y').sum()
weekend_forecast_prophet = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek >= 5].resample('D').sum()
weekday_forecast_prophet = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek < 5].resample('D').sum()

# Add week of year and day of week to weekly forecast
weekly_forecast_prophet['week_of_year'] = weekly_forecast_prophet.index.isocalendar().week
weekly_forecast_prophet['day_of_week'] = weekly_forecast_prophet.index.day_name()

# Add month name to monthly forecast
monthly_forecast_prophet['month_name'] = monthly_forecast_prophet.index.month_name()

# Add day of week to weekend and weekday forecasts
weekend_forecast_prophet['day_of_week'] = weekend_forecast_prophet.index.day_name()
weekday_forecast_prophet['day_of_week'] = weekday_forecast_prophet.index.day_name()

# Save Aggregated Forecast Data to CSV
weekly_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekly_forecast_prophet.csv')
monthly_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\monthly_forecast_prophet.csv')
yearly_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\yearly_forecast_prophet.csv')
weekend_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekend_forecast_prophet.csv')
weekday_forecast_prophet.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\weekday_forecast_prophet.csv')

# Display the Prophet Forecast DataFrame
print(prophet_forecast_df.head())

print("Files saved to their respective destinations")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from prophet import Prophet
import numpy as np
import os

# Load Dataset
df = pd.read_csv('transaction_updated.csv')  # Replace with your dataset path

# Data Preprocessing
# Handling the date format explicitly
df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%dT%H-%M-%S.%fZ', errors='coerce')

# Check for any rows where parsing failed
invalid_dates = df[df['timestamp'].isnull()]
if not invalid_dates.empty:
    print("Some dates could not be parsed:")
    print(invalid_dates)

# Drop rows with invalid dates if needed
df = df.dropna(subset=['timestamp'])

# Continue with setting the index
df.set_index('timestamp', inplace=True)

# Filtering based on user input for stn no and EqN
stn_no = input("Enter station number (or 'all' for all stations): ")
eqn_no = input("Enter equipment number (or 'all' for all equipment): ")

# Apply filters
if stn_no.lower() != 'all':
    df = df[df['stn_no'] == int(stn_no)]
if eqn_no.lower() != 'all':
    df = df[df['EqN'] == int(eqn_no)]

# Aggregating the Footfall at Different Levels
daily_footfall = df.resample('D').count()  # Daily footfall

# Time-Series Decomposition
decomposition = seasonal_decompose(daily_footfall['transaction_id'], model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# Specify Forecast Period
start_date = pd.to_datetime(input("Enter forecast start date (YYYY-MM-DD): "))
end_date = pd.to_datetime(input("Enter forecast end date (YYYY-MM-DD): "))
forecast_period = (end_date - start_date).days

# File Path Base
base_path = r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors'

# ARIMA Forecasting
try:
    # Automatically determine ARIMA order
    auto_model = auto_arima(daily_footfall['transaction_id'].dropna(), seasonal=False, stepwise=True)
    print("Auto ARIMA Model Summary:")
    print(auto_model.summary())

    # Fit ARIMA model with the best parameters
    arima_model = ARIMA(daily_footfall['transaction_id'], order=auto_model.order)
    arima_model_fit = arima_model.fit()

    # Adjust the forecast range based on the start and end dates provided
    forecast_start_index = (start_date - daily_footfall.index[-1]).days  # Days from last data point to start date
    arima_forecast = arima_model_fit.forecast(steps=(forecast_period + forecast_start_index))

    # Creating a DataFrame for ARIMA Forecast
    arima_forecast_dates = pd.date_range(daily_footfall.index[-1] + pd.Timedelta(days=1), periods=len(arima_forecast))
    arima_forecast_df = pd.DataFrame({'timestamp': arima_forecast_dates, 'ARIMA_Forecast': arima_forecast})

    # Filter the forecast to start exactly from the user-specified start date
    arima_forecast_df = arima_forecast_df[arima_forecast_df['timestamp'] >= start_date]

    # Display ARIMA Forecasted Data
    print(f"ARIMA Model - Forecasted Footfall from {start_date.date()} to {end_date.date()}:")
    print(arima_forecast_df)

    # Aggregation of ARIMA Forecast
    daily_forecast = arima_forecast_df.copy()
    daily_forecast.set_index('timestamp', inplace=True)
    weekly_forecast = daily_forecast.resample('W').sum()
    monthly_forecast = daily_forecast.resample('M').sum()
    yearly_forecast = daily_forecast.resample('Y').sum()
    weekend_forecast = daily_forecast[daily_forecast.index.dayofweek >= 5].resample('D').sum()
    weekday_forecast = daily_forecast[daily_forecast.index.dayofweek < 5].resample('D').sum()

    # Add week of year and day of week to weekly forecast
    weekly_forecast['week_of_year'] = weekly_forecast.index.isocalendar().week
    weekly_forecast['day_of_week'] = weekly_forecast.index.day_name()

    # Add month name to monthly forecast
    monthly_forecast['month_name'] = monthly_forecast.index.month_name()

    # Add day of week to weekend and weekday forecasts
    weekend_forecast['day_of_week'] = weekend_forecast.index.day_name()
    weekday_forecast['day_of_week'] = weekday_forecast.index.day_name()

    # Save ARIMA Forecast to CSV
    arima_forecast_df.to_csv(os.path.join(base_path, f'arima_forecast_{stn_no}_{eqn_no}.csv'), index=False)

    # Save Aggregated ARIMA Forecast Data to CSV
    weekly_forecast.to_csv(os.path.join(base_path, f'weekly_forecast_arima_{stn_no}_{eqn_no}.csv'))
    monthly_forecast.to_csv(os.path.join(base_path, f'monthly_forecast_arima_{stn_no}_{eqn_no}.csv'))
    yearly_forecast.to_csv(os.path.join(base_path, f'yearly_forecast_arima_{stn_no}_{eqn_no}.csv'))
    weekend_forecast.to_csv(os.path.join(base_path, f'weekend_forecast_arima_{stn_no}_{eqn_no}.csv'))
    weekday_forecast.to_csv(os.path.join(base_path, f'weekday_forecast_arima_{stn_no}_{eqn_no}.csv'))

    # Plot ARIMA Forecast
    plt.figure(figsize=(10, 6))
    plt.plot(daily_footfall.index, daily_footfall['transaction_id'], label='Observed', color='blue')
    plt.plot(arima_forecast_df['timestamp'], arima_forecast_df['ARIMA_Forecast'], label='ARIMA Forecast', color='orange')
    plt.title(f'ARIMA Model: Observed vs Forecasted Footfall ({start_date.date()} to {end_date.date()})')
    plt.xlabel('Date')
    plt.ylabel('Footfall')
    plt.legend()
    plt.show()

except Exception as e:
    print(f"An error occurred with ARIMA model: {e}")

# Prophet Model - Alternative Time-Series Forecasting
daily_footfall_prophet = daily_footfall.reset_index()

# Check the columns in the DataFrame
print(daily_footfall_prophet.columns)  # This will help to understand the columns before renaming

# Ensure the DataFrame only has 'timestamp' and 'transaction_id' after resetting the index
if 'timestamp' in daily_footfall_prophet.columns and 'transaction_id' in daily_footfall_prophet.columns:
    daily_footfall_prophet = daily_footfall_prophet[['timestamp', 'transaction_id']]  # Keep only necessary columns
    daily_footfall_prophet.columns = ['ds', 'y']  # Rename columns for Prophet
else:
    raise ValueError("Expected columns 'timestamp' and 'transaction_id' not found in the DataFrame after resetting index")

# Convert 'ds' to datetime and 'y' to numeric
daily_footfall_prophet['ds'] = pd.to_datetime(daily_footfall_prophet['ds'])
daily_footfall_prophet['y'] = pd.to_numeric(daily_footfall_prophet['y'], errors='coerce')

# Fit Prophet model
prophet_model = Prophet()
prophet_model.fit(daily_footfall_prophet)

# Create future dataframe starting from the user-specified start_date
future_df = pd.date_range(start=start_date, end=end_date)
future_prophet = pd.DataFrame({'ds': future_df})
forecast_prophet = prophet_model.predict(future_prophet)

# Creating a DataFrame for Prophet Forecast
prophet_forecast_df = forecast_prophet[['ds', 'yhat']]
prophet_forecast_df.columns = ['timestamp', 'transaction_id']

# Save Prophet Forecast to CSV
prophet_forecast_df.to_csv(os.path.join(base_path, f'daily_footfall_forecast_prophet_{stn_no}_{eqn_no}.csv'), index=False)

# Aggregation of Prophet Forecast
daily_forecast_prophet = prophet_forecast_df.copy()
daily_forecast_prophet.set_index('timestamp', inplace=True)
weekly_forecast_prophet = daily_forecast_prophet.resample('W').sum()
monthly_forecast_prophet = daily_forecast_prophet.resample('M').sum()
yearly_forecast_prophet = daily_forecast_prophet.resample('Y').sum()
weekend_forecast_prophet = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek >= 5].resample('D').sum()
weekday_forecast_prophet = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek < 5].resample('D').sum()

# Add week of year and day of week to weekly forecast
weekly_forecast_prophet['week_of_year'] = weekly_forecast_prophet.index.isocalendar().week
weekly_forecast_prophet['day_of_week'] = weekly_forecast_prophet.index.day_name()

# Add month name to monthly forecast
monthly_forecast_prophet['month_name'] = monthly_forecast_prophet.index.month_name()

# Add day of week to weekend and weekday forecasts
weekend_forecast_prophet['day_of_week'] = weekend_forecast_prophet.index.day_name()
weekday_forecast_prophet['day_of_week'] = weekday_forecast_prophet.index.day_name()

# Save Aggregated Forecast Data to CSV
weekly_forecast_prophet.to_csv(os.path.join(base_path, f'weekly_forecast_prophet_{stn_no}_{eqn_no}.csv'))
monthly_forecast_prophet.to_csv(os.path.join(base_path, f'monthly_forecast_prophet_{stn_no}_{eqn_no}.csv'))
yearly_forecast_prophet.to_csv(os.path.join(base_path, f'yearly_forecast_prophet_{stn_no}_{eqn_no}.csv'))
weekend_forecast_prophet.to_csv(os.path.join(base_path, f'weekend_forecast_prophet_{stn_no}_{eqn_no}.csv'))
weekday_forecast_prophet.to_csv(os.path.join(base_path, f'weekday_forecast_prophet_{stn_no}_{eqn_no}.csv'))

# Display the Prophet Forecast DataFrame
print(prophet_forecast_df.head())

print("Files saved to their respective destinations")


HOURLY FORECAST

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from prophet import Prophet
import numpy as np

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Input: Define Start and End Date for Filtering
start_date = '2024-09-01 00:00:00'  # Replace with your start date
end_date = '2024-09-30 11:57:00'    # Replace with your end date

# Filter data based on the specified date range
df_filtered = df.loc[start_date:end_date]

# Aggregating the Footfall at an Hourly Level
hourly_footfall = df_filtered.resample('H').count()  # Hourly footfall

# Grouping by hour (0 to 23) to get the sum of transactions for each hour across all days
hourly_sums = hourly_footfall.groupby(hourly_footfall.index.hour).sum()

# Exploratory Data Analysis (EDA) - Visualizing Trends
plt.figure(figsize=(10, 6))
plt.plot(hourly_sums.index, hourly_sums['transaction_id'], label='Sum of Transactions per Hour', color='blue')
plt.title(f'Sum of Transactions for Each Hour Across {start_date} to {end_date}')
plt.xlabel('Hour of the Day')
plt.ylabel('Sum of Transactions')
plt.legend()
plt.show()

# Stationarity Check using Augmented Dickey-Fuller Test
result = adfuller(hourly_sums['transaction_id'])
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')

# Ensure there are enough observations (at least 24) for decomposition
if len(hourly_sums) >= 24:
    # Time-Series Decomposition
    decomposition = seasonal_decompose(hourly_sums['transaction_id'], model='additive', period=12)
    fig = decomposition.plot()
    fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals (Hourly Sum)', y=1.02)
    plt.show()
else:
    print(f"Not enough data points for decomposition. Required: 24, Found: {len(hourly_sums)}")

# Use the filtered dataset for training the ARIMA model
try:
    # Automatically determine ARIMA order
    auto_model = auto_arima(hourly_sums['transaction_id'].dropna(), seasonal=False, stepwise=True)
    print("Auto ARIMA Model Summary:")
    print(auto_model.summary())

    # Fit ARIMA model with the best parameters
    arima_model = ARIMA(hourly_sums['transaction_id'], order=auto_model.order)
    arima_model_fit = arima_model.fit()
    
    # Forecasting the next 24 hours (1 day)
    arima_forecast = arima_model_fit.forecast(steps=24)
    
    # Creating a DataFrame for ARIMA Forecast
    arima_forecast_df = pd.DataFrame({'hour': np.arange(0, 24), 
                                      'ARIMA_Forecast': arima_forecast})

    # Display ARIMA Forecasted Data
    print("ARIMA Model - 24 Hour Forecasted Sum of Transactions:")
    print(arima_forecast_df)

    # Plotting ARIMA Forecast
    plt.figure(figsize=(10, 6))
    plt.plot(hourly_sums.index, hourly_sums['transaction_id'], label='Observed Sum of Transactions', color='blue')
    plt.plot(arima_forecast_df['hour'], arima_forecast_df['ARIMA_Forecast'], label='ARIMA Forecast', color='orange')
    plt.title('ARIMA Model: Observed vs Forecasted Sum of Transactions for Each Hour')
    plt.xlabel('Hour of the Day')
    plt.ylabel('Sum of Transactions')
    plt.legend()
    plt.show()
except Exception as e:
    print(f"An error occurred with ARIMA model: {e}")

# Prophet Model - Alternative Time-Series Forecasting
hourly_sums_prophet = hourly_sums.reset_index()

# Drop any additional columns and rename correctly
hourly_sums_prophet.columns = ['ds', 'y']  # Rename for Prophet

# Ensure 'ds' is numeric (hour of the day) and 'y' is the sum of transactions
hourly_sums_prophet['ds'] = hourly_sums_prophet['ds'].astype(int)
hourly_sums_prophet['y'] = pd.to_numeric(hourly_sums_prophet['y'], errors='coerce')

# Fit Prophet model
prophet_model = Prophet()
prophet_model.fit(hourly_sums_prophet)
future = prophet_model.make_future_dataframe(periods=24, freq='H')  # Forecast next 24 hours
forecast_prophet = prophet_model.predict(future)

# Creating a DataFrame for Prophet Forecast
prophet_forecast_df = forecast_prophet[['ds', 'yhat']].tail(24)
prophet_forecast_df.columns = ['hour', 'Prophet_Forecast']

# Convert 'hour' column in prophet_forecast_df to integer format
prophet_forecast_df['hour'] = prophet_forecast_df['hour'].dt.hour

# Ensure 'hour' column in arima_forecast_df is integer
arima_forecast_df['hour'] = arima_forecast_df['hour'].astype(int)

# Merging ARIMA and Prophet Forecasts
forecast_df = pd.merge(arima_forecast_df, prophet_forecast_df, on='hour', how='outer')

# Save to CSV file
forecast_df.to_csv(r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors\hourly_forecasted_sum_transactions.csv', index=False)

# Display the merged forecast DataFrame
print(forecast_df.head())


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Input: Define Start and End Date for Filtering
start_date = '2024-09-01 00:00:00'  # Replace with your start date
end_date = '2024-09-30 11:57:00'    # Replace with your end date

# Filter data based on the specified date range
df_filtered = df.loc[start_date:end_date]

# Aggregating the Footfall at an Hourly Level
hourly_footfall = df_filtered.resample('H').count()  # Hourly footfall

# Grouping by hour (0 to 23) to get the sum of transactions for each hour across all days
hourly_sums = hourly_footfall.groupby(hourly_footfall.index.hour).sum()

# --- Train-Test Split ---
# Use 80% of the data for training and the last 20% for testing
split_index = int(0.8 * len(hourly_sums))
train_data = hourly_sums.iloc[:split_index]
test_data = hourly_sums.iloc[split_index:]

# --- ARIMA Forecast ---
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima

# Automatically determine ARIMA order using training data
auto_model = auto_arima(train_data['transaction_id'].dropna(), seasonal=False, stepwise=True)

# Fit ARIMA model with the best parameters on train data
arima_model = ARIMA(train_data['transaction_id'], order=auto_model.order)
arima_model_fit = arima_model.fit()

# Forecasting the next len(test_data) hours
arima_forecast = arima_model_fit.forecast(steps=len(test_data))

# Plot ARIMA Forecast vs Actual
plt.figure(figsize=(10, 6))
plt.plot(test_data.index, test_data['transaction_id'], label='Actual', color='blue')
plt.plot(test_data.index, arima_forecast, label='ARIMA Forecast', color='orange')
plt.title('ARIMA Forecast vs Actual')
plt.xlabel('Time')
plt.ylabel('Sum of Transactions')
plt.legend()
plt.show()

# Compute MAE and RMSE for ARIMA
mae_arima = mean_absolute_error(test_data['transaction_id'], arima_forecast)
rmse_arima = np.sqrt(mean_squared_error(test_data['transaction_id'], arima_forecast))

print(f'ARIMA Model MAE: {mae_arima}')
print(f'ARIMA Model RMSE: {rmse_arima}')


# --- Prophet Forecast ---
from prophet import Prophet

# Prepare data for Prophet
hourly_sums_prophet = hourly_sums.reset_index()
hourly_sums_prophet.columns = ['ds', 'y']  # Rename for Prophet

# Train-Test Split for Prophet
train_prophet = hourly_sums_prophet.iloc[:split_index]
test_prophet = hourly_sums_prophet.iloc[split_index:]

# Fit Prophet model on training data
prophet_model = Prophet()
prophet_model.fit(train_prophet)

# Make future dataframe for test period
future = prophet_model.make_future_dataframe(periods=len(test_prophet), freq='H')
forecast_prophet = prophet_model.predict(future)

# Get only the predicted values for the test period
prophet_forecast = forecast_prophet[['ds', 'yhat']].tail(len(test_prophet))['yhat']

# Plot Prophet Forecast vs Actual
plt.figure(figsize=(10, 6))
plt.plot(test_prophet['ds'], test_prophet['y'], label='Actual', color='blue')
plt.plot(test_prophet['ds'], prophet_forecast, label='Prophet Forecast', color='green')
plt.title('Prophet Forecast vs Actual')
plt.xlabel('Time')
plt.ylabel('Sum of Transactions')
plt.legend()
plt.show()

# Compute MAE and RMSE for Prophet
mae_prophet = mean_absolute_error(test_prophet['y'], prophet_forecast)
rmse_prophet = np.sqrt(mean_squared_error(test_prophet['y'], prophet_forecast))

print(f'Prophet Model MAE: {mae_prophet}')
print(f'Prophet Model RMSE: {rmse_prophet}')


checking model fitting on hours 

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
import xgboost as xgb
from keras.models import Sequential
from keras.layers import LSTM, Dense
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV

# Load the dataset
df = pd.read_csv('transaction_updated.csv')
print(df.shape)

# Define the specific format for your datetime strings
df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%dT%H-%M-%S.%fZ', errors='coerce')

# Check for any failed parsing
print(f"Number of missing timestamps: {df['timestamp'].isnull().sum()}")

# Drop rows where the timestamp could not be parsed
df = df.dropna(subset=['timestamp'])

# Ensure the 'transaction_id' column is numeric
df['transaction_id'] = pd.to_numeric(df['transaction_id'], errors='coerce')

# Add lag features
for lag in range(1, 25):
    df[f'lag_{lag}'] = df['transaction_id'].shift(lag)

# Fill NaN values
df.fillna(method='bfill', inplace=True)

# Set the timestamp as the index
df.set_index('timestamp', inplace=True)

# Check if there is data before resampling
print(f"Data before resampling: {df.shape}")
print(df.head())

# Resample to hourly data
df = df.resample('H').sum()

# Check if data is empty after resampling
if df.empty:
    raise ValueError("Resampled data is empty. Check your resampling frequency or original data.")

print(f"Data shape after resampling: {df.shape}")
print(df.head())

# Split the data into train and test sets
train_size = int(len(df) * 0.8)
train, test = df.iloc[:train_size], df.iloc[train_size:]

# Print shapes and heads of train and test data
print(f"Train shape: {train.shape}")
print(f"Test shape: {test.shape}")
print(f"Train head: {train.head()}")
print(f"Test head: {test.head()}")

# Create function to calculate RMSE and MAE
def evaluate_model(predictions, true_values):
    mae = mean_absolute_error(true_values, predictions)
    rmse = sqrt(mean_squared_error(true_values, predictions))
    return mae, rmse

# Store results
results = {}

# 1. ARIMA
try:
    if len(train) > 0:
        arima_model = ARIMA(train['transaction_id'], order=(5,1,0))
        arima_fit = arima_model.fit()
        arima_pred = arima_fit.forecast(len(test))
        mae, rmse = evaluate_model(arima_pred, test['transaction_id'])
        results['ARIMA'] = (mae, rmse)
    else:
        print("ARIMA Error: Not enough data to fit the model.")
except Exception as e:
    print(f"ARIMA Error: {e}")

# 2. SARIMA
try:
    if len(train) > 0:
        sarima_model = SARIMAX(train['transaction_id'], order=(1,1,1), seasonal_order=(1,1,1,24))
        sarima_fit = sarima_model.fit(disp=False)
        sarima_pred = sarima_fit.forecast(len(test))
        mae, rmse = evaluate_model(sarima_pred, test['transaction_id'])
        results['SARIMA'] = (mae, rmse)
    else:
        print("SARIMA Error: Not enough data to fit the model.")
except Exception as e:
    print(f"SARIMA Error: {e}")

# 3. Exponential Smoothing (ETS)
try:
    if len(train) > 0:
        ets_model = ExponentialSmoothing(train['transaction_id'], seasonal='add', seasonal_periods=24).fit()
        ets_pred = ets_model.forecast(len(test))
        mae, rmse = evaluate_model(ets_pred, test['transaction_id'])
        results['ETS'] = (mae, rmse)
    else:
        print("ETS Error: Not enough data to fit the model.")
except Exception as e:
    print(f"ETS Error: {e}")

# 4. Facebook Prophet
try:
    prophet_df = df.reset_index().rename(columns={'timestamp': 'ds', 'transaction_id': 'y'})
    train_prophet = prophet_df[:train_size]
    test_prophet = prophet_df[train_size:]

    if len(train_prophet) > 1:  # Prophet needs at least 2 rows
        prophet_model = Prophet()
        prophet_model.fit(train_prophet)
        future = prophet_model.make_future_dataframe(periods=len(test_prophet), freq='H')
        prophet_forecast = prophet_model.predict(future)
        prophet_pred = prophet_forecast['yhat'].iloc[-len(test):].values
        mae, rmse = evaluate_model(prophet_pred, test['transaction_id'].values)
        results['Prophet'] = (mae, rmse)
    else:
        print("Prophet Error: Not enough data to fit the model.")
except Exception as e:
    print(f"Prophet Error: {e}")

# 5. LSTM
# Prepare the data for LSTM
try:
    if len(train) > 24:  # Ensure there is enough data for LSTM and TimeseriesGenerator
        scaler = MinMaxScaler()
        train_scaled = scaler.fit_transform(train[['transaction_id']])
        test_scaled = scaler.transform(test[['transaction_id']])

        train_gen = TimeseriesGenerator(train_scaled, train_scaled, length=24, batch_size=1)
        test_gen = TimeseriesGenerator(test_scaled, test_scaled, length=24, batch_size=1)

        lstm_model = Sequential()
        lstm_model.add(LSTM(50, activation='relu', input_shape=(24, 1)))
        lstm_model.add(Dense(1))
        lstm_model.compile(optimizer='adam', loss='mse')

        lstm_model.fit(train_gen, epochs=10, verbose=1)
        lstm_pred = lstm_model.predict(test_gen)
        lstm_pred_rescaled = scaler.inverse_transform(lstm_pred)
        mae, rmse = evaluate_model(lstm_pred_rescaled, test['transaction_id'].values[24:])
        results['LSTM'] = (mae, rmse)
    else:
        print("LSTM Error: Not enough data to fit the model.")
except Exception as e:
    print(f"LSTM Error: {e}")

# 6. XGBoost
try:
    if len(train) > 24:  # Ensure there is enough data for XGBoost
        # Create lag features for XGBoost
        train_xgb = train.dropna()
        test_xgb = test.dropna()

        X_train = train_xgb.drop('transaction_id', axis=1)
        y_train = train_xgb['transaction_id']
        X_test = test_xgb.drop('transaction_id', axis=1)
        y_test = test_xgb['transaction_id']

        # Hyperparameter tuning with GridSearchCV
        param_grid = {
            'n_estimators': [100, 200],
            'max_depth': [6, 10],
            'learning_rate': [0.01, 0.1],
        }
        xgb_model = xgb.XGBRegressor()
        grid_search = GridSearchCV(xgb_model, param_grid, cv=3, scoring='neg_mean_squared_error')
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
        xgb_pred = best_model.predict(X_test)
        mae, rmse = evaluate_model(xgb_pred, y_test)
        results['XGBoost'] = (mae, rmse)
    else:
        print("XGBoost Error: Not enough data to fit the model.")
except Exception as e:
    print(f"XGBoost Error: {e}")

# Print the results
for model_name, (mae, rmse) in results.items():
    print(f'{model_name} - MAE: {mae}, RMSE: {rmse}')


whole model

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from prophet import Prophet
import numpy as np
import os

# Load Dataset
df = pd.read_csv('transaction.csv')  # Update path to the uploaded file
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Manually define holidays and festivals
holidays = pd.to_datetime(['2024-01-26', '2024-08-15', '2024-10-02'])  # Replace with real holiday dates
festivals = pd.to_datetime(['2024-03-29', '2024-11-04', '2024-12-25'])  # Replace with real festival dates

# Function to aggregate data based on frequency
def aggregate_data(df, freq):
    return df.resample(freq).count()

# ARIMA Forecast Function
def arima_forecast(data, periods):
    if data.empty:
        print("ARIMA Error: No data available for ARIMA model.")
        return None
    try:
        data_series = data.squeeze()  # Ensure data is a Series
        model = auto_arima(data_series, seasonal=False, stepwise=True)
        arima_model = ARIMA(data_series, order=model.order)
        arima_model_fit = arima_model.fit()
        forecast = arima_model_fit.forecast(steps=periods)
        forecast_df = pd.DataFrame({'timestamp': pd.date_range(start=data.index[-1], periods=periods + 1, freq='D')[1:], 
                                    'ARIMA_Forecast': forecast})
        return forecast_df
    except Exception as e:
        print(f"ARIMA Error: {e}")
        return None

# Prophet Forecast Function
def prophet_forecast(data, periods):
    if data.empty or data.dropna().shape[0] < 2:
        print("Prophet Error: Not enough data available for Prophet model.")
        return None
    try:
        data_prophet = data.reset_index()
        data_prophet.columns = ['ds', 'y']
        model = Prophet()
        model.fit(data_prophet)
        future = model.make_future_dataframe(periods=periods)
        forecast = model.predict(future)
        forecast_df = forecast[['ds', 'yhat']].tail(periods)
        forecast_df.columns = ['timestamp', 'Prophet_Forecast']
        return forecast_df
    except Exception as e:
        print(f"Prophet Error: {e}")
        return None

# Helper function to combine ARIMA and Prophet forecasts
def forecast_combined(data, periods):
    if 'transaction_id' not in data.columns:
        print("Error: 'transaction_id' column is missing in the data.")
        return None
    
    arima_forecast_df = arima_forecast(data['transaction_id'], periods)
    prophet_forecast_df = prophet_forecast(data['transaction_id'], periods)

    if arima_forecast_df is not None and prophet_forecast_df is not None:
        combined_forecast_df = pd.merge(arima_forecast_df, prophet_forecast_df, on='timestamp', how='outer')
    else:
        combined_forecast_df = None
    
    return combined_forecast_df

# Forecast functions with date range filtering
def forecast_hourly(df, start_date, end_date, periods):
    data = df[start_date:end_date]
    if data.empty:
        print("No data available for the specified hourly range.")
        return None
    data = aggregate_data(data, 'H')
    return forecast_combined(data, periods)

def forecast_daily(df, start_date, end_date, periods):
    data = df[start_date:end_date]
    if data.empty:
        print("No data available for the specified daily range.")
        return None
    data = aggregate_data(data, 'D')
    return forecast_combined(data, periods)

def forecast_weekly(df, start_date, end_date, periods):
    data = df[start_date:end_date]
    if data.empty:
        print("No data available for the specified weekly range.")
        return None
    data = aggregate_data(data, 'W')
    return forecast_combined(data, periods)

def forecast_monthly(df, start_date, end_date, periods):
    data = df[start_date:end_date]
    if data.empty:
        print("No data available for the specified monthly range.")
        return None
    data = aggregate_data(data, 'M')
    return forecast_combined(data, periods)

def forecast_yearly(df, start_date, end_date, periods):
    data = df[start_date:end_date]
    if data.empty:
        print("No data available for the specified yearly range.")
        return None
    data = aggregate_data(data, 'Y')
    return forecast_combined(data, periods)

def forecast_festival(df, periods):
    data = df[df.index.isin(festivals)]
    if data.empty:
        print("No data available for the specified festivals.")
        return None
    return forecast_combined(data, periods)

def forecast_holiday(df, periods):
    data = df[df.index.isin(holidays)]
    if data.empty:
        print("No data available for the specified holidays.")
        return None
    return forecast_combined(data, periods)

# Forecast function for transactions per hour of the day
def forecast_hourly_by_hour(df, periods):
    forecast_list = []
    for hour in range(24):  # Loop through each hour of the day
        hour_data = df[df.index.hour == hour]
        if hour_data.empty:
            print(f"No data available for the {hour+1}th hour.")
            continue
        hour_data = aggregate_data(hour_data, 'D')  # Aggregate by day
        forecast = forecast_combined(hour_data, periods)
        if forecast is not None:
            forecast['hour'] = hour + 1  # Add the hour identifier
            forecast_list.append(forecast)

    if forecast_list:
        forecast_df = pd.concat(forecast_list)
        return forecast_df
    else:
        print("Forecasting failed for all hours.")
        return None

# Example usage for forecast with different types
forecast_type = 'hourly_by_hour'  # 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'festival', 'holiday', 'hourly_by_hour'
start_date = '2024-01-01'  # Example start date
end_date = '2024-12-31'    # Example end date
periods = 30  # Number of periods to forecast

if forecast_type == 'hourly':
    forecast = forecast_hourly(df, start_date, end_date, periods)
elif forecast_type == 'daily':
    forecast = forecast_daily(df, start_date, end_date, periods)
elif forecast_type == 'weekly':
    forecast = forecast_weekly(df, start_date, end_date, periods)
elif forecast_type == 'monthly':
    forecast = forecast_monthly(df, start_date, end_date, periods)
elif forecast_type == 'yearly':
    forecast = forecast_yearly(df, start_date, end_date, periods)
elif forecast_type == 'festival':
    forecast = forecast_festival(df, periods)
elif forecast_type == 'holiday':
    forecast = forecast_holiday(df, periods)
elif forecast_type == 'hourly_by_hour':
    forecast = forecast_hourly_by_hour(df, periods)

# Save the forecasted data to CSV
output_dir = r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, f'{forecast_type}_forecast.csv')

if forecast is not None:
    forecast.to_csv(output_path, index=False)
    print(f"Forecast saved at: {output_path}")
else:
    print("Forecasting failed.")


BASED ON THE NEW DATA FROM OFFICE

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from prophet import Prophet
import os

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path

# Data Preprocessing
# Adjusted to handle the format with 'Z' at the end (UTC time)
df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')

# Check for any rows where parsing failed
invalid_dates = df[df['Dt'].isnull()]
if not invalid_dates.empty:
    print("Some dates could not be parsed:")
    print(invalid_dates)

# Drop rows with invalid dates if needed
df = df.dropna(subset=['Dt'])

# Continue with setting the index
df.set_index('Dt', inplace=True)

# Filtering based on user input for station number and equipment number
stn_no = input("Enter station number (or 'all' for all stations): ")
eqn_no = input("Enter equipment number (or 'all' for all equipment): ")

# Apply filters based on the input
if stn_no.lower() != 'all':
    df = df[df['Sta'] == int(stn_no)]
if eqn_no.lower() != 'all':
    df = df[df['EqN'] == int(eqn_no)]

# Aggregating the Footfall at Hourly Level
hourly_footfall = df.resample('H').count()  # Hourly footfall

# Set frequency for hourly_footfall
hourly_footfall = hourly_footfall.asfreq('H')

# Time-Series Decomposition for visual inspection
decomposition = seasonal_decompose(hourly_footfall['_id'], model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# Specify Forecast Period
start_date = pd.to_datetime(input("Enter forecast start date (YYYY-MM-DD): "))
end_date = pd.to_datetime(input("Enter forecast end date (YYYY-MM-DD): "))
forecast_period = (end_date - start_date).days * 24  # Forecast period in hours

# File Path Base
base_path = r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors'

# Prophet Model - Time-Series Forecasting with Seasonalities
hourly_footfall_prophet = hourly_footfall.reset_index()

# Ensure the DataFrame only has 'timestamp' and 'transaction_id' after resetting the index
hourly_footfall_prophet = hourly_footfall_prophet[['Dt', '_id']]
hourly_footfall_prophet.columns = ['ds', 'y']  # Rename columns for Prophet

# Remove timezone information from the 'ds' column
hourly_footfall_prophet['ds'] = hourly_footfall_prophet['ds'].dt.tz_localize(None)

# Convert 'y' to numeric
hourly_footfall_prophet['y'] = pd.to_numeric(hourly_footfall_prophet['y'], errors='coerce')

# Initialize Prophet model with specific seasonalities
prophet_model = Prophet(
    yearly_seasonality=False,  # Disable built-in yearly seasonality to customize it
    weekly_seasonality=False,  # Disable built-in weekly seasonality
    daily_seasonality=False    # Disable built-in daily seasonality
)

# Add custom seasonalities based on decomposition insights
prophet_model.add_seasonality(name='daily', period=1, fourier_order=10)
prophet_model.add_seasonality(name='weekly', period=7, fourier_order=5)
prophet_model.add_seasonality(name='yearly', period=365.25, fourier_order=10)

# Fit the Prophet model
prophet_model.fit(hourly_footfall_prophet)

# Create future dataframe starting from the user-specified start_date
future_df = pd.date_range(start=start_date, end=end_date, freq='H')
future_prophet = pd.DataFrame({'ds': future_df})

# Forecast with Prophet model
forecast_prophet = prophet_model.predict(future_prophet)

# Creating a DataFrame for Prophet Forecast
prophet_forecast_df = forecast_prophet[['ds', 'yhat']]
prophet_forecast_df.columns = ['Dt', '_id']

# Round off the Prophet forecasted data
prophet_forecast_df['_id'] = prophet_forecast_df['_id'].round()

# Save Prophet Forecast to CSV
prophet_forecast_df.to_csv(os.path.join(base_path, f'hourly_footfall_forecast_prophet_{stn_no}_{eqn_no}.csv'), index=False)

# Aggregation of Prophet Forecast
daily_forecast_prophet = prophet_forecast_df.copy()
daily_forecast_prophet.set_index('Dt', inplace=True)

# Aggregation: Daily, Weekly, Monthly, Yearly
daily_forecast_prophet_resampled = daily_forecast_prophet.resample('D').sum()
weekly_forecast_prophet_resampled = daily_forecast_prophet.resample('W').sum()
monthly_forecast_prophet_resampled = daily_forecast_prophet.resample('M').sum()
yearly_forecast_prophet_resampled = daily_forecast_prophet.resample('Y').sum()

# Weekend and Weekday Aggregation
weekend_forecast_prophet_resampled = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek >= 5].resample('D').sum()
weekday_forecast_prophet_resampled = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek < 5].resample('D').sum()

# Add week of year and day of week to weekly forecast
weekly_forecast_prophet_resampled['week_of_year'] = weekly_forecast_prophet_resampled.index.isocalendar().week
weekly_forecast_prophet_resampled['day_of_week'] = weekly_forecast_prophet_resampled.index.day_name()

# Add month name to monthly forecast
monthly_forecast_prophet_resampled['month_name'] = monthly_forecast_prophet_resampled.index.month_name()

# Add day of week to weekend and weekday forecasts
weekend_forecast_prophet_resampled['day_of_week'] = weekend_forecast_prophet_resampled.index.day_name()
weekday_forecast_prophet_resampled['day_of_week'] = weekday_forecast_prophet_resampled.index.day_name()

# Save Aggregated Forecast Data to CSV
daily_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'daily_forecast_prophet_{stn_no}_{eqn_no}.csv'))
weekly_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'weekly_forecast_prophet_{stn_no}_{eqn_no}.csv'))
monthly_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'monthly_forecast_prophet_{stn_no}_{eqn_no}.csv'))
yearly_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'yearly_forecast_prophet_{stn_no}_{eqn_no}.csv'))
weekend_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'weekend_forecast_prophet_{stn_no}_{eqn_no}.csv'))
weekday_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'weekday_forecast_prophet_{stn_no}_{eqn_no}.csv'))

# Aggregation of Prophet Forecast Data by Hour of Day
prophet_forecast_df['hour'] = prophet_forecast_df['Dt'].dt.hour
hourly_aggr_prophet = prophet_forecast_df.groupby('hour')['_id'].sum().reset_index()

# Save Aggregated Hourly Prophet Forecast Data to CSV
hourly_aggr_prophet.to_csv(os.path.join(base_path, f'hourly_aggr_prophet_forecast_{stn_no}_{eqn_no}.csv'), index=False)

# Display the Prophet Forecast DataFrame
print(prophet_forecast_df.head())

print("Files saved to their respective destinations")


FORECAST VALIDATION

In [None]:
# Load the forecasted data for validation
forecast_prophet_df = pd.read_csv(os.path.join(base_path, f'hourly_footfall_forecast_prophet_{stn_no}_{eqn_no}.csv'))

# Convert 'Dt' back to datetime
forecast_prophet_df['Dt'] = pd.to_datetime(forecast_prophet_df['Dt'])

# Merge forecast with actual data for comparison
hourly_actual_prophet = hourly_footfall_prophet.set_index('ds').reindex(forecast_prophet_df['Dt']).reset_index()
hourly_actual_prophet.columns = ['Dt', 'Actual']
merged_df = pd.merge(forecast_prophet_df, hourly_actual_prophet, on='Dt', how='left')

# Plot actual vs forecast
plt.figure(figsize=(12, 6))
plt.plot(merged_df['Dt'], merged_df['_id'], label='Forecast', color='blue')
plt.plot(merged_df['Dt'], merged_df['Actual'], label='Actual', color='orange')
plt.xlabel('Date')
plt.ylabel('Footfall')
plt.title('Forecast vs Actual')
plt.legend()
plt.show()

# Plot forecast components
prophet_model.plot_components(forecast_prophet)
plt.show()


whole old model

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from prophet import Prophet
import numpy as np
import os

# Load Dataset
df = pd.read_csv('transaction.csv')  # Replace with your dataset path
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Manually define holidays and festivals
holidays = pd.to_datetime(['2024-01-26', '2024-08-15', '2024-10-02'])  # Replace with real holiday dates
festivals = pd.to_datetime(['2024-03-29', '2024-11-04', '2024-12-25'])  # Replace with real festival dates

# Function to aggregate data based on frequency
def aggregate_data(df, freq):
    return df.resample(freq).count()

# ARIMA Forecast Function
def arima_forecast(data, periods):
    try:
        model = auto_arima(data, seasonal=False, stepwise=True)
        arima_model = ARIMA(data, order=model.order)
        arima_model_fit = arima_model.fit()
        forecast = arima_model_fit.forecast(steps=periods)
        forecast_df = pd.DataFrame({'timestamp': pd.date_range(start=data.index[-1], periods=periods + 1, freq='D')[1:], 
                                    'ARIMA_Forecast': forecast})
        return forecast_df
    except Exception as e:
        print(f"ARIMA Error: {e}")
        return None

# Prophet Forecast Function
def prophet_forecast(data, periods):
    try:
        data_prophet = data.reset_index()
        data_prophet.columns = ['ds', 'y']
        model = Prophet()
        model.fit(data_prophet)
        future = model.make_future_dataframe(periods=periods)
        forecast = model.predict(future)
        forecast_df = forecast[['ds', 'yhat']].tail(periods)
        forecast_df.columns = ['timestamp', 'Prophet_Forecast']
        return forecast_df
    except Exception as e:
        print(f"Prophet Error: {e}")
        return None

# Functions for hourly, daily, weekly, monthly, yearly, festival, and holiday
def forecast_hourly(df, periods):
    data = aggregate_data(df, 'H')
    return forecast_combined(data, periods)

def forecast_daily(df, periods):
    data = aggregate_data(df, 'D')
    return forecast_combined(data, periods)

def forecast_weekly(df, periods):
    data = aggregate_data(df, 'W')
    return forecast_combined(data, periods)

def forecast_monthly(df, periods, month_of_year):
    data = df[df.index.month == month_of_year]
    return forecast_combined(data, periods)

def forecast_yearly(df, periods):
    data = aggregate_data(df, 'Y')
    return forecast_combined(data, periods)

def forecast_festival(df, periods):
    data = df[df.index.isin(festivals)]
    return forecast_combined(data, periods)

def forecast_holiday(df, periods):
    data = df[df.index.isin(holidays)]
    return forecast_combined(data, periods)

# Helper function to combine ARIMA and Prophet forecasts
def forecast_combined(data, periods):
    arima_forecast_df = arima_forecast(data['transaction_id'], periods)
    prophet_forecast_df = prophet_forecast(data['transaction_id'], periods)

    if arima_forecast_df is not None and prophet_forecast_df is not None:
        combined_forecast_df = pd.merge(arima_forecast_df, prophet_forecast_df, on='timestamp', how='outer')
    else:
        combined_forecast_df = None
    
    return combined_forecast_df

# Example usage for weekly forecast
forecast_type = 'monthly'  # 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'festival', 'holiday'
periods = 365  # Number of periods to forecast
month_of_year = 3  # Example: for forecasting a specific month (March)

if forecast_type == 'hourly':
    forecast = forecast_hourly(df, periods)
elif forecast_type == 'daily':
    forecast = forecast_daily(df, periods)
elif forecast_type == 'weekly':
    forecast = forecast_weekly(df, periods)
elif forecast_type == 'monthly':
    forecast = forecast_monthly(df, periods, month_of_year)
elif forecast_type == 'yearly':
    forecast = forecast_yearly(df, periods)
elif forecast_type == 'festival':
    forecast = forecast_festival(df, periods)
elif forecast_type == 'holiday':
    forecast = forecast_holiday(df, periods)

# Save the forecasted data to CSV
output_dir = r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, f'{forecast_type}_forecast.csv')

if forecast is not None:
    forecast.to_csv(output_path, index=False)
    print(f"Forecast saved at: {output_path}")
else:
    print("Forecasting failed.")

In [None]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from prophet import Prophet
import numpy as np
import os
from joblib import Parallel, delayed

# Load Dataset
df = pd.read_csv('transaction.csv')  # Update path to the uploaded file
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Manually define holidays and festivals
holidays = pd.to_datetime(['2024-01-26', '2024-08-15', '2024-10-02'])  # Replace with real holiday dates
festivals = pd.to_datetime(['2024-03-29', '2024-11-04', '2024-12-25'])  # Replace with real festival dates

# Function to aggregate data based on frequency
def aggregate_data(df, freq):
    return df.resample(freq).sum()

# ARIMA Forecast Function
def arima_forecast(data, periods):
    if data.empty:
        print("ARIMA Error: No data available for ARIMA model.")
        return None
    try:
        data_series = data.squeeze()  # Ensure data is a Series
        model = auto_arima(data_series, seasonal=False, stepwise=True)
        arima_model = ARIMA(data_series, order=model.order)
        arima_model_fit = arima_model.fit()
        forecast = arima_model_fit.forecast(steps=periods)
        forecast_df = pd.DataFrame({
            'timestamp': pd.date_range(start=data.index[-1] + pd.DateOffset(days=1), periods=periods, freq='D'),
            'ARIMA_Forecast': forecast
        })
        return forecast_df
    except Exception as e:
        print(f"ARIMA Error: {e}")
        return None
# Prophet Forecast Function
def prophet_forecast(data, periods):
    if data.empty or data.dropna().shape[0] < 2:
        print("Prophet Error: Not enough data available for Prophet model.")
        return None
    try:
        data_prophet = data.reset_index()
        data_prophet.columns = ['ds', 'y']
        model = Prophet()
        model.fit(data_prophet)
        future = model.make_future_dataframe(periods=periods)
        forecast = model.predict(future)
        forecast_df = forecast[['ds', 'yhat']].tail(periods)
        forecast_df.columns = ['timestamp', 'Prophet_Forecast']
        return forecast_df
    except Exception as e:
        print(f"Prophet Error: {e}")
        return None

# Helper function to combine ARIMA and Prophet forecasts
def forecast_combined(data, periods):
    if 'transaction_id' not in data.columns:
        print("Error: 'transaction_id' column is missing in the data.")
        return None
    
    arima_forecast_df = arima_forecast(data['transaction_id'], periods)
    prophet_forecast_df = prophet_forecast(data['transaction_id'], periods)

    if arima_forecast_df is not None and prophet_forecast_df is not None:
        combined_forecast_df = pd.merge(arima_forecast_df, prophet_forecast_df, on='timestamp', how='outer')
        return combined_forecast_df
    return None

# Forecast functions with date range filtering
def forecast_data(df, freq, start_date, end_date, periods):
    data = df.loc[start_date:end_date]
    if data.empty:
        print(f"No data available for the specified {freq} range.")
        return None
    data = aggregate_data(data, freq)
    return forecast_combined(data, periods)

def forecast_hourly_by_hour(df, periods):
    def process_hour(hour):
        hour_data = df[df.index.hour == hour]
        if hour_data.empty:
            print(f"No data available for the {hour+1}th hour.")
            return None
        hour_data = aggregate_data(hour_data, 'D')  # Aggregate by day
        forecast = forecast_combined(hour_data, periods)
        if forecast is not None:
            forecast['hour'] = hour + 1  # Add the hour identifier
            return forecast
        return None

    forecasts = Parallel(n_jobs=-1)(delayed(process_hour)(hour) for hour in range(24))
    forecast_df = pd.concat([f for f in forecasts if f is not None], ignore_index=True)
    return forecast_df

# Example usage for forecast with different types
forecast_type = 'hourly_by_hour'  # 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'festival', 'holiday', 'hourly_by_hour'
start_date = '2024-01-01'  # Example start date
end_date = '2024-12-31'    # Example end date
periods = 30  # Number of periods to forecast

forecast_functions = {
    'hourly': lambda: forecast_data(df, 'H', start_date, end_date, periods),
    'daily': lambda: forecast_data(df, 'D', start_date, end_date, periods),
    'weekly': lambda: forecast_data(df, 'W', start_date, end_date, periods),
    'monthly': lambda: forecast_data(df, 'M', start_date, end_date, periods),
    'yearly': lambda: forecast_data(df, 'Y', start_date, end_date, periods),
    'festival': lambda: forecast_combined(df[df.index.isin(festivals)], periods),
    'holiday': lambda: forecast_combined(df[df.index.isin(holidays)], periods),
    'hourly_by_hour': lambda: forecast_hourly_by_hour(df, periods)
}

if forecast_type in forecast_functions:
    forecast = forecast_functions[forecast_type]()
else:
    print(f"Unknown forecast type: {forecast_type}")
    forecast = None

# Save the forecasted data to CSV
output_dir = r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, f'{forecast_type}_forecast.csv')

if forecast is not None:
    forecast.to_csv(output_path, index=False)
    print(f"Forecast saved at: {output_path}")
else:
    print("Forecasting failed.")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from prophet import Prophet
import os

# Load Dataset
df = pd.read_csv('transaction_updated.csv')

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%dT%H-%M-%S.%fZ', errors='coerce')
df = df.dropna(subset=['timestamp'])
df.set_index('timestamp', inplace=True)

# Filtering based on user input for stn no and EqN
stn_no = input("Enter station number (or 'all' for all stations): ")
eqn_no = input("Enter equipment number (or 'all' for all equipment): ")

if stn_no.lower() != 'all':
    df = df[df['stn_no'] == int(stn_no)]
if eqn_no.lower() != 'all':
    df = df[df['EqN'] == int(eqn_no)]

# Aggregating the Footfall at Hourly Level
hourly_footfall = df.resample('H').count().asfreq('H')

# Time-Series Decomposition
decomposition = seasonal_decompose(hourly_footfall['transaction_id'], model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# Specify Forecast Period
start_date = pd.to_datetime(input("Enter forecast start date (YYYY-MM-DD): "))
end_date = pd.to_datetime(input("Enter forecast end date (YYYY-MM-DD): "))
forecast_period = (end_date - start_date).days * 24  # Forecast period in hours

# File Path Base
base_path = r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors'

def save_forecast_data(df, model_name, file_suffixes):
    for suffix, freq in file_suffixes:
        aggregated_df = df.resample(freq).sum()
        if freq == 'W':
            aggregated_df['week_of_year'] = aggregated_df.index.isocalendar().week
            aggregated_df['day_of_week'] = aggregated_df.index.day_name()
        elif freq == 'M':
            aggregated_df['month_name'] = aggregated_df.index.month_name()
        elif freq in ['D', 'H']:
            aggregated_df['day_of_week'] = aggregated_df.index.day_name()
        aggregated_df.to_csv(os.path.join(base_path, f'{freq}_forecast_{model_name}.csv'))

    # Aggregation by Hour of Day
    hourly_aggr = df.groupby(df.index.hour).sum().reset_index()
    hourly_aggr.to_csv(os.path.join(base_path, f'hourly_aggr_{model_name}.csv'), index=False)

# ARIMA Forecasting
try:
    # Fit ARIMA model and forecast
    auto_model = auto_arima(hourly_footfall['transaction_id'].dropna(), seasonal=False, stepwise=True)
    arima_model = ARIMA(hourly_footfall['transaction_id'], order=auto_model.order).fit()
    arima_forecast = arima_model.forecast(steps=forecast_period)
    arima_forecast_df = pd.DataFrame({'timestamp': pd.date_range(hourly_footfall.index[-1] + pd.Timedelta(hours=1), periods=len(arima_forecast), freq='H'), 'ARIMA_Forecast': arima_forecast})
    arima_forecast_df = arima_forecast_df[arima_forecast_df['timestamp'] >= start_date]
    arima_forecast_df['ARIMA_Forecast'] = arima_forecast_df['ARIMA_Forecast'].round()

    # Save ARIMA Forecast Data
    arima_forecast_df.to_csv(os.path.join(base_path, f'arima_forecast_hourly_{stn_no}_{eqn_no}.csv'), index=False)
    save_forecast_data(arima_forecast_df.set_index('timestamp'), 'arima', [('W', 'W'), ('M', 'M'), ('Y', 'Y'), ('D', 'D')])

    # Plot ARIMA Forecast
    plt.figure(figsize=(10, 6))
    plt.plot(hourly_footfall.index, hourly_footfall['transaction_id'], label='Observed', color='blue')
    plt.plot(arima_forecast_df['timestamp'], arima_forecast_df['ARIMA_Forecast'], label='ARIMA Forecast', color='orange')
    plt.title(f'ARIMA Model: Observed vs Forecasted Footfall ({start_date.date()} to {end_date.date()})')
    plt.xlabel('Date')
    plt.ylabel('Footfall')
    plt.legend()
    plt.show()

except Exception as e:
    print(f"An error occurred with ARIMA model: {e}")

# Prophet Model - Forecasting
hourly_footfall_prophet = hourly_footfall.reset_index()[['timestamp', 'transaction_id']]
hourly_footfall_prophet.columns = ['ds', 'y']
hourly_footfall_prophet['ds'] = pd.to_datetime(hourly_footfall_prophet['ds'])
hourly_footfall_prophet['y'] = pd.to_numeric(hourly_footfall_prophet['y'], errors='coerce')

prophet_model = Prophet()
prophet_model.fit(hourly_footfall_prophet)

future_prophet = pd.DataFrame({'ds': pd.date_range(start=start_date, end=end_date, freq='H')})
forecast_prophet = prophet_model.predict(future_prophet)
prophet_forecast_df = forecast_prophet[['ds', 'yhat']].rename(columns={'ds': 'timestamp', 'yhat': 'transaction_id'})
prophet_forecast_df['transaction_id'] = prophet_forecast_df['transaction_id'].round()

# Save Prophet Forecast Data
prophet_forecast_df.to_csv(os.path.join(base_path, f'hourly_footfall_forecast_prophet_{stn_no}_{eqn_no}.csv'), index=False)
save_forecast_data(prophet_forecast_df.set_index('timestamp'), 'prophet', [('W', 'W'), ('M', 'M'), ('Y', 'Y'), ('D', 'D')])

# Display the Prophet Forecast DataFrame
print(prophet_forecast_df.head())
print("Files saved to their respective destinations")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from prophet import Prophet
import os

# Load Dataset
df = pd.read_csv('transaction_updated.csv')  # Replace with your dataset path

# Data Preprocessing
df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%dT%H-%M-%S.%fZ', errors='coerce')

# Check for any rows where parsing failed
invalid_dates = df[df['timestamp'].isnull()]
if not invalid_dates.empty:
    print("Some dates could not be parsed:")
    print(invalid_dates)

# Drop rows with invalid dates if needed
df = df.dropna(subset=['timestamp'])

# Continue with setting the index
df.set_index('timestamp', inplace=True)

# Filtering based on user input for stn no and EqN
stn_no = input("Enter station number (or 'all' for all stations): ")
eqn_no = input("Enter equipment number (or 'all' for all equipment): ")

# Apply filters
if stn_no.lower() != 'all':
    df = df[df['stn_no'] == int(stn_no)]
if eqn_no.lower() != 'all':
    df = df[df['EqN'] == int(eqn_no)]

# Aggregating the Footfall at Hourly Level
hourly_footfall = df.resample('H').count()  # Hourly footfall

# Set frequency for hourly_footfall
hourly_footfall = hourly_footfall.asfreq('H')

# Time-Series Decomposition
decomposition = seasonal_decompose(hourly_footfall['transaction_id'], model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# Specify Forecast Period
start_date = pd.to_datetime(input("Enter forecast start date (YYYY-MM-DD): "))
end_date = pd.to_datetime(input("Enter forecast end date (YYYY-MM-DD): "))
forecast_period = (end_date - start_date).days * 24  # Forecast period in hours

# File Path Base
base_path = r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors'

# Prophet Model - Alternative Time-Series Forecasting
hourly_footfall_prophet = hourly_footfall.reset_index()

# Ensure the DataFrame only has 'timestamp' and 'transaction_id' after resetting the index
hourly_footfall_prophet = hourly_footfall_prophet[['timestamp', 'transaction_id']]
hourly_footfall_prophet.columns = ['ds', 'y']  # Rename columns for Prophet

# Convert 'ds' to datetime and 'y' to numeric
hourly_footfall_prophet['ds'] = pd.to_datetime(hourly_footfall_prophet['ds'])
hourly_footfall_prophet['y'] = pd.to_numeric(hourly_footfall_prophet['y'], errors='coerce')

# Fit Prophet model
prophet_model = Prophet()
prophet_model.fit(hourly_footfall_prophet)

# Create future dataframe starting from the user-specified start_date
future_df = pd.date_range(start=start_date, end=end_date, freq='H')
future_prophet = pd.DataFrame({'ds': future_df})
forecast_prophet = prophet_model.predict(future_prophet)

# Creating a DataFrame for Prophet Forecast
prophet_forecast_df = forecast_prophet[['ds', 'yhat']]
prophet_forecast_df.columns = ['timestamp', 'transaction_id']

# Round off the Prophet forecasted data
prophet_forecast_df['transaction_id'] = prophet_forecast_df['transaction_id'].round()

# Save Prophet Forecast to CSV
prophet_forecast_df.to_csv(os.path.join(base_path, f'hourly_footfall_forecast_prophet_{stn_no}_{eqn_no}.csv'), index=False)

# Aggregation of Prophet Forecast
daily_forecast_prophet = prophet_forecast_df.copy()
daily_forecast_prophet.set_index('timestamp', inplace=True)

# Aggregation: Daily, Weekly, Monthly, Yearly
daily_forecast_prophet_resampled = daily_forecast_prophet.resample('D').sum()
weekly_forecast_prophet_resampled = daily_forecast_prophet.resample('W').sum()
monthly_forecast_prophet_resampled = daily_forecast_prophet.resample('M').sum()
yearly_forecast_prophet_resampled = daily_forecast_prophet.resample('Y').sum()

# Weekend and Weekday Aggregation
weekend_forecast_prophet_resampled = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek >= 5].resample('D').sum()
weekday_forecast_prophet_resampled = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek < 5].resample('D').sum()

# Add week of year and day of week to weekly forecast
weekly_forecast_prophet_resampled['week_of_year'] = weekly_forecast_prophet_resampled.index.isocalendar().week
weekly_forecast_prophet_resampled['day_of_week'] = weekly_forecast_prophet_resampled.index.day_name()

# Add month name to monthly forecast
monthly_forecast_prophet_resampled['month_name'] = monthly_forecast_prophet_resampled.index.month_name()

# Add day of week to weekend and weekday forecasts
weekend_forecast_prophet_resampled['day_of_week'] = weekend_forecast_prophet_resampled.index.day_name()
weekday_forecast_prophet_resampled['day_of_week'] = weekday_forecast_prophet_resampled.index.day_name()

# Save Aggregated Forecast Data to CSV
daily_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'daily_forecast_prophet_{stn_no}_{eqn_no}.csv'))
weekly_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'weekly_forecast_prophet_{stn_no}_{eqn_no}.csv'))
monthly_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'monthly_forecast_prophet_{stn_no}_{eqn_no}.csv'))
yearly_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'yearly_forecast_prophet_{stn_no}_{eqn_no}.csv'))
weekend_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'weekend_forecast_prophet_{stn_no}_{eqn_no}.csv'))
weekday_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'weekday_forecast_prophet_{stn_no}_{eqn_no}.csv'))

# Aggregation of Prophet Forecast Data by Hour of Day
prophet_forecast_df['hour'] = prophet_forecast_df['timestamp'].dt.hour
hourly_aggr_prophet = prophet_forecast_df.groupby('hour')['transaction_id'].sum().reset_index()

# Save Aggregated Hourly Prophet Forecast Data to CSV
hourly_aggr_prophet.to_csv(os.path.join(base_path, f'hourly_aggr_prophet_forecast_{stn_no}_{eqn_no}.csv'), index=False)

# Display the Prophet Forecast DataFrame
print(prophet_forecast_df.head())

print("Files saved to their respective destinations")


In [None]:
.index, hourly_df['transaction_id'], label='Hourly Footfall Forecast', color='orange')
        plt.title('Hourly Aggregated Forecast Footfall')
        plt.xlabel('Hour')
        plt.ylabel('Transaction ID')
        plt.legend()
        plt.grid(True)
        plt.show()
    
    else:
        # Load Dataset
        file_path = file_paths.get(data_type)
        if file_path:
            df = pd.read_csv(file_path)
            
            # Data Preprocessing
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df.set_index('timestamp', inplace=True)
            
            # Print the raw data to check its structure
            print(f"Raw data for {data_type}:")
            print(df.head())

            # Plotting the raw data
            plt.figure(figsize=(12, 6))
            plt.plot(df.index, df['transaction_id'], label=f'Raw {data_type.capitalize()} Footfall', color='blue')
            plt.title(f'Raw {data_type.capitalize()} Footfall Over Time')
            plt.xlabel('Timestamp')
            plt.ylabel('Transaction ID')
            plt.legend()
            plt.grid(True)
            plt.show()
        else:
            print(f"File path for '{data_type}' data type is not provided.")

def main():
    # Define the base path for output files
    base_path = r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors'
    
    # Get station number and equation number from the user
    stn_no = input("Enter the station number (stn_no): ").strip()
    eqn_no = input("Enter the equation number (eqn_no): ").strip()
    
    # Define file paths for different forecast types
    file_paths = {
        'daily': os.path.join(base_path, 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from prophet import Prophet
import os
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from prophet.diagnostics import cross_validation, performance_metrics

# Load Dataset
try:
    df = pd.read_csv('transaction.csv')  # Replace with your dataset path
except FileNotFoundError:
    print("Error: The file was not found.")
    exit()

# Data Preprocessing
df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
invalid_dates = df[df['Dt'].isnull()]
if not invalid_dates.empty:
    print("Some dates could not be parsed:")
    print(invalid_dates)
df = df.dropna(subset=['Dt'])
df.set_index('Dt', inplace=True)

# Filtering based on user input
stn_no = input("Enter station number (or 'all' for all stations): ")
eqn_no = input("Enter equipment number (or 'all' for all equipment): ")
if stn_no.lower() != 'all':
    df = df[df['Sta'] == int(stn_no)]
if eqn_no.lower() != 'all':
    df = df[df['EqN'] == int(eqn_no)]

# Aggregating the Footfall at Hourly Level
hourly_footfall = df.resample('H').count()
hourly_footfall = hourly_footfall.asfreq('H')

# Time-Series Decomposition for visual inspection
decomposition = seasonal_decompose(hourly_footfall['_id'], model='additive')
fig = decomposition.plot()
fig.suptitle('Time-Series Decomposition: Trend, Seasonal, and Residuals', y=1.02)
plt.show()

# Specify Forecast Period
while True:
    try:
        start_date = pd.to_datetime(input("Enter forecast start date (YYYY-MM-DD): "))
        end_date = pd.to_datetime(input("Enter forecast end date (YYYY-MM-DD): "))
        if start_date >= end_date:
            raise ValueError("Start date must be before end date.")
        break
    except ValueError as e:
        print(f"Error: {e}. Please enter valid dates.")

forecast_period = (end_date - start_date).days * 24

# File Path Base
base_path = r'C:\Users\admin\Desktop\airline\sensor-file-ridership\output of sensors'

# Convert holidays to DataFrame
holiday_df = pd.DataFrame({
    'holiday': 'Indian Holiday',
    'ds': list(pd.to_datetime(list(indian_holidays.keys()))),
    'lower_window': 0,
    'upper_window': 1,
})

# Prophet Model - Time-Series Forecasting with Seasonalities
hourly_footfall_prophet = hourly_footfall.reset_index()
hourly_footfall_prophet = hourly_footfall_prophet[['Dt', '_id']]
hourly_footfall_prophet.columns = ['ds', 'y']
hourly_footfall_prophet['ds'] = hourly_footfall_prophet['ds'].dt.tz_localize(None)
hourly_footfall_prophet['y'] = pd.to_numeric(hourly_footfall_prophet['y'], errors='coerce')

prophet_model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=True,
    seasonality_prior_scale=20,  # Increased flexibility
    holidays_prior_scale=20,    # Increased impact of holidays
    changepoint_prior_scale=0.2  # Increased flexibility in trend changes
)

prophet_model.add_country_holidays(country_name='US')  # Example of adding country holidays

prophet_model.fit(hourly_footfall_prophet)

# Create future dataframe
future_df = pd.date_range(start=start_date, end=end_date, freq='H')
future_prophet = pd.DataFrame({'ds': future_df})

# Forecast with Prophet model
forecast_prophet = prophet_model.predict(future_prophet)

# Creating DataFrame for Prophet Forecast
prophet_forecast_df = forecast_prophet[['ds', 'yhat']]
prophet_forecast_df.columns = ['Dt', '_id']
prophet_forecast_df['_id'] = prophet_forecast_df['_id'].round()

# Save Prophet Forecast to CSV
prophet_forecast_df.to_csv(os.path.join(base_path, f'hourly_footfall_forecast_prophet_{stn_no}_{eqn_no}.csv'), index=False)

# Aggregation of Prophet Forecast
daily_forecast_prophet = prophet_forecast_df.copy()
daily_forecast_prophet.set_index('Dt', inplace=True)

# Aggregation: Daily, Weekly, Monthly, Yearly
daily_forecast_prophet_resampled = daily_forecast_prophet.resample('D').sum()
weekly_forecast_prophet_resampled = daily_forecast_prophet.resample('W').sum()
monthly_forecast_prophet_resampled = daily_forecast_prophet.resample('M').sum()
yearly_forecast_prophet_resampled = daily_forecast_prophet.resample('Y').sum()

# Weekend and Weekday Aggregation
weekend_forecast_prophet_resampled = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek >= 5].resample('D').sum()
weekday_forecast_prophet_resampled = daily_forecast_prophet[daily_forecast_prophet.index.dayofweek < 5].resample('D').sum()

# Add week of year and day of week to weekly forecast
weekly_forecast_prophet_resampled['week_of_year'] = weekly_forecast_prophet_resampled.index.isocalendar().week
weekly_forecast_prophet_resampled['day_of_week'] = weekly_forecast_prophet_resampled.index.day_name()

# Add month name to monthly forecast
monthly_forecast_prophet_resampled['month_name'] = monthly_forecast_prophet_resampled.index.month_name()

# Add day of week to weekend and weekday forecasts
weekend_forecast_prophet_resampled['day_of_week'] = weekend_forecast_prophet_resampled.index.day_name()
weekday_forecast_prophet_resampled['day_of_week'] = weekday_forecast_prophet_resampled.index.day_name()

# Save Aggregated Forecast Data to CSV
daily_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'daily_forecast_prophet_{stn_no}_{eqn_no}.csv'))
weekly_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'weekly_forecast_prophet_{stn_no}_{eqn_no}.csv'))
monthly_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'monthly_forecast_prophet_{stn_no}_{eqn_no}.csv'))
yearly_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'yearly_forecast_prophet_{stn_no}_{eqn_no}.csv'))
weekend_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'weekend_forecast_prophet_{stn_no}_{eqn_no}.csv'))
weekday_forecast_prophet_resampled.to_csv(os.path.join(base_path, f'weekday_forecast_prophet_{stn_no}_{eqn_no}.csv'))

# Aggregation of Prophet Forecast Data by Hour of Day
prophet_forecast_df['hour'] = prophet_forecast_df['Dt'].dt.hour
hourly_aggr_prophet = prophet_forecast_df.groupby('hour')['_id'].sum().reset_index()
hourly_aggr_prophet.to_csv(os.path.join(base_path, f'hourly_aggr_prophet_forecast_{stn_no}_{eqn_no}.csv'), index=False)

# Validate Performance for Hourly Forecast
# Ensure date formats and time zones are aligned
hourly_footfall_prophet['ds'] = pd.to_datetime(hourly_footfall_prophet['ds']).dt.tz_localize(None)
prophet_forecast_df['Dt'] = pd.to_datetime(prophet_forecast_df['Dt']).dt.tz_localize(None)

# Reindex and merge data
hourly_actual_prophet = hourly_footfall_prophet.set_index('ds').reindex(prophet_forecast_df['Dt']).reset_index()
hourly_actual_prophet.columns = ['Dt', 'Actual']

# Print date ranges and samples for debugging
print("Hourly Footfall Date Range:", hourly_footfall_prophet['ds'].min(), "to", hourly_footfall_prophet['ds'].max())
print("Forecast Date Range:", prophet_forecast_df['Dt'].min(), "to", prophet_forecast_df['Dt'].max())
print("Sample dates from hourly_footfall_prophet:")
print(hourly_footfall_prophet.head())
print("Sample dates from prophet_forecast_df:")
print(prophet_forecast_df.head())

# Check if the hourly_actual_prophet DataFrame has actual data
if hourly_actual_prophet['Actual'].isnull().all():
    print("Warning: No actual data available for the forecast period.")
else:
    print("Actual data available for some or all forecast periods.")

# Merge forecast and actual data
merged_hourly_df = pd.merge(prophet_forecast_df, hourly_actual_prophet, on='Dt', how='inner')

# Plotting Forecast vs Actual
plt.figure(figsize=(12, 6))
plt.plot(merged_hourly_df['Dt'], merged_hourly_df['_id'], label='Forecast', color='blue')
plt.plot(merged_hourly_df['Dt'], merged_hourly_df['Actual'], label='Actual', color='orange')
plt.xlabel('Date')
plt.ylabel('Footfall')
plt.title('Hourly Forecast vs Actual')
plt.legend()
plt.show()

prophet_model.plot_components(forecast_prophet)
plt.show()

# Error Metrics
mae = mean_absolute_error(merged_hourly_df['Actual'], merged_hourly_df['_id'])
mse = mean_squared_error(merged_hourly_df['Actual'], merged_hourly_df['_id'])
rmse = np.sqrt(mse)
print(f"MAE: {mae}, MSE: {mse}, RMSE: {rmse}")

# Cross-validation for Prophet Model
cv_results = cross_validation(prophet_model, initial='365 days', period='180 days', horizon='30 days', parallel="processes")
cv_metrics = performance_metrics(cv_results)
print(cv_metrics)

# Residual Analysis
merged_hourly_df['Residual'] = merged_hourly_df['Actual'] - merged_hourly_df['_id']
plt.figure(figsize=(12, 6))
plt.plot(merged_hourly_df['Dt'], merged_hourly_df['Residual'], label='Residuals')
plt.axhline(y=0, color='r', linestyle='--', label='Zero Line')
plt.xlabel('Date')
plt.ylabel('Residual')
plt.title('Residuals Analysis')
plt.legend()
plt.show()

# Create DataFrame for holidays
holiday_df_actual = hourly_footfall_prophet[hourly_footfall_prophet['ds'].isin(holiday_df['ds'])]

# Aggregate total footfall on holidays
holiday_agg = holiday_df_actual.groupby('ds').agg({'y': 'sum'}).reset_index()

# Plot holiday data
plt.figure(figsize=(12, 6))
plt.bar(holiday_agg['ds'], holiday_agg['y'], color='green')
plt.xlabel('Holiday Date')
plt.ylabel('Total Footfall')
plt.title('Footfall on Holidays')
plt.xticks(rotation=45)
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from darts import TimeSeries
from darts.models import NBEATSModel
from darts.metrics import mae, rmse

# Load and preprocess your data
df = pd.read_csv('transaction.csv')  # Replace with your dataset path
df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
df = df.dropna(subset=['Dt'])
df.set_index('Dt', inplace=True)

# Filtering based on user input
stn_no = input("Enter station number (or 'all' for all stations): ")
eqn_no = input("Enter equipment number (or 'all' for all equipment): ")
if stn_no.lower() != 'all':
    df = df[df['Sta'] == int(stn_no)]
if eqn_no.lower() != 'all':
    df = df[df['EqN'] == int(eqn_no)]

# Aggregating the Footfall at Hourly Level
hourly_footfall = df.resample('H').count()
hourly_footfall = hourly_footfall.asfreq('H')

# Convert to TimeSeries
series = TimeSeries.from_dataframe(hourly_footfall, value_cols='_id')

# Split data into training and test sets
train, test = series[:-48], series[-48:]

# Initialize and train N-BEATS model
model = NBEATSModel(input_chunk_length=24, output_chunk_length=24, n_epochs=100)
model.fit(train, verbose=True)

# Forecast
forecast = model.predict(len(test))

# Evaluate the model
error_mae = mae(test, forecast)
error_rmse = rmse(test, forecast)

print(f'MAE: {error_mae}')
print(f'RMSE: {error_rmse}')

# Plot results
plt.figure(figsize=(12, 6))
series.plot(label='Actual')
forecast.plot(label='Forecast', color='orange')
plt.xlabel('Date')
plt.ylabel('Footfall')
plt.title('Hourly Forecast vs Actual')
plt.legend()
plt.show()


Enter station number (or 'all' for all stations):  all
Enter equipment number (or 'all' for all equipment):  all


  hourly_footfall = df.resample('H').count()
  hourly_footfall = hourly_footfall.asfreq('H')
The provided DatetimeIndex was associated with a timezone, which is currently not supported by xarray. To avoid unexpected behaviour, the tz information was removed. Consider calling `ts.time_index.tz_localize(UTC)` when exporting the results.To plot the series with the right time steps, consider setting the matplotlib.pyplot `rcParams['timezone']` parameter to automatically convert the time axis back to the original timezone.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | 

Training: |                                                                                      | 0/? [00:00<…