In [None]:
# !pip install tensorflow
# !pip install pandas
# !pip install numpy
# !pip install scikit-learn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from keras.models import load_model 

In [None]:
# Read the receipts CSV, parse the '# Date' column into timestamps, use it as
# the index, and sum the rows onto a regular one-row-per-calendar-day grid.
data = (
    pd.read_csv('data_daily.csv')
    .assign(**{'# Date': lambda frame: pd.to_datetime(frame['# Date'])})
    .set_index('# Date')
    .resample('D')
    .sum()
)

In [None]:
# Display the column labels of the current `data` frame.
data.columns

In [None]:
# Preview the first rows of `data`.
data.head()

In [None]:
# Fit a MinMaxScaler on the receipt counts (as a single-feature column vector)
# and overwrite the column with the scaled values. `scaler` is kept so that
# model outputs can be mapped back to the original scale later.
scaler = MinMaxScaler()
data['Receipt_Count'] = scaler.fit_transform(
    data['Receipt_Count'].to_numpy()[:, np.newaxis]
)

In [None]:
def create_sequences(data, sequence_length):
    """Build sliding-window samples for one-step-ahead forecasting.

    Args:
        data: 1-D (or N-D) sequence of observations: a list, NumPy array or
            pandas Series/DataFrame. It is converted to an ndarray first so
            that every lookup below is positional, whatever index labels a
            pandas object carries.
        sequence_length: Number of consecutive observations in each window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X[k]`` is
        ``data[k : k + sequence_length]`` and ``y[k]`` is the observation that
        immediately follows that window. Both arrays are empty when ``data``
        has no more than ``sequence_length`` observations.
    """
    # A labelled Series would treat `series[i]` as a label lookup (or a
    # deprecated positional fallback); an ndarray is always positional.
    values = np.asarray(data)
    window_starts = range(len(values) - sequence_length)
    X = [values[start:start + sequence_length] for start in window_starts]
    y = [values[start + sequence_length] for start in window_starts]
    return np.array(X), np.array(y)

# Window length in time steps. `data` is resampled to daily frequency, so each
# sample holds 12 consecutive daily values and the target is the following day.
sequence_length = 12  # number of consecutive daily observations per input window
X, y = create_sequences(data['Receipt_Count'], sequence_length)

In [None]:
# Chronological 80/20 split of the windowed samples.
# The cut point is derived from len(X), not len(data): X has `sequence_length`
# fewer rows than `data`, so sizing the split from `data` would push more than
# 80% of the samples into the training set.
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# X and y are already NumPy arrays, so slicing them needs no further conversion.
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

In [None]:
# Single-layer LSTM regressor: 50 recurrent units reading windows of shape
# (sequence_length, 1), followed by one linear output unit; trained with
# Adam on mean squared error.
model = Sequential([
    LSTM(50, input_shape=(sequence_length, 1)),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Train for 50 epochs with mini-batches of 32 samples.
# NOTE(review): X_train is built from a 1-D series, so it is presumably
# (samples, sequence_length) while the LSTM declares (sequence_length, 1);
# this appears to rely on Keras adding the trailing feature axis - confirm,
# or reshape explicitly.
model.fit(X_train, y_train, epochs=50, batch_size=32)

In [None]:
# Seed the rolling window with the most recent `sequence_length` scaled values.
last_sequence = data['Receipt_Count'].values[-sequence_length:]
predictions = []

# Recursive one-step-ahead forecasting: predict, record, then slide the window
# forward by dropping its oldest value and appending the new prediction.
for _ in range(sequence_length):
    input_data = last_sequence.reshape(1, sequence_length, 1)
    prediction = model.predict(input_data)
    next_value = prediction[0, 0]
    predictions.append(next_value)
    last_sequence = np.concatenate((last_sequence[1:], [next_value]))

# Map the scaled forecasts back to the original receipt-count scale.
predictions = scaler.inverse_transform(np.asarray(predictions).reshape(-1, 1))
predictions.shape

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# The model was trained on daily windows, so every forecast step is one day
# ahead. Date the forecasts as the days that follow the last observation
# instead of spreading them over month-end dates.
forecast_dates = pd.date_range(
    start=data.index[-1] + pd.Timedelta(days=1),
    periods=len(predictions),
    freq='D',
)

# data['Receipt_Count'] holds MinMax-scaled values at this point, while
# `predictions` has been inverse-transformed. Bring the history back to the
# original scale so both lines share one y-axis.
historical_counts = scaler.inverse_transform(
    data['Receipt_Count'].to_numpy().reshape(-1, 1)
).ravel()

# Create the figure and plot the data
plt.figure(figsize=(12, 8))
plt.plot(data.index, historical_counts, label='Historical Data')
plt.plot(forecast_dates, np.ravel(predictions), label='Predictions', color='red')
plt.title('Scanned Receipts Forecast for 2022')
plt.xlabel('Date')
plt.ylabel('Number of Receipts')
plt.legend(loc='best')
plt.show()

In [None]:
# Persist the trained model to "final1.h5" in the working directory.
model.save("final1.h5") 

In [None]:
from tensorflow.keras.models import load_model

In [None]:
# Reload the model from the file this notebook writes ("final1.h5"). The
# previous name, 'final.h5', is never produced by the notebook, so a fresh
# Restart & Run All would fail to find it.
model = load_model('final1.h5')
model.summary()

In [None]:
# Start from the latest `sequence_length` scaled observations.
last_sequence = data['Receipt_Count'].values[-sequence_length:]
predictions = []

# Twelve recursive one-step forecasts with the reloaded model; the shape of
# each raw model output is printed as a sanity check.
for month in range(12):
    a = last_sequence.reshape(1, sequence_length, 1)
    prediction = model.predict(a)
    print(prediction.shape)
    next_value = prediction[0, 0]
    predictions.append(next_value)
    last_sequence = np.concatenate((last_sequence[1:], [next_value]))

# Convert the scaled forecasts back to receipt counts.
predictions = scaler.inverse_transform(np.asarray(predictions).reshape(-1, 1))
predictions.shape

In [None]:
# '# Date' was moved into the index by set_index(), so it is no longer a
# column and data['# Date'] raises KeyError. Read the last timestamp from the
# index instead.
last_date = data.index[-1]
last_date
#date_range = [last_date + timedelta(days=i) for i in range(1, sequence_length + 13)]

In [None]:
# Display the column labels of the current `data` frame.
data.columns

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Step 1: Data Preparation
# Read 'data_daily.csv', parse its '# Date' column, index by it and sum onto
# a one-row-per-day grid.
data = (
    pd.read_csv('data_daily.csv')
    .assign(**{'# Date': lambda frame: pd.to_datetime(frame['# Date'])})
    .set_index('# Date')
    .resample('D')
    .sum()
)

# Step 2: Data Preprocessing
# Scale the receipt counts with a MinMaxScaler; `scaler` is reused below to
# undo the scaling.
scaler = MinMaxScaler()
data['Receipt_Count'] = scaler.fit_transform(data[['Receipt_Count']])

# Sliding windows over the first column: `look_back` consecutive days as the
# input, the day right after the window as the target.
look_back = 30
first_column = data.iloc[:, 0].values
window_starts = range(len(data) - look_back)
X = np.array([first_column[start:start + look_back] for start in window_starts])
y = np.array([first_column[start + look_back] for start in window_starts])

# Chronological split: first 80% of the windows for training, rest for testing.
train_size = int(len(X) * 0.8)
trainX, trainY = X[:train_size], y[:train_size]
testX, testY = X[train_size:], y[train_size:]

# Step 3: Create and Train the LSTM Model
model = Sequential([
    LSTM(50, input_shape=(look_back, 1)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(trainX, trainY, batch_size=1, epochs=5, verbose=2)

# Step 4: Model Evaluation
# Predict the held-out windows and bring both predictions and targets back to
# the original scale.
testPredict = scaler.inverse_transform(model.predict(testX))
testY = scaler.inverse_transform(testY.reshape(-1, 1))
print(testY.shape)

In [None]:
# Step 5: Monthly Receipt Prediction for 2022
# Forecast one day at a time, feeding each prediction back into the input
# window, then aggregate the daily forecasts into monthly totals.

# Monthly totals for 2022, in original (unscaled) receipt counts
monthly_predictions_2022 = []

# Seed the window with the last `look_back` observed (scaled) days.
# testX is 2-D (windows x look_back), so testX[-look_back:] selects many
# windows and cannot be reshaped to a single (1, look_back, 1) sample.
input_data = data['Receipt_Count'].to_numpy()[-look_back:].reshape(1, look_back, 1)
print(input_data.shape)


# Define the number of days in each month in 2022
days_in_months_2022 = {
    1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30,
    7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31
}

# Iterate through each month in 2022
for month in range(1, 13):
    monthly_receipt_predictions = []

    # Predict each day of the current month
    for day in range(days_in_months_2022[month]):
        # Predict the next day's receipts (scaled); verbose=0 avoids printing
        # one progress bar per forecast day.
        predicted_receipts = model.predict(input_data, verbose=0)
        monthly_receipt_predictions.append(predicted_receipts[0, 0])

        # Slide the window: drop the oldest day, append the new prediction
        input_data = np.append(input_data[:, 1:, :], predicted_receipts.reshape(1, 1, 1), axis=1)

    # The model outputs scaled values; convert the month's daily forecasts
    # back to receipt counts before summing, otherwise the "total" is a sum
    # of numbers in the scaler's 0-1 range.
    daily_receipts = scaler.inverse_transform(
        np.array(monthly_receipt_predictions).reshape(-1, 1)
    )
    monthly_predictions_2022.append(float(daily_receipts.sum()))

# Display the monthly predictions for 2022
for month, prediction in enumerate(monthly_predictions_2022, start=1):
    print(f"Month {month}: {prediction:.2f} scanned receipts")


In [None]:
# Display the column labels of the current `data` frame.
data.columns

In [None]:
##
import pandas as pd

# Reload the daily receipts file from scratch: parse '# Date', index by it,
# and sum onto a regular daily grid.
data = (
    pd.read_csv('data_daily.csv')
    .assign(**{'# Date': lambda frame: pd.to_datetime(frame['# Date'])})
    .set_index('# Date')
    .resample('D')
    .sum()
)

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Scale the receipt counts and keep the fitted scaler for the inverse mapping.
scaler = MinMaxScaler()
data['Receipt_Count'] = scaler.fit_transform(data[['Receipt_Count']])

# Sliding windows over the first column: `look_back` consecutive days form the
# input and the day immediately after the window is the target.
look_back = 12
first_column = data.iloc[:, 0].values
window_starts = range(len(data) - look_back)
X = np.array([first_column[start:start + look_back] for start in window_starts])
y = np.array([first_column[start + look_back] for start in window_starts])


In [None]:
# Chronological hold-out: the first 80% of the windows train the model and the
# remaining 20% are kept for evaluation.
train_size = int(len(X) * 0.8)
trainX = X[:train_size]
trainY = y[:train_size]
testX = X[train_size:]
testY = y[train_size:]


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# LSTM(50) over windows of shape (look_back, 1) with a single linear output,
# optimised with Adam against mean squared error.
model = Sequential([
    LSTM(50, input_shape=(look_back, 1)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')


In [None]:
# Train for 50 epochs with mini-batches of 32; verbose=2 logs one line per epoch.
model.fit(trainX, trainY, epochs=50, batch_size=32, verbose=2)

In [None]:
from sklearn.metrics import mean_squared_error

# Predict the held-out windows and map the predictions back to receipt counts.
testPredict = scaler.inverse_transform(model.predict(testX))

# Keep the scaled `testY` untouched and store the unscaled targets under a new
# name. Overwriting `testY` would inverse-transform it a second time whenever
# this cell is re-run, silently inflating the reported error.
testY_actual = scaler.inverse_transform(testY.reshape(-1, 1))

# Evaluate the model on the original scale (other metrics can be added here)
mse = mean_squared_error(testY_actual, testPredict)
print(f"Mean Squared Error: {mse}")

In [None]:
# Seed the rolling window with the last `look_back` scaled observations.
# `look_back` is the window length this section's model was built with; the
# previous code used `sequence_length`, a variable left over from an earlier
# section that only happens to hold the same value.
last_sequence = data['Receipt_Count'][-look_back:].values
predictions = []

# Recursive one-step-ahead forecasts: predict, record, slide the window.
for _ in range(look_back):
    input_data = last_sequence.reshape(1, look_back, 1)
    prediction = model.predict(input_data)
    predictions.append(prediction[0, 0])
    last_sequence = np.append(last_sequence[1:], prediction[0, 0])

# Inverse transform the predictions to get the original scale
predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))
predictions.shape

In [None]:
# Monthly totals of the day-by-day forecast, in original receipt counts.
# NOTE(review): names and comments say 2023, but the forecast simply starts on
# the day after the last row of `data` - confirm which year is intended.
monthly_predictions_2023 = []

# Seed the input window with the last `look_back` scaled observations
a = data['Receipt_Count'][-look_back:].values 
input_data = a.reshape(1, look_back, 1)
print(a)
# Define the number of days in each month in 2023
days_in_months_2023 = {
    1: 31, 2: 28, 3: 31, 4: 30, 5: 31, 6: 30,
    7: 31, 8: 31, 9: 30, 10: 31, 11: 30, 12: 31
}

# Iterate through each month in 2023
for month in range(1, 13):
    monthly_sales_predictions = []

    # Predict each day of the current month
    for day in range(days_in_months_2023[month]):
        # Predict the next day's value (still in the scaler's range)
        predicted_sales = model.predict(input_data)
        monthly_sales_predictions.append(predicted_sales[0, 0])

        # Slide the window: drop the oldest day, append the new prediction
        input_data = np.append(input_data[:, 1:, :], predicted_sales.reshape(1, 1, 1), axis=1)

    # Convert the month's scaled daily forecasts back to receipt counts before
    # summing; summing the scaled values does not give a receipt total.
    daily_counts = scaler.inverse_transform(
        np.array(monthly_sales_predictions).reshape(-1, 1)
    )
    monthly_predictions_2023.append(float(daily_counts.sum()))


In [None]:
# Display the current `data` frame.
# NOTE(review): consider data.head() to keep the rendered output short.
data

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow import keras

In [None]:
# Load the daily receipt counts from 'data_daily.csv'
data = pd.read_csv('data_daily.csv')

# Derive calendar features from the date. The CSV column is named '# Date';
# there is no 'Date' column, so data['Date'] raised KeyError.
data['# Date'] = pd.to_datetime(data['# Date'])
data['Year'] = data['# Date'].dt.year
data['Month'] = data['# Date'].dt.month

# Aggregate data by month (one row per month number)
monthly_data = data.groupby('Month')['Receipt_Count'].sum().reset_index()
print(monthly_data)

# Extract features and target.
# The month number is the only feature: `monthly_data` has no 'Year' column
# after the groupby, and the network below (and the prediction cells) expect
# exactly one input value per sample.
X = monthly_data[['Month']].values.reshape(-1, 1)
y = monthly_data['Receipt_Count'].values.reshape(-1, 1)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features (fit on the training rows only)
scaler_X = MinMaxScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

# Scale the target (fit on the training rows only)
scaler_y = MinMaxScaler()
y_train = scaler_y.fit_transform(y_train.reshape(-1, 1))
y_test = scaler_y.transform(y_test.reshape(-1, 1))

# Small fully connected regressor: month number in, scaled receipt total out
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(1,)),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1)  # Output layer
])

model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

In [None]:
# Month number (1-12) to predict; change the value to forecast another month
future_month = np.array([[3]])

# Scale the future month using the same scaler used for X
scaled_future_month = scaler_X.transform(future_month)

# Make the prediction
scaled_prediction = model.predict(scaled_future_month)

# Inverse transform the prediction to get the actual number of receipts
predicted_receipts = scaler_y.inverse_transform(scaled_prediction)

# Build the label from the month actually predicted; the message previously
# always said "December" regardless of `future_month`.
month_label = pd.Timestamp(year=2022, month=int(future_month[0, 0]), day=1).month_name()
print(f'Predicted number of receipts for {month_label} 2022: {predicted_receipts[0, 0]}')


In [None]:
# Score every month number 1-12 with the dense model and undo the target scaling.
future_months = np.arange(1, 13).reshape(-1, 1)
scaled_future_months = scaler_X.transform(future_months)
scaled_predictions = model.predict(scaled_future_months)
predicted_receipts = scaler_y.inverse_transform(scaled_predictions).flatten()

# One figure comparing the observed monthly totals with the model's output.
fig, ax = plt.subplots(figsize=(10, 6))

# Observed monthly totals
ax.plot(monthly_data['Month'], monthly_data['Receipt_Count'], marker='o', linestyle='-', color='b', label='Historical Data')

# Model output for each month
ax.plot(np.arange(1, 13), predicted_receipts, marker='o', linestyle='--', color='r', label='Predicted Data (2022)')

ax.set_xlabel('Month')
ax.set_ylabel('Receipts')
ax.set_title('Historical and Predicted Receipts for 2021 and 2022')
ax.set_xticks(np.arange(1, 13))
ax.legend()
ax.grid(True)

plt.show()


In [None]:
# Display the current `data` frame.
data

In [None]:
# Display the column labels of the current `data` frame.
data.columns

In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Read 'data_daily.csv', parse its '# Date' column and use it as the index.
data = (
    pd.read_csv('data_daily.csv')
    .assign(**{'# Date': lambda frame: pd.to_datetime(frame['# Date'])})
    .set_index('# Date')
)

# Sum the receipt counts onto a daily grid. Despite its name, `monthly_data`
# is resampled with 'D', i.e. it holds one value per day.
monthly_data = data['Receipt_Count'].resample('D').sum()

# Scale the series as a single-feature column vector; `scaler` is kept for the
# inverse mapping.
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(monthly_data.to_numpy()[:, np.newaxis])

In [8]:
def create_sequences(data, seq_length):
    """Turn a sequence into sliding windows and their next-step targets.

    Args:
        data: Indexable, sliceable sequence of observations (e.g. a NumPy
            array or a list).
        seq_length: Number of consecutive observations per window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays where ``X[k]`` is
        ``data[k : k + seq_length]`` and ``y[k]`` is ``data[k + seq_length]``.
    """
    window_starts = range(len(data) - seq_length)
    windows = [data[start:start + seq_length] for start in window_starts]
    targets = [data[start + seq_length] for start in window_starts]
    return np.array(windows), np.array(targets)

# Window length, counted in rows of `normalized_data`.
seq_length = 12  # You can adjust this based on your data and requirements.
# Every window goes into the training set here; no hold-out split is made.
X_train, y_train = create_sequences(normalized_data, seq_length)

In [3]:
# train_size = int(0.8 * len(data))
# X_train, X_test = X[:train_size], X[train_size:]
# y_train, y_test = y[:train_size], y[train_size:]

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# LSTM with 50 ReLU-activated units over windows of shape (seq_length, 1),
# followed by one linear output unit; Adam optimiser, mean-squared-error loss.
model = Sequential([
    LSTM(units=50, activation='relu', input_shape=(seq_length, 1)),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')

In [5]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 50)                10400     
                                                                 
 dense (Dense)               (None, 1)                 51        
                                                                 
Total params: 10451 (40.82 KB)
Trainable params: 10451 (40.82 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [6]:
# Train for 50 epochs with mini-batches of 32 samples.
# NOTE(review): the saved output of this cell is "ValueError: Expected input
# data to be non-empty", so X_train was presumably empty in that run (no more
# than seq_length rows in the series) - re-run the data cells and confirm.
model.fit(X_train, y_train, epochs=50, batch_size=32)

ValueError: Expected input data to be non-empty.

In [None]:
# Rolling series used for recursive forecasting, seeded with the last
# `seq_length` scaled observations. It is stored as a flat list of Python
# floats: normalized_data[-seq_length:] is 2-D, and mixing its shape-(1,)
# rows with the scalar predictions appended below would make np.array() fail
# with an inhomogeneous-shape error on the second iteration.
simulated_data_2022 = normalized_data[-seq_length:].ravel().tolist()

def generate_monthly_predictions(model, current_seq):
    """Return the model's single-step prediction for one input window.

    Args:
        model: Object exposing ``predict``; called with ``current_seq``.
        current_seq: Input window, passed to ``model.predict`` unchanged.

    Returns:
        The element at ``[0][0]`` of the prediction array.
    """
    predicted_value = model.predict(current_seq)
    return predicted_value[0][0]

# Produce 12 recursive one-step forecasts.
# NOTE(review): comments call these steps "months", but the series above is
# resampled with 'D', so each step is presumably one day - confirm.
for _ in range(12):
    # Latest `seq_length` values (observed and/or predicted) as one sample
    current_seq = np.array(simulated_data_2022[-seq_length:]).reshape(1, seq_length, 1)
    
    # Generate the prediction for the next step
    predicted_value = generate_monthly_predictions(model, current_seq)
    
    # Append as a plain float so the list stays homogeneous
    simulated_data_2022.append(float(predicted_value))

# Inverse transform the simulated data to the original scale.
# The first `seq_length` rows are the observed seed values; the forecasts
# follow them.
simulated_data_2022 = scaler.inverse_transform(np.array(simulated_data_2022).reshape(-1, 1))

In [None]:
# Rebuild the seed window (last `seq_length` rows of the scaled series) as a
# list and display it. This overwrites any earlier `simulated_data_2022`.
simulated_data_2022 = list(normalized_data[-seq_length:])
simulated_data_2022