In [5]:
import os
import pandas as pd
import datetime

# Location of the CSV. Set the PRODUCTS_SOLD_CSV environment variable to point
# at your own copy of the file; when it is unset, the original local path below
# is used so existing runs behave exactly as before.
file_path = os.environ.get(
    "PRODUCTS_SOLD_CSV",
    r'C:\Users\antho\OneDrive\Bureau\BA Capstone\Lab Time series\Dataset_Time Series I_HW_Number of Products Sold.csv',
)
# isfile (not exists): a directory at this path would otherwise pass the check
# and only fail later inside read_csv with a less helpful error.
if not os.path.isfile(file_path):
    raise FileNotFoundError(f"File not found: {file_path}")

# Load the dataset
df = pd.read_csv(file_path)

# Inspect the first few rows and columns
print("Dataset head:")
print(df.head())
print("\nColumns in dataset:")
print(df.columns)

# Everything below needs a "Date" column; fail early with the available
# column names instead of a bare KeyError: 'Date'.
if 'Date' not in df.columns:
    raise KeyError(
        f"Expected a 'Date' column in {file_path}; found columns: {list(df.columns)}"
    )

# Parse the dates, use them as the index and order the rows chronologically.
# The raw values look like M/D/YYYY (e.g. 1/2/2010); pandas reads such strings
# month-first by default.
df = (
    df.assign(Date=pd.to_datetime(df['Date']))
    .set_index('Date')
    .sort_index()
)

# Numeric time feature: the proleptic Gregorian ordinal of each date
# (days counted from 0001-01-01), usable as a regression input.
df['DateOrdinal'] = df.index.map(datetime.datetime.toordinal)

# Inspect the DataFrame after adding the new feature
print("\nData after processing:")
print(df.head())


Dataset head:
       Date  store  product  number_sold
0  1/1/2010      0        0          801
1  1/2/2010      0        0          810
2  1/3/2010      0        0          818
3  1/4/2010      0        0          796
4  1/5/2010      0        0          808

Columns in dataset:
Index(['Date', 'store', 'product', 'number_sold'], dtype='object')

Data after processing:
            store  product  number_sold  DateOrdinal
Date                                                
2010-01-01      0        0          801       733773
2010-01-01      6        1          957       733773
2010-01-01      4        9          947       733773
2010-01-01      3        6          378       733773
2010-01-01      1        1          745       733773


In [9]:
from sklearn.linear_model import LinearRegression

# Predictor matrix (X) and response vector (y).
# `sales_column` must match the sales column name in the dataset exactly.
sales_column = "number_sold"  # Update if necessary
X = df.loc[:, ['DateOrdinal']]  # two-dimensional: a single-column DataFrame
y = df.loc[:, sales_column]

# Ordinary least-squares fit of sales against the date ordinal.
# `fit` returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X, y)

# Report the fitted parameters.
print("Linear Regression model fitted successfully!")
for label, value in (("Coefficient:", model.coef_), ("Intercept:", model.intercept_)):
    print(label, value)


Linear Regression model fitted successfully!
Coefficient: [0.00207946]
Intercept: -748.3398870912031


In [11]:
import numpy as np

# Number of future days to forecast.
forecast_horizon_days = 30

# Latest date present in the data. max() does not depend on the index
# having been sorted beforehand, unlike taking the final element.
last_date = df.index.max()

# Daily date range starting the day after the last observation
future_dates = pd.date_range(
    start=last_date + pd.Timedelta(days=1),
    periods=forecast_horizon_days,
)

# Convert these future dates to ordinal values, shaped (n_days, 1) for prediction
future_ordinals = np.array([d.toordinal() for d in future_dates]).reshape(-1, 1)

# The model was fitted on a DataFrame whose only column is 'DateOrdinal'.
# Predicting on a bare ndarray makes scikit-learn warn that X has no valid
# feature names, so wrap the ordinals in a frame with the same column name.
future_features = pd.DataFrame(
    future_ordinals, index=future_dates, columns=['DateOrdinal']
)

# Predict sales for these future dates
predictions = model.predict(future_features)

# Create a DataFrame for the forecast, indexed by the forecast dates
forecast_df = pd.DataFrame(predictions, index=future_dates, columns=["Predicted Sales"])

print(f"Forecast for the next {forecast_horizon_days} days:")
print(forecast_df)


Forecast for the next 30 days:
            Predicted Sales
2019-01-01       784.344735
2019-01-02       784.346814
2019-01-03       784.348893
2019-01-04       784.350973
2019-01-05       784.353052
2019-01-06       784.355132
2019-01-07       784.357211
2019-01-08       784.359291
2019-01-09       784.361370
2019-01-10       784.363450
2019-01-11       784.365529
2019-01-12       784.367609
2019-01-13       784.369688
2019-01-14       784.371768
2019-01-15       784.373847
2019-01-16       784.375926
2019-01-17       784.378006
2019-01-18       784.380085
2019-01-19       784.382165
2019-01-20       784.384244
2019-01-21       784.386324
2019-01-22       784.388403
2019-01-23       784.390483
2019-01-24       784.392562
2019-01-25       784.394642
2019-01-26       784.396721
2019-01-27       784.398800
2019-01-28       784.400880
2019-01-29       784.402959
2019-01-30       784.405039




In [None]:
import matplotlib.pyplot as plt

# Several rows share each date (the printed sample shows different
# store/product values on 2010-01-01), so drawing the raw rows as one connected
# line zig-zags vertically at every date. Collapse the observations to one
# value per day first. The mean is used because a least-squares line fitted on
# the raw rows, with the date as the only feature, estimates the average row
# value for a given date, so this is the quantity the forecast is comparable to.
observed_daily = y.groupby(level=0).mean()

fig, ax = plt.subplots(figsize=(12, 6))

# Observed daily mean as a solid blue line; small markers keep the long
# history readable.
ax.plot(
    observed_daily.index,
    observed_daily.values,
    label='Observed (daily mean)',
    marker='o',
    markersize=2,
    linestyle='-',
    color='blue',
)

# Forecast as a dashed red line with markers
ax.plot(
    forecast_df.index,
    forecast_df['Predicted Sales'],
    label='Forecast',
    marker='o',
    linestyle='--',
    color='red',
)

ax.set_xlabel('Date')
ax.set_ylabel('Number of Products Sold')
ax.set_title('30-Day Forecast using Linear Regression')
ax.legend()
ax.grid(True)
plt.show()


