In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load data
data = pd.read_csv('sales_data.csv')

# Data cleaning and preparation
# Only 'date' and 'sales' feed the analysis below, so only gaps in those two
# columns disqualify a row; a blank in any other column must not silently
# discard an otherwise valid sale and understate the monthly totals.
data.dropna(subset=['date', 'sales'], inplace=True)
data['date'] = pd.to_datetime(data['date']) # Convert date column to datetime format
data.set_index('date', inplace=True) # Set date column as index
# Select the sales column *before* aggregating so non-numeric columns are never
# summed, and pass a MonthEnd offset object rather than the 'M' string alias,
# which newer pandas releases deprecate. The result is a one-column DataFrame
# of month-end-stamped sales totals.
data_monthly = data[['sales']].resample(pd.offsets.MonthEnd()).sum()

# Data visualization and exploration
plt.figure(figsize=(10, 5))
# lineplot hands back the Axes it drew on, so the title and both axis labels
# are applied to that Axes in a single call.
sns.lineplot(data=data_monthly, x=data_monthly.index, y='sales').set(
    title='Monthly Sales Data',
    xlabel='Date',
    ylabel='Sales',
)
plt.show()

# Feature engineering
# Both derived columns are computed from the current frame and attached in one step.
data_monthly = data_monthly.assign(
    lag_1=data_monthly['sales'].shift(1),  # Sales shifted down one row; first row becomes NaN
    trend=np.arange(len(data_monthly)),  # 0-based row position used as a linear trend
)

# Model training and evaluation
# The first row has no lag value (shift(1) leaves it NaN), so it is sliced off
# before the feature and target columns are picked.
X = data_monthly.iloc[1:][['lag_1', 'trend']]
y = data_monthly.iloc[1:]['sales']
model = LinearRegression().fit(X, y)  # fit() returns the fitted estimator itself
# Predictions are made on the same rows the model was fitted on, so the error
# reported below is an in-sample (training) error.
y_pred = model.predict(X)
mse = mean_squared_error(y, y_pred)
rmse = np.sqrt(mse)
print('Root Mean Squared Error:', rmse)

# Forecasting
last_date = data_monthly.index[-1] # Get last date in data
# The monthly index is stamped at month end, so step with MonthEnd: adding
# DateOffset(months=1) to e.g. Feb 28 would give Mar 28 instead of Mar 31.
next_date = last_date + pd.offsets.MonthEnd(1)
next_trend = len(data_monthly) # trend runs 0..n-1 in the data, so the next value is n
# .iloc makes the lookup explicitly positional; plain [-1] on a datetime-indexed
# Series is treated as a label lookup by newer pandas and fails.
last_sales = data_monthly['sales'].iloc[-1]
# Feed the model a frame with the same column names and order it was fitted
# with, so features are matched by name rather than by list position.
next_features = pd.DataFrame({'lag_1': [last_sales], 'trend': [next_trend]})
next_sales = model.predict(next_features) # One-element array holding the forecast
forecast = pd.DataFrame({'date': [next_date], 'sales': next_sales}) # Create forecast DataFrame
forecast.set_index('date', inplace=True) # Set date column as index

# Data visualization
plt.figure(figsize=(10,5))
# Each series carries its own label so the legend entries cannot be mismatched
# by draw order.
sns.lineplot(x=data_monthly.index, y='sales', data=data_monthly, label='Actual')
# The forecast is a single point; a line through one point draws nothing, so a
# marker is required for it to appear on the chart.
sns.lineplot(x=forecast.index, y='sales', data=forecast, label='Forecast', marker='o')
plt.title('Monthly Sales Data with Forecast')
plt.xlabel('Date')
plt.ylabel('Sales')
plt.legend() # Built from the labels attached to the two series above
plt.show()
