In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
# Read the product revenue export into a DataFrame.
REVENUE_CSV = '../Queries/product_revenue.csv'  # path relative to the notebook
df = pd.read_csv(REVENUE_CSV)

In [3]:
# Keep only the rows whose order_year is 2021.
is_2021 = df['order_year'].eq(2021)  # boolean mask, one entry per row
df = df.loc[is_2021]

In [4]:
# Fit an ordinary least squares regression of total_revenue on order_month
# (plus an intercept) using the rows currently held in df.
y = df['total_revenue']                    # response variable
X = sm.add_constant(df[['order_month']])   # prepend the intercept ('const') column
model = sm.OLS(endog=y, exog=X)
results = model.fit()

In [6]:
# Report the headline statistics of the fitted model, one line per statistic.
# Each entry pairs a format template with the tuple of values it consumes.
stat_lines = [
    ('Coefficient: {:.2f}', (results.params['order_month'],)),
    ('P-value: {:.4f}', (results.pvalues['order_month'],)),
    ('R-squared: {:.4f}', (results.rsquared,)),
    ('Adjusted R-squared: {:.4f}', (results.rsquared_adj,)),
    # Standard error of the estimate = square root of the residual mean square.
    ('Standard error of the estimate: {:.2f}', (np.sqrt(results.mse_resid),)),
    ('F-statistic: {:.2f}', (results.fvalue,)),
    ('Degrees of freedom (DF): {}, {}', (results.df_model, results.df_resid)),
]

print('Linear Regression Statistics:')
print('----------------------------')
for template, values in stat_lines:
    print(template.format(*values))

Linear Regression Statistics:
----------------------------
Coefficient: -743.36
P-value: 0.0260
R-squared: 0.1649
Adjusted R-squared: 0.1351
Standard error of the estimate: 4973.27
F-statistic: 5.53
Degrees of freedom (DF): 1.0, 28.0


In [7]:
# Predict total_revenue for October-December 2021 from the fitted OLS model
# (`results`), then print each month's prediction and their sum.
#
# NOTE(review): df.describe() reports order_month max == 10 for the 2021 rows,
# so October appears to be part of the training data already; its value here
# would then be an in-sample fit rather than a forecast -- confirm intent.
# NOTE(review): the data has a product_type column and 30 rows over 10 months,
# so each prediction is presumably the expected revenue of one
# product_type/month row, not the all-product monthly total -- TODO confirm.
forecast_month_names = ['October', 'November', 'December']
forecast_month_numbers = [10, 11, 12]

# has_constant='add' forces the intercept column to be prepended. With the
# default ('skip'), add_constant returns the input unchanged whenever a column
# is constant (e.g. when only a single month is requested), which would hand
# results.predict() a design matrix without the intercept column.
X_pred = sm.add_constant(
    [[month_number] for month_number in forecast_month_numbers],
    has_constant='add',
)  # columns: [const, order_month], same layout as the training matrix
y_pred = results.predict(X_pred)  # one predicted revenue per forecast month

# Print the predicted revenue for the next quarter
print('Predicted revenue for Q4 2021 (in dollars):')
print('-------------------------------------------')
for month_name, month_revenue in zip(forecast_month_names, y_pred):
    print('{}: {:.2f}'.format(month_name, month_revenue))
print('Total: {:.2f}'.format(y_pred.sum()))


Predicted revenue for Q4 2021 (in dollars):
-------------------------------------------
October: 31048.22
November: 30304.87
December: 29561.51
Total: 90914.60


In [8]:
# Preview the first five rows of df.
df.head(n=5)

Unnamed: 0,product_type,order_year,order_month,total_revenue
0,Trousers,2021,1,40317
1,Trousers,2021,2,33662
2,Shirt,2021,9,32285
3,Jacket,2021,4,33790
4,Jacket,2021,8,38281


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,order_year,order_month,total_revenue
count,30.0,30.0,30.0
mean,2021.0,5.5,34393.333333
std,0.0,2.921384,5347.569685
min,2021.0,1.0,24040.0
25%,2021.0,3.0,30763.75
50%,2021.0,5.5,33691.0
75%,2021.0,8.0,37836.0
max,2021.0,10.0,45376.0
