In [17]:
import pandas as pd
from sklearn.linear_model import LinearRegression

In [20]:
# Load the data from the CSV file
df = pd.read_csv('../Queries/product_revenue.csv')
df.head()

Unnamed: 0,product_type,order_year,order_month,total_revenue
0,Trousers,2021,1,40317
1,Trousers,2021,2,33662
2,Shirt,2021,9,32285
3,Jacket,2021,4,33790
4,Jacket,2021,8,38281


In [21]:
# Filter the data for the year 2021 and the first 9 months
df = df[(df['order_year'] == 2021) & (df['order_month'] < 10)]

In [22]:

# Convert the order_month column to a categorical variable
df['order_month'] = pd.Categorical(df['order_month'])

In [23]:
# Pivot the table to get the total revenue for each product type in each month
df_pivot = df.pivot(index='product_type', columns='order_month', values='total_revenue')


In [24]:
# Fill any missing values with 0
df_pivot = df_pivot.fillna(0)

In [25]:
# Create a new column for the total revenue for the first 9 months of 2021
df_pivot['total_revenue'] = df_pivot.sum(axis=1)

In [26]:
# Train a linear regression model to predict the total revenue for the next quarter
X = [[10], [11], [12]]
y = df_pivot['total_revenue'].values.reshape(-1, 1)
model = LinearRegression()
model.fit(X, y)


In [27]:
# Predict the total revenue for the next quarter
next_quarter_revenue = model.predict([[10], [11], [12]])
print(next_quarter_revenue)

[[325876.66666667]
 [315844.66666667]
 [305812.66666667]]


In [28]:
import numpy as np

# Get the coefficient and intercept of the linear regression line
coef = model.coef_[0]
intercept = model.intercept_

# Calculate the R-squared value of the linear regression model
y_pred = model.predict(X)
r_squared = model.score(X, y)

# Calculate the mean squared error of the linear regression model
mse = np.mean((y - y_pred) ** 2)

# Print the results
print('Coefficient:', coef)
print('Intercept:', intercept)
print('R-squared:', r_squared)
print('Mean squared error:', mse)

Coefficient: [-10032.]
Intercept: [426196.66666667]
R-squared: 0.5895943732944384
Mean squared error: 46702890.88888889


In [29]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Filter the data for the year 2021
df = df[df['order_year'] == 2021]

# Train a linear regression model to predict total revenue
X = df[['order_month']]
y = df['total_revenue']
X = sm.add_constant(X)  # Add a constant term to the input features
model = sm.OLS(y, X)
results = model.fit()

# Get the slope coefficient and p-value
coef = results.params['order_month']
p_value = results.pvalues['order_month']

# Print the results
print('Coefficient:', coef)
print('P-value:', p_value)

Coefficient: -496.7277777777776
P-value: 0.2007010610845552
