In [2]:
import pandas as pd
import numpy as np
from scipy import stats
from sklearn.linear_model import LinearRegression

# Load the data from the CSV file
df = pd.read_csv('../Queries/product_revenue.csv')
df.head()

# Filter the data for the year 2021 and the first 9 months
df = df[(df['order_year'] == 2021) & (df['order_month'] < 10)]

# Convert the order_month column to a categorical variable
df['order_month'] = pd.Categorical(df['order_month'])

# Pivot the table to get the total revenue for each product type in each month
df_pivot = df.pivot(index='product_type', columns='order_month', values='total_revenue')

# Fill any missing values with 0
df_pivot = df_pivot.fillna(0)

# Create a new column for the total revenue for the first 9 months of 2021
df_pivot['total_revenue'] = df_pivot.sum(axis=1)

# Train a linear regression model to predict the total revenue for the next quarter
X = [[10], [11], [12]]
y = df_pivot['total_revenue'].values.reshape(-1, 1)
model = LinearRegression()
model.fit(X, y)

# Predict the total revenue for the next quarter
next_quarter_revenue = model.predict([[10], [11], [12]])
print(next_quarter_revenue)

# Calculate the R-squared and p-value for the linear regression model
X_t = np.transpose(X)
slope, intercept, r_value, p_value, std_err = stats.linregress(X_t.ravel(), y.ravel())
print(f"R-squared: {r_value**2}")
print(f"p-value: {p_value}")


[[325876.66666667]
 [315844.66666667]
 [305812.66666667]]
R-squared: 0.5895943732944382
p-value: 0.44265272131296707


In [3]:
df.head()

Unnamed: 0,product_type,order_year,order_month,total_revenue
0,Trousers,2021,1,40317
1,Trousers,2021,2,33662
2,Shirt,2021,9,32285
3,Jacket,2021,4,33790
4,Jacket,2021,8,38281


In [4]:
df_pivot.head()

order_month,1,2,3,4,5,6,7,8,9,total_revenue
product_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Jacket,42515,33704,45376,33790,30808,32337,38787,38281,35111,330709
Shirt,36501,27714,42469,33865,24040,35443,41213,32650,32285,306180
Trousers,40317,33662,43519,29875,30749,33678,36081,33634,29130,310645
