# Walmart Storewise Sales Analysis and Prediction

### 2.1)Importing Libraries

In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder , FunctionTransformer , LabelEncoder , PowerTransformer
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer 
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score ,mean_absolute_percentage_error , accuracy_score
from sklearn.tree import DecisionTreeRegressor ,plot_tree ,DecisionTreeClassifier
from sklearn.linear_model import LinearRegression , LogisticRegression
from sklearn.neighbors import KNeighborsRegressor , KNeighborsClassifier
from statsmodels.tsa.seasonal import seasonal_decompose


### 2.2) Loading the Dataset

In [38]:
df = pd.read_csv("Walmart.csv")


### 3.4.1) Removing Outliers of Weekly Sales

In [39]:
print(df["Weekly_Sales"].min())
print(df["Weekly_Sales"].max())
print(df["Weekly_Sales"].std())
print(df["Weekly_Sales"].mean())

percentile25=df["Weekly_Sales"].quantile(0.25)
percentile75=df["Weekly_Sales"].quantile(0.75)

iqr = percentile75 - percentile25

upper_limit_weekly_sales=percentile75 +1.5 * iqr
lower_limit_weekly_sales=percentile75 -1.5 * iqr
print("Upper Limit:",upper_limit_weekly_sales)
print("Lower Limit:",lower_limit_weekly_sales)

print("Weekly Sales Above Upper Limit Data Points",df[df["Weekly_Sales"]>upper_limit_weekly_sales].shape)

print("Weekly Sales Below Lower Limit Data Points",df[df["Weekly_Sales"]<lower_limit_weekly_sales].shape)

df = df[df["Weekly_Sales"] < upper_limit_weekly_sales]



209986.25
3818686.45
564366.6220536975
1046964.8775617715
Upper Limit: 2720371.4924999997
Lower Limit: 119945.8274999999
Weekly Sales Above Upper Limit Data Points (34, 8)
Weekly Sales Below Lower Limit Data Points (0, 8)


# Predicting the 45 Stores Weekly Sales

In [40]:
stores = df['Store'].unique()

In [41]:
store_data = {}

# Iterate over each store and store its data in store_data dictionary
for store in stores:
    store_data[store] = df[df['Store'] == store]

# Dictionary to hold models, scores, and predictions for each store
store_models = {}
store_scores = {'LinearRegression': [], 'KNN': [], 'DecisionTree': []}
store_predictions = {'LinearRegression': [], 'KNN': [], 'DecisionTree': []}
store_actuals = []

In [42]:
# Initialize dictionaries to store models, scores, predictions, and actuals
store_models = {}
store_scores = {'LinearRegression': [], 'KNN': [], 'DecisionTree': []}
store_predictions = {'LinearRegression': [], 'KNN': [], 'DecisionTree': []}
store_actuals = []

# Iterate over each store and train models
for store in stores:
    # Get data for the current store
    store_df = quarterly_df[quarterly_df['Store'] == store]
    
    # Split data into features and target variable
    X = store_df[['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']]
    y = store_df['Weekly_Sales']
    
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=20)
    
    # Transform the target variables using log transformation
    trf = FunctionTransformer(func=np.log1p, validate=True)
    
    # Note: Transform y_train and y_test into 2D arrays for the transformer
    y_train_trans = trf.fit_transform(y_train.values.reshape(-1, 1)).flatten()
    y_test_trans = trf.transform(y_test.values.reshape(-1, 1)).flatten()
    
    # Scale the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Train Linear Regression model
    lr_model = LinearRegression()
    lr_model.fit(X_train_scaled, y_train_trans)
    
    # Train KNN model (without Grid Search)
    knn = KNeighborsRegressor(n_neighbors=5)  # Simple KNN with 5 neighbors
    knn.fit(X_train_scaled, y_train_trans)
    
    # Train Decision Tree model
    dt_model = DecisionTreeRegressor(max_depth=3, min_samples_split=6, min_samples_leaf=2)
    dt_model.fit(X_train_scaled, y_train_trans)
    
    # Predict on the test set
    lr_pred = lr_model.predict(X_test_scaled)
    knn_pred = knn.predict(X_test_scaled)
    dt_pred = dt_model.predict(X_test_scaled)
    
    # Inverse transform predictions and actuals
    y_test_inv = np.expm1(trf.inverse_transform(y_test_trans.reshape(-1, 1)).flatten())
    lr_pred_inv = np.expm1(trf.inverse_transform(lr_pred.reshape(-1, 1)).flatten())
    knn_pred_inv = np.expm1(trf.inverse_transform(knn_pred.reshape(-1, 1)).flatten())
    dt_pred_inv = np.expm1(trf.inverse_transform(dt_pred.reshape(-1, 1)).flatten())
    
    # Calculate MAPE scores
    mape_lr = mean_absolute_percentage_error(y_test_inv, lr_pred_inv) * 100
    mape_knn = mean_absolute_percentage_error(y_test_inv, knn_pred_inv) * 100
    mape_dt = mean_absolute_percentage_error(y_test_inv, dt_pred_inv) * 100

    # Store models, scores, and predictions in dictionaries
    store_models[store] = {
        'LinearRegression': lr_model,
        'KNN': knn,
        'DecisionTree': dt_model
    }
    
    store_scores['LinearRegression'].append(mape_lr)
    store_scores['KNN'].append(mape_knn)
    store_scores['DecisionTree'].append(mape_dt)
    
    store_predictions['LinearRegression'].append(lr_pred_inv)
    store_predictions['KNN'].append(knn_pred_inv)
    store_predictions['DecisionTree'].append(dt_pred_inv)
    
    store_actuals.append(y_test_inv)
    
    # Print scores for the current store
    print(f'Store {store}:')
    print(f'  Linear Regression MAPE Score: {mape_lr:.2f}')
    print(f'  KNN MAPE Score: {mape_knn:.2f}')
    print(f'  Decision Tree MAPE Score: {mape_dt:.2f}')
    print('\n')

# Optionally, you can print or save detailed predictions for each store
# For example:
# for store_index, store in enumerate(stores):
#     print(f'Store {store}:')
#     for model in ['LinearRegression', 'KNN', 'DecisionTree']:
#         predictions = store_predictions[model][store_index]
#         actuals = store_actuals[store_index]
#         predictions_str = ', '.join([f'{pred:.2f}' for pred in predictions])
#         actuals_str = ', '.join([f'{actual:.2f}' for actual in actuals])
#         print(f'  {model} Predictions: [{predictions_str}]')
#         print(f'  {model} Actuals: [{actuals_str}]')
#     print('\n')

Store 1:
  Linear Regression MAPE Score: 2.30
  KNN MAPE Score: 5.00
  Decision Tree MAPE Score: 3.23


Store 2:
  Linear Regression MAPE Score: 4.25
  KNN MAPE Score: 3.51
  Decision Tree MAPE Score: 6.61


Store 3:
  Linear Regression MAPE Score: 5.63
  KNN MAPE Score: 6.14
  Decision Tree MAPE Score: 6.45


Store 4:
  Linear Regression MAPE Score: 6.57
  KNN MAPE Score: 8.06
  Decision Tree MAPE Score: 8.88


Store 5:
  Linear Regression MAPE Score: 5.46
  KNN MAPE Score: 5.18
  Decision Tree MAPE Score: 8.82


Store 6:
  Linear Regression MAPE Score: 6.41
  KNN MAPE Score: 6.16
  Decision Tree MAPE Score: 11.67


Store 7:
  Linear Regression MAPE Score: 15.60
  KNN MAPE Score: 15.89
  Decision Tree MAPE Score: 15.97


Store 8:
  Linear Regression MAPE Score: 4.01
  KNN MAPE Score: 4.76
  Decision Tree MAPE Score: 5.58


Store 9:
  Linear Regression MAPE Score: 6.06
  KNN MAPE Score: 6.61
  Decision Tree MAPE Score: 6.38


Store 10:
  Linear Regression MAPE Score: 6.68
  KNN MAPE Sc

In [10]:
for store_index, store in enumerate(stores):
    print(f'Store {store}:')
    for model in ['LinearRegression', 'KNN', 'DecisionTree']:
        predictions = np.expm1(store_predictions[model][store_index])
        actuals = np.expm1(store_actuals[store_index])
        predictions_str = ', '.join([f'{pred:.2f}' for pred in predictions])
        actuals_str = ', '.join([f'{actual:.2f}' for actual in actuals])
        print(f'  {model} Predictions: [{predictions_str}]')
        print(f'  {model} Actuals: [{actuals_str}]')
    print('\n')


Store 1:
  LinearRegression Predictions: [1550158.79, 1528390.85, 1603182.74, 1535435.01, 1647602.31, 1531118.48, 1537842.34, 1515440.02, 1548140.14, 1565300.57, 1580649.67, 1563759.02, 1470430.93, 1534713.61, 1532143.88, 1512107.13, 1500866.53, 1533012.87, 1472230.26, 1565148.99, 1505996.86, 1491317.73, 1543872.40, 1609900.72, 1546549.06, 1613801.85, 1574407.86, 1574742.95, 1577107.00]
  LinearRegression Actuals: [1629391.28, 2387950.20, 1649604.63, 1455090.69, 1881176.67, 1396926.82, 1686842.78, 1508239.93, 1631135.79, 1527014.04, 1630989.95, 1394561.83, 1513080.49, 1367320.01, 1456800.28, 1635078.41, 1453329.50, 1464693.46, 1422711.60, 1643690.90, 1345454.00, 1391256.12, 1428218.27, 1636339.65, 1316899.31, 1594938.89, 1641957.44, 1517428.87, 1697230.96]
  KNN Predictions: [1542862.92, 1535885.36, 1590224.56, 1507478.98, 1606392.82, 1517443.56, 1556229.74, 1528024.76, 1570274.30, 1570274.30, 1548462.56, 1506035.86, 1477638.59, 1543091.67, 1546714.92, 1517443.56, 1500394.16, 1517443.5

# Monthly Store Wise Prediction

In [44]:
# Load and prepare the data
store_wise = pd.read_csv("Walmart.csv")

# Convert 'Date' to datetime
store_wise['Date'] = pd.to_datetime(store_wise['Date'], format='%d-%m-%Y')

# Create 'Monthly' period column and aggregate data
store_wise["Monthly"] = store_wise["Date"].dt.to_period("M").dt.to_timestamp()
monthly_df = store_wise.groupby(["Store", "Monthly"])[["Weekly_Sales", "Temperature", "Fuel_Price", "CPI", "Unemployment"]].sum().reset_index()

# Get unique store numbers from the 'Store' column
stores = monthly_df['Store'].unique()

# Initialize dictionaries to store models, scores, and predictions for each store
store_models = {}
store_scores = {'LinearRegression': [], 'KNN': [], 'DecisionTree': []}
store_predictions = {'LinearRegression': [], 'KNN': [], 'DecisionTree': []}
store_actuals = []

# Iterate over each store and train models
for store in stores:
    # Get data for the current store
    store_df = monthly_df[monthly_df['Store'] == store]
    
    # Split data into features and target variable
    X = store_df[['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']]
    y = store_df['Weekly_Sales']
    
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=20)
    
    # Transform the target variables using log transformation
    trf = FunctionTransformer(func=np.log1p, validate=True)
    y_train_trans = trf.fit_transform(y_train.values.reshape(-1, 1)).flatten()
    y_test_trans = trf.transform(y_test.values.reshape(-1, 1)).flatten()
    
    # Scale the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Train Linear Regression model
    lr_model = LinearRegression()
    lr_model.fit(X_train_scaled, y_train_trans)
    
    # Train KNN model (without Grid Search)
    knn = KNeighborsRegressor(n_neighbors=5)  # Simple KNN with 5 neighbors
    knn.fit(X_train_scaled, y_train_trans)
    
    # Train Decision Tree model
    dt_model = DecisionTreeRegressor(max_depth=3, min_samples_split=6, min_samples_leaf=2)
    dt_model.fit(X_train_scaled, y_train_trans)
    
    # Predict on the test set
    lr_pred = lr_model.predict(X_test_scaled)
    knn_pred = knn.predict(X_test_scaled)
    dt_pred = dt_model.predict(X_test_scaled)
    
    # Inverse transform predictions and actuals
    y_test_inv = np.expm1(trf.inverse_transform(y_test_trans.reshape(-1, 1)).flatten())
    lr_pred_inv = np.expm1(trf.inverse_transform(lr_pred.reshape(-1, 1)).flatten())
    knn_pred_inv = np.expm1(trf.inverse_transform(knn_pred.reshape(-1, 1)).flatten())
    dt_pred_inv = np.expm1(trf.inverse_transform(dt_pred.reshape(-1, 1)).flatten())
    
    # Calculate MAPE scores
    mape_lr = mean_absolute_percentage_error(y_test_inv, lr_pred_inv) * 100
    mape_knn = mean_absolute_percentage_error(y_test_inv, knn_pred_inv) * 100
    mape_dt = mean_absolute_percentage_error(y_test_inv, dt_pred_inv) * 100

    # Store models, scores, and predictions in dictionaries
    store_models[store] = {
        'LinearRegression': lr_model,
        'KNN': knn,
        'DecisionTree': dt_model
    }
    
    store_scores['LinearRegression'].append(mape_lr)
    store_scores['KNN'].append(mape_knn)
    store_scores['DecisionTree'].append(mape_dt)
    
    store_predictions['LinearRegression'].append(lr_pred_inv)
    store_predictions['KNN'].append(knn_pred_inv)
    store_predictions['DecisionTree'].append(dt_pred_inv)
    
    store_actuals.append(y_test_inv)
    
    # Print scores for the current store
    print(f'Store {store}:')
    print(f'  Linear Regression MAPE Score: {mape_lr:.2f}')
    print(f'  KNN MAPE Score: {mape_knn:.2f}')
    print(f'  Decision Tree MAPE Score: {mape_dt:.2f}')
    print('\n')

# Optionally, you can print or save detailed predictions for each store
# For example:
# for store_index, store in enumerate(stores):
#     print(f'Store {store}:')
#     for model in ['LinearRegression', 'KNN', 'DecisionTree']:
#         predictions = store_predictions[model][store_index]
#         actuals = store_actuals[store_index]
#         predictions_str = ', '.join([f'{pred:.2f}' for pred in predictions])
#         actuals_str = ', '.join([f'{actual:.2f}' for actual in actuals])
#         print(f'  {model} Predictions: [{predictions_str}]')
#         print(f'  {model} Actuals: [{actuals_str}]')
#     print('\n')

Store 1:
  Linear Regression MAPE Score: 5.28
  KNN MAPE Score: 4.96
  Decision Tree MAPE Score: 7.15


Store 2:
  Linear Regression MAPE Score: 5.92
  KNN MAPE Score: 5.74
  Decision Tree MAPE Score: 8.05


Store 3:
  Linear Regression MAPE Score: 5.63
  KNN MAPE Score: 6.66
  Decision Tree MAPE Score: 7.98


Store 4:
  Linear Regression MAPE Score: 5.73
  KNN MAPE Score: 7.72
  Decision Tree MAPE Score: 8.79


Store 5:
  Linear Regression MAPE Score: 6.50
  KNN MAPE Score: 6.21
  Decision Tree MAPE Score: 8.28


Store 6:
  Linear Regression MAPE Score: 7.57
  KNN MAPE Score: 7.72
  Decision Tree MAPE Score: 7.09


Store 7:
  Linear Regression MAPE Score: 10.58
  KNN MAPE Score: 9.27
  Decision Tree MAPE Score: 17.16


Store 8:
  Linear Regression MAPE Score: 6.70
  KNN MAPE Score: 6.26
  Decision Tree MAPE Score: 9.16


Store 9:
  Linear Regression MAPE Score: 7.62
  KNN MAPE Score: 6.93
  Decision Tree MAPE Score: 9.12


Store 10:
  Linear Regression MAPE Score: 6.50
  KNN MAPE Scor

In [45]:
# Print predicted and actual values arrays for each store and model
for store_index, store in enumerate(stores):
    print(f'Store {store}:')
    for model in ['LinearRegression', 'KNN', 'DecisionTree']:
        predictions = store_predictions[model][store_index]
        actuals = store_actuals[store_index]
        predictions_str = ', '.join([f'{pred:.2f}' for pred in predictions])
        actuals_str = ', '.join([f'{actual:.2f}' for actual in actuals])
        print(f'  {model} Predictions: [{predictions_str}]')
        print(f'  {model} Actuals: [{actuals_str}]')
    print('\n')

Store 1:
  LinearRegression Predictions: [5870214.87, 8064696.10, 6136075.38, 6076851.47, 7713453.31, 6170548.69, 6257584.52]
  LinearRegression Actuals: [6084081.46, 8201997.40, 6307375.48, 5871293.98, 8876953.18, 6399887.57, 6864972.83]
  KNN Predictions: [6074687.27, 7985170.38, 6223351.90, 5973939.45, 7679305.26, 6139428.78, 6092979.28]
  KNN Actuals: [6084081.46, 8201997.40, 6307375.48, 5871293.98, 8876953.18, 6399887.57, 6864972.83]
  DecisionTree Predictions: [5904277.32, 8301786.39, 5904277.32, 6250536.65, 7427931.31, 5904277.32, 6250536.65]
  DecisionTree Actuals: [6084081.46, 8201997.40, 6307375.48, 5871293.98, 8876953.18, 6399887.57, 6864972.83]


Store 2:
  LinearRegression Predictions: [7455438.02, 9606974.27, 7466986.54, 7766431.29, 9870240.27, 7730749.98, 7699753.65]
  LinearRegression Actuals: [8069642.74, 9514186.05, 7529434.80, 7677765.60, 12190116.80, 8011783.74, 8397397.85]
  KNN Predictions: [7668951.59, 9682646.62, 7542920.07, 7648317.03, 9703047.42, 7710192.29, 7

In [46]:
monthly_df

Unnamed: 0,Store,Monthly,Weekly_Sales,Temperature,Fuel_Price,CPI,Unemployment
0,1,2010-02-01,6307344.10,167.38,10.195,844.947314,32.424
1,1,2010-03-01,5871293.98,210.32,10.744,844.964463,32.424
2,1,2010-04-01,7422801.92,326.70,13.872,1052.760676,39.040
3,1,2010-05-01,5929938.64,304.21,11.274,842.191249,31.232
4,1,2010-06-01,6084081.46,329.57,10.663,845.424947,31.232
...,...,...,...,...,...,...,...
1480,45,2012-06-01,4049712.68,360.15,18.121,955.218533,42.835
1481,45,2012-07-01,3042463.41,319.05,14.212,764.635011,34.736
1482,45,2012-08-01,3633793.89,378.65,18.884,956.361363,43.420
1483,45,2012-09-01,2905011.08,273.77,15.894,767.147787,34.736


# Quarterly Store Wise 

In [47]:
store_wise_quar = pd.read_csv("Walmart.csv")

# Convert 'Date' to datetime
store_wise_quar['Date'] = pd.to_datetime(store_wise_quar['Date'], format='%d-%m-%Y')

# Create 'Quarterly' period column and aggregate data
store_wise_quar["Quarterly"] = store_wise_quar["Date"].dt.to_period("Q").dt.to_timestamp()
quarterly_df = store_wise_quar.groupby(["Store", "Quarterly"])[["Weekly_Sales", "Temperature", "Fuel_Price", "CPI", "Unemployment"]].sum().reset_index()

# Get unique store numbers from the 'Store' column
stores = quarterly_df['Store'].unique()

# Initialize dictionaries to store models, scores, and predictions for each store
store_models = {}
store_scores = {'LinearRegression': [], 'KNN': [], 'DecisionTree': []}
store_predictions = {'LinearRegression': [], 'KNN': [], 'DecisionTree': []}
store_actuals = []

# Iterate over each store and train models
for store in stores:
    # Get data for the current store
    store_df = quarterly_df[quarterly_df['Store'] == store]
    
    # Split data into features and target variable
    X = store_df[['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']]
    y = store_df['Weekly_Sales']
    
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=20)
    
    # Transform the target variables using log transformation
    trf = FunctionTransformer(func=np.log1p, validate=True)
    y_train_trans = trf.fit_transform(y_train.values.reshape(-1, 1)).flatten()
    y_test_trans = trf.transform(y_test.values.reshape(-1, 1)).flatten()
    
    # Scale the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Train Linear Regression model
    lr_model = LinearRegression()
    lr_model.fit(X_train_scaled, y_train_trans)
    
    # Train KNN model (without Grid Search)
    knn = KNeighborsRegressor(n_neighbors=5)  # Simple KNN with 5 neighbors
    knn.fit(X_train_scaled, y_train_trans)
    
    # Train Decision Tree model
    dt_model = DecisionTreeRegressor(max_depth=3, min_samples_split=6, min_samples_leaf=2)
    dt_model.fit(X_train_scaled, y_train_trans)
    
    # Predict on the test set
    lr_pred = lr_model.predict(X_test_scaled)
    knn_pred = knn.predict(X_test_scaled)
    dt_pred = dt_model.predict(X_test_scaled)
    
    # Inverse transform predictions and actuals
    y_test_inv = np.expm1(trf.inverse_transform(y_test_trans.reshape(-1, 1)).flatten())
    lr_pred_inv = np.expm1(trf.inverse_transform(lr_pred.reshape(-1, 1)).flatten())
    knn_pred_inv = np.expm1(trf.inverse_transform(knn_pred.reshape(-1, 1)).flatten())
    dt_pred_inv = np.expm1(trf.inverse_transform(dt_pred.reshape(-1, 1)).flatten())
    
    # Calculate MAPE scores
    mape_lr = mean_absolute_percentage_error(y_test_inv, lr_pred_inv) * 100
    mape_knn = mean_absolute_percentage_error(y_test_inv, knn_pred_inv) * 100
    mape_dt = mean_absolute_percentage_error(y_test_inv, dt_pred_inv) * 100

    # Store models, scores, and predictions in dictionaries
    store_models[store] = {
        'LinearRegression': lr_model,
        'KNN': knn,
        'DecisionTree': dt_model
    }
    
    store_scores['LinearRegression'].append(mape_lr)
    store_scores['KNN'].append(mape_knn)
    store_scores['DecisionTree'].append(mape_dt)
    
    store_predictions['LinearRegression'].append(lr_pred_inv)
    store_predictions['KNN'].append(knn_pred_inv)
    store_predictions['DecisionTree'].append(dt_pred_inv)
    
    store_actuals.append(y_test_inv)
    
    # Print scores for the current store
    print(f'Store {store}:')
    print(f'  Linear Regression MAPE Score: {mape_lr:.2f}')
    print(f'  KNN MAPE Score: {mape_knn:.2f}')
    print(f'  Decision Tree MAPE Score: {mape_dt:.2f}')
    print('\n')

Store 1:
  Linear Regression MAPE Score: 2.30
  KNN MAPE Score: 5.00
  Decision Tree MAPE Score: 3.23


Store 2:
  Linear Regression MAPE Score: 4.25
  KNN MAPE Score: 3.51
  Decision Tree MAPE Score: 6.61


Store 3:
  Linear Regression MAPE Score: 5.63
  KNN MAPE Score: 6.14
  Decision Tree MAPE Score: 6.45


Store 4:
  Linear Regression MAPE Score: 6.57
  KNN MAPE Score: 8.06
  Decision Tree MAPE Score: 8.88


Store 5:
  Linear Regression MAPE Score: 5.46
  KNN MAPE Score: 5.18
  Decision Tree MAPE Score: 8.82


Store 6:
  Linear Regression MAPE Score: 6.41
  KNN MAPE Score: 6.16
  Decision Tree MAPE Score: 11.67


Store 7:
  Linear Regression MAPE Score: 15.60
  KNN MAPE Score: 15.89
  Decision Tree MAPE Score: 15.97


Store 8:
  Linear Regression MAPE Score: 4.01
  KNN MAPE Score: 4.76
  Decision Tree MAPE Score: 5.58


Store 9:
  Linear Regression MAPE Score: 6.06
  KNN MAPE Score: 6.61
  Decision Tree MAPE Score: 6.38


Store 10:
  Linear Regression MAPE Score: 6.68
  KNN MAPE Sc

In [48]:
# Print predicted and actual values arrays for each store and model
for store_index, store in enumerate(stores):
    print(f'Store {store}:')
    for model in ['LinearRegression', 'KNN', 'DecisionTree']:
        predictions = store_predictions[model][store_index]
        actuals = store_actuals[store_index]
        predictions_str = ', '.join([f'{pred:.2f}' for pred in predictions])
        actuals_str = ', '.join([f'{actual:.2f}' for actual in actuals])
        print(f'  {model} Predictions: [{predictions_str}]')
        print(f'  {model} Actuals: [{actuals_str}]')
    print('\n')

Store 1:
  LinearRegression Predictions: [21787365.54, 19405660.88, 21946875.67]
  LinearRegression Actuals: [21969895.29, 19436822.02, 20723762.83]
  KNN Predictions: [20340387.51, 20551347.75, 20340387.51]
  KNN Actuals: [21969895.29, 19436822.02, 20723762.83]
  DecisionTree Predictions: [20884220.24, 18662562.66, 20884220.24]
  DecisionTree Actuals: [21969895.29, 19436822.02, 20723762.83]


Store 2:
  LinearRegression Predictions: [27020003.28, 24920479.71, 26920889.05]
  LinearRegression Actuals: [27359644.13, 25367303.89, 24528220.70]
  KNN Predictions: [24979632.69, 25368171.07, 24979632.69]
  KNN Actuals: [27359644.13, 25367303.89, 24528220.70]
  DecisionTree Predictions: [24000099.51, 24000099.51, 24000099.51]
  DecisionTree Actuals: [27359644.13, 25367303.89, 24528220.70]


Store 3:
  LinearRegression Predictions: [5855840.18, 4935904.28, 6179252.29]
  LinearRegression Actuals: [5792920.41, 4846326.40, 5421809.72]
  KNN Predictions: [5318163.68, 5249269.20, 5318163.68]
  KNN A

In [49]:
quarterly_df

Unnamed: 0,Store,Quarterly,Weekly_Sales,Temperature,Fuel_Price,CPI,Unemployment
0,1,2010-01-01,12178638.08,377.70,20.939,1689.911777,64.848
1,1,2010-04-01,19436822.02,960.48,35.809,2740.376872,101.504
2,1,2010-07-01,19150229.91,1078.72,34.132,2748.498255,101.231
3,1,2010-10-01,22513141.99,822.98,38.569,2963.889582,109.732
4,1,2011-01-01,18187314.02,616.33,37.814,2556.070564,92.904
...,...,...,...,...,...,...,...
535,45,2011-10-01,11917228.18,639.39,45.424,2447.366847,110.799
536,45,2012-01-01,9805267.57,553.63,48.105,2468.744234,109.512
537,45,2012-04-01,10390767.83,828.00,49.678,2482.242994,111.371
538,45,2012-07-01,9581268.38,971.47,48.990,2488.144161,112.892
