In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings emitted by any library
# used below — confirm that blanket suppression is really intended.
warnings.simplefilter('ignore')

In [3]:
# Load the neural-network (MLP) predictions together with the observed sales.
neural_csv_path = 'Neural_prediction_PHD.csv'
df = pd.read_csv(neural_csv_path)

In [4]:
# Preview the first five rows of the neural-network prediction table.
df.head(n=5)

Unnamed: 0,state_id,d,month,year,snap_CA,snap_TX,snap_WI,sell_price,simple_moving_average,exp_weighted_moving_average,cum_moving_average,total_price,Neural_prediction,sales
0,0,1885,3,2016,0,0,0,8.26,1.071428,1.392841,1.427774,8.26,-1,1
1,2,1885,3,2016,0,0,0,2.68,1.357143,1.007385,1.427358,2.68,1,1
2,0,1885,3,2016,0,0,0,1.0,0.892857,1.434569,1.427874,5.0,4,5
3,0,1885,3,2016,0,0,0,2.48,1.928572,3.061044,1.427875,42.16,15,17
4,2,1885,3,2016,0,0,0,1.98,1.607143,1.349851,1.427358,0.0,0,0


### Calculate The Absolute Prediction Error for the MLP

In [5]:
# Absolute error of the MLP forecast per row: |sales - Neural_prediction|.
df['Neural_prediction_error'] = df['sales'].sub(df['Neural_prediction']).abs()

In [6]:
# Preview the table again to check the newly added error column.
df.head(n=5)

Unnamed: 0,state_id,d,month,year,snap_CA,snap_TX,snap_WI,sell_price,simple_moving_average,exp_weighted_moving_average,cum_moving_average,total_price,Neural_prediction,sales,Neural_prediction_error
0,0,1885,3,2016,0,0,0,8.26,1.071428,1.392841,1.427774,8.26,-1,1,2
1,2,1885,3,2016,0,0,0,2.68,1.357143,1.007385,1.427358,2.68,1,1,0
2,0,1885,3,2016,0,0,0,1.0,0.892857,1.434569,1.427874,5.0,4,5,1
3,0,1885,3,2016,0,0,0,2.48,1.928572,3.061044,1.427875,42.16,15,17,2
4,2,1885,3,2016,0,0,0,1.98,1.607143,1.349851,1.427358,0.0,0,0,0


In [7]:
# Load the stacked-ensemble predictions together with the observed sales.
stack_csv_path = 'Stack_prediction_PHD2.csv'
df2 = pd.read_csv(stack_csv_path)

In [8]:
# Preview the first five rows of the stacked-ensemble prediction table.
df2.head(n=5)

Unnamed: 0,state_id,d,month,year,snap_CA,snap_TX,snap_WI,sell_price,simple_moving_average,exp_weighted_moving_average,cum_moving_average,total_price,stack_prediction,sales
0,0,1885,3,2016,0,0,0,8.26,1.071428,1.392841,1.427774,8.26,1,1
1,2,1885,3,2016,0,0,0,2.68,1.357143,1.007385,1.427358,2.68,1,1
2,0,1885,3,2016,0,0,0,1.0,0.892857,1.434569,1.427874,5.0,5,5
3,0,1885,3,2016,0,0,0,2.48,1.928572,3.061044,1.427875,42.16,17,17
4,2,1885,3,2016,0,0,0,1.98,1.607143,1.349851,1.427358,0.0,0,0


### Calculate the Absolute Prediction Error for the Stacked Ensemble Learner

In [9]:
# Absolute error of the stacked-ensemble forecast per row: |sales - stack_prediction|.
df2['stack_prediction_error'] = df2['sales'].sub(df2['stack_prediction']).abs()

In [10]:
# Preview the table again to check the newly added error column.
df2.head(n=5)

Unnamed: 0,state_id,d,month,year,snap_CA,snap_TX,snap_WI,sell_price,simple_moving_average,exp_weighted_moving_average,cum_moving_average,total_price,stack_prediction,sales,stack_prediction_error
0,0,1885,3,2016,0,0,0,8.26,1.071428,1.392841,1.427774,8.26,1,1,0
1,2,1885,3,2016,0,0,0,2.68,1.357143,1.007385,1.427358,2.68,1,1,0
2,0,1885,3,2016,0,0,0,1.0,0.892857,1.434569,1.427874,5.0,5,5,0
3,0,1885,3,2016,0,0,0,2.48,1.928572,3.061044,1.427875,42.16,17,17,0
4,2,1885,3,2016,0,0,0,1.98,1.607143,1.349851,1.427358,0.0,0,0,0


**Create dataframe for both prediction errors**

In [11]:
# The two error columns come from separately loaded files and are combined by
# index label. If the frames did not share the same index in the same order,
# pandas would silently take the union of both indexes and fill the gaps with
# NaN, so fail loudly instead of comparing misaligned rows.
assert df.index.equals(df2.index), (
    'Neural and stacked prediction frames are not row-aligned'
)

# One column of absolute errors per model, row-aligned for the comparison below.
df_error = pd.DataFrame({
    'Neural_prediction_error': df['Neural_prediction_error'],
    'stack_prediction_error': df2['stack_prediction_error']
})

df_error.head()

Unnamed: 0,Neural_prediction_error,stack_prediction_error
0,2,0
1,0,0
2,1,0
3,2,0
4,0,0


**Summary Statistics**

In [12]:
# Descriptive statistics, transposed so that each error column is one row.
df_error.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Neural_prediction_error,884210.0,0.395364,0.739629,0.0,0.0,0.0,1.0,90.0
stack_prediction_error,884210.0,0.000215,0.021054,0.0,0.0,0.0,0.0,8.0


**Check whether each error sample is normally distributed using the Anderson-Darling test**

In [13]:
from scipy import stats


def report_anderson_darling(sample, label, prefix=''):
    """Run the Anderson-Darling normality test on ``sample`` and print the verdicts.

    Parameters
    ----------
    sample : array-like
        Observations to test against a normal distribution.
    label : str
        Name shown in the "Test statistic for ..." line.
    prefix : str, optional
        Text printed immediately before the statistic line, e.g. a newline
        to separate consecutive reports.

    Returns
    -------
    The result object returned by ``scipy.stats.anderson`` (statistic,
    critical values and significance levels).
    """
    result = stats.anderson(sample, dist='norm')

    # The result object contains a statistic and critical values
    print('{}Test statistic for {}: {}'.format(prefix, label, result.statistic))

    # H0 (normality) is kept only while the statistic stays below the critical
    # value for the given significance level.
    for sl, cv in zip(result.significance_level, result.critical_values):
        if result.statistic < cv:
            print('For significance level {}, critical value is {}. Data looks normal (fail to reject H0)'.format(sl, cv))
        else:
            print('For significance level {}, critical value is {}. Data does not look normal (reject H0)'.format(sl, cv))

    return result


# Conduct Anderson-Darling test on 'Neural_prediction_error'
result_nn_error = report_anderson_darling(
    df_error['Neural_prediction_error'], 'Neural_prediction_error'
)

# Conduct Anderson-Darling test on 'stack_prediction_error'
result_sp_error = report_anderson_darling(
    df_error['stack_prediction_error'], 'stack_prediction_error', prefix='\n'
)

Test statistic for Neural_prediction_error: 127246.71938066673
For significance level 15.0, critical value is 0.576. Data does not look normal (reject H0)
For significance level 10.0, critical value is 0.656. Data does not look normal (reject H0)
For significance level 5.0, critical value is 0.787. Data does not look normal (reject H0)
For significance level 2.5, critical value is 0.918. Data does not look normal (reject H0)
For significance level 1.0, critical value is 1.092. Data does not look normal (reject H0)

Test statistic for stack_prediction_error: 341454.0541812228
For significance level 15.0, critical value is 0.576. Data does not look normal (reject H0)
For significance level 10.0, critical value is 0.656. Data does not look normal (reject H0)
For significance level 5.0, critical value is 0.787. Data does not look normal (reject H0)
For significance level 2.5, critical value is 0.918. Data does not look normal (reject H0)
For significance level 1.0, critical value is 1.092. Data does not look normal (reject H0)

The samples do not have normal distributions, and thus, the Mann-Whitney U test will be used.

**Calculate The Mann-Whitney U test**

In [14]:

from scipy.stats import mannwhitneyu

# Two-sided Mann-Whitney U test comparing the two models' absolute errors.
# `alternative` is passed explicitly: SciPy releases before 1.7 defaulted to a
# deprecated mode that reports half the two-sided p-value, and the accompanying
# deprecation warning is hidden by the blanket warning filter set earlier.
# NOTE(review): both error columns are computed on the same rows, so the
# samples are paired, whereas the Mann-Whitney U test assumes independent
# samples — consider scipy.stats.wilcoxon on the per-row differences.
statistic, p_value = mannwhitneyu(
    df_error['Neural_prediction_error'],
    df_error['stack_prediction_error'],
    alternative='two-sided',
)

# Define the significance level
alpha = 0.05

# A significance test either rejects H0 or fails to reject it; it never
# "accepts" the null hypothesis.
hypothesis_result = 'Reject' if p_value < alpha else 'Fail to reject'

# Print the results
print(f"Mann-Whitney U statistic: {statistic}")
print(f"P-value: {p_value}")
print(f"Hypothesis: {hypothesis_result} the null hypothesis")

Mann-Whitney U statistic: 523116004996.5
P-value: 0.0
Hypothesis: Reject the null hypothesis


In [17]:
# The confidence level is the complement of the significance level.
# NOTE(review): relies on `alpha` defined in the Mann-Whitney cell, so that
# cell must be executed first.
confidence_level = 1 - alpha
print("Confidence Level: {}".format(confidence_level))

Confidence Level: 0.95


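The Mann-Whitney U test used above (also known as the Wilcoxon rank-sum test) is a non-parametric statistical test of whether two independent samples come from the same distribution. It is commonly used when the data are not normally distributed. Note that the two error samples here are computed on the same rows, so they are paired rather than independent; a paired alternative such as the Wilcoxon signed-rank test may be more appropriate.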

## Result


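The test reports a p-value of 0.0, meaning the value is too small to be represented in floating point rather than exactly zero. Because the p-value is below the significance level of .05, the result is statistically significant and the null hypothesis (that the two models' absolute prediction errors follow the same distribution) is rejected.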

In [None]:
# Can you calculate the confidence level?

In [15]:
# Side-by-side table of both models' predictions and the observed sales.
# NOTE(review): `sales` is taken from the neural-network frame only —
# presumably identical to df2['sales']; confirm before relying on it.
df_final = pd.DataFrame(dict(
    Neural_prediction=df['Neural_prediction'],
    stack_prediction=df2['stack_prediction'],
    sales=df['sales'],
))

df_final.head(n=5)

Unnamed: 0,Neural_prediction,stack_prediction,sales
0,-1,1,1
1,1,1,1
2,4,5,5
3,15,17,17
4,0,0,0


In [16]:
# Write the combined predictions/sales table to CSV, omitting the index column.
output_csv_path = 'data_for_final_analysis_PHD.csv'
df_final.to_csv(output_csv_path, index=False)