### **Importing Essential Libraries**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pmdarima import auto_arima

**Loading the dataset**

In [2]:
df = pd.read_csv("Features_data_set.csv")

**Filtering null values and non-null value in different dataframes**

In [3]:
df_null = df[df["CPI"].isnull()]
df_null.reset_index(drop=True, inplace=True)
print("1.null shape",df_null.shape)

df_not_null = df[df["CPI"].notnull()]
df_not_null.reset_index(drop=True, inplace=True)
print("2.not null ",df.shape)

1.null shape (585, 12)
2.not null  (8190, 12)


**Updating the Date into correct format and set it to index to proceed Time Series Analysis**

In [4]:
df_not_null['Date'] = pd.to_datetime(df_not_null['Date'], format='%d/%m/%Y', errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_not_null['Date'] = pd.to_datetime(df_not_null['Date'], format='%d/%m/%Y', errors='coerce')


In [5]:
df_not_null.set_index('Date', inplace=True)

**Function to predict the Time Series Analysis for each store seperately**

In [6]:
def cpi_predict(df):
    # Auto ARIMA to find the best model
    model = auto_arima(df['CPI'], seasonal=False, trace=True, error_action='ignore', suppress_warnings=True)
    model.fit(df['CPI'])

    # Forecast the next 33 weeks
    forecast = model.predict(n_periods=13)

    # Create a DataFrame for the forecast
    future_dates = pd.date_range(start=df.index[-1], periods=14, freq='W')[1:]  
    forecast_df = pd.DataFrame({'Date': future_dates, 'CPI': forecast})
    return forecast_df

In [7]:
def unemp_predict(df):
    # Auto ARIMA to find the best model
    model = auto_arima(df['Unemployment'], seasonal=False, trace=True, error_action='ignore', suppress_warnings=True)
    model.fit(df['Unemployment'])

    # Forecast the next 33 weeks
    forecast = model.predict(n_periods=13)

    # Create a DataFrame for the forecast
    future_dates = pd.date_range(start=df.index[-1], periods=14, freq='W')[1:]  
    forecast_df = pd.DataFrame({'Date': future_dates, 'Unemployment': forecast})
    return forecast_df

In [8]:
df_cpi = df_null.copy()

In [9]:
df_unemp = df_null.copy()

In [10]:
df_cpi = df_cpi[["Date","CPI"]]

In [11]:
df_unemp = df_unemp[["Date","Unemployment"]]

In [12]:
df_cpi.dropna(inplace = True)

In [13]:
df_unemp.dropna(inplace = True)

In [14]:
for i in range(45):
    k = cpi_predict(df_not_null[df_not_null["Store"]==i+1])
    k.drop(columns=["Date"],inplace = True)
    k = k.reset_index().rename(columns={'index': 'Date'})
    df_cpi = pd.concat([df_cpi,k])

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-434.536, Time=0.24 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-208.482, Time=0.02 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-421.012, Time=0.03 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-350.487, Time=0.07 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-151.030, Time=0.02 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=-432.538, Time=0.11 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=-436.479, Time=0.07 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-434.297, Time=0.07 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=-436.790, Time=0.07 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=-435.952, Time=0.11 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=-432.790, Time=0.13 sec
 ARIMA(2,1,0)(0,0,0)[0]             : AIC=-430.789, Time=0.03 sec

Best model:  ARIMA(2,1,0)(0,0,0)[0] intercept
Total fit time: 0.982 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-43

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-421.559, Time=0.16 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-196.264, Time=0.02 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-407.901, Time=0.04 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-338.066, Time=0.06 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-140.685, Time=0.02 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=-419.484, Time=0.12 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=-423.494, Time=0.11 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-421.246, Time=0.08 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=-423.767, Time=0.07 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=-422.956, Time=0.08 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=-419.767, Time=0.11 sec
 ARIMA(2,1,0)(0,0,0)[0]             : AIC=-417.902, Time=0.03 sec

Best model:  ARIMA(2,1,0)(0,0,0)[0] intercept
Total fit time: 0.887 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-87

 ARIMA(0,1,4)(0,0,0)[0] intercept   : AIC=-647.423, Time=0.17 sec
 ARIMA(0,1,5)(0,0,0)[0] intercept   : AIC=-645.578, Time=0.24 sec
 ARIMA(1,1,5)(0,0,0)[0] intercept   : AIC=-643.496, Time=0.31 sec
 ARIMA(0,1,4)(0,0,0)[0]             : AIC=-619.123, Time=0.14 sec

Best model:  ARIMA(0,1,4)(0,0,0)[0] intercept
Total fit time: 4.796 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-879.474, Time=0.21 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-533.290, Time=0.03 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-873.843, Time=0.05 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-698.179, Time=0.07 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-458.092, Time=0.02 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=-881.454, Time=0.07 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=-783.689, Time=0.17 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-883.062, Time=0.06 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=-881.712, Time=0.06 sec
 ARIMA(2,1,0)(0,0,0)[0

 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-460.363, Time=0.09 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=-462.882, Time=0.06 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=-462.048, Time=0.08 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=-458.882, Time=0.13 sec
 ARIMA(2,1,0)(0,0,0)[0]             : AIC=-456.416, Time=0.05 sec

Best model:  ARIMA(2,1,0)(0,0,0)[0] intercept
Total fit time: 1.011 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-767.543, Time=0.18 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-514.359, Time=0.03 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-751.834, Time=0.03 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-664.323, Time=0.12 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-421.041, Time=0.03 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=-767.162, Time=0.09 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=-769.492, Time=0.11 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-768.980, Time=0.11 sec
 ARIMA(2,1,0)(0,0,0)[0

 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=-881.454, Time=0.08 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=-783.689, Time=0.17 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-883.062, Time=0.06 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=-881.712, Time=0.07 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=-883.709, Time=0.03 sec
 ARIMA(3,1,0)(0,0,0)[0] intercept   : AIC=-881.736, Time=0.13 sec
 ARIMA(3,1,1)(0,0,0)[0] intercept   : AIC=-879.511, Time=0.05 sec
 ARIMA(2,1,0)(0,0,0)[0]             : AIC=-881.629, Time=0.02 sec

Best model:  ARIMA(2,1,0)(0,0,0)[0] intercept
Total fit time: 1.048 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-879.474, Time=0.23 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-533.290, Time=0.04 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-873.843, Time=0.05 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-698.179, Time=0.09 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-458.092, Time=0.02 sec
 ARIMA(1,1,2)(0,0,0)[0

 ARIMA(5,1,3)(0,0,0)[0] intercept   : AIC=-639.400, Time=0.37 sec
 ARIMA(4,1,3)(0,0,0)[0] intercept   : AIC=-641.581, Time=0.28 sec
 ARIMA(3,1,3)(0,0,0)[0] intercept   : AIC=-643.372, Time=0.28 sec
 ARIMA(2,1,3)(0,0,0)[0] intercept   : AIC=-645.220, Time=0.19 sec
 ARIMA(1,1,3)(0,0,0)[0] intercept   : AIC=-645.357, Time=0.20 sec
 ARIMA(0,1,3)(0,0,0)[0] intercept   : AIC=-630.975, Time=0.19 sec
 ARIMA(1,1,4)(0,0,0)[0] intercept   : AIC=-643.827, Time=0.23 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=-572.322, Time=0.10 sec
 ARIMA(0,1,4)(0,0,0)[0] intercept   : AIC=-647.423, Time=0.18 sec
 ARIMA(0,1,5)(0,0,0)[0] intercept   : AIC=-645.578, Time=0.26 sec
 ARIMA(1,1,5)(0,0,0)[0] intercept   : AIC=-643.496, Time=0.34 sec
 ARIMA(0,1,4)(0,0,0)[0]             : AIC=-619.123, Time=0.14 sec

Best model:  ARIMA(0,1,4)(0,0,0)[0] intercept
Total fit time: 5.239 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-879.474, Time=0.23 sec
 ARIMA(0,1,0)(0,0,0)[0

In [15]:
for i in range(45):
    k = unemp_predict(df_not_null[df_not_null["Store"]==i+1])
    k.drop(columns=["Date"],inplace = True)
    k = k.reset_index().rename(columns={'index': 'Date'})
    df_unemp = pd.concat([df_unemp,k])

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-433.247, Time=0.17 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-440.516, Time=0.03 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-438.643, Time=0.03 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-438.651, Time=0.03 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-437.976, Time=0.02 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-437.230, Time=0.13 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0] intercept
Total fit time: 0.414 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-444.212, Time=0.24 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-449.673, Time=0.03 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-448.002, Time=0.02 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-448.037, Time=0.06 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-444.427, Time=0.02 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-448.159, Time=0.13 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0] intercep

 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-575.209, Time=0.04 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 0.201 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-483.732, Time=0.14 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-490.420, Time=0.02 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-488.613, Time=0.02 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-488.628, Time=0.05 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-486.841, Time=0.01 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-487.719, Time=0.17 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0] intercept
Total fit time: 0.412 seconds
Performing stepwise search to minimize aic
 ARIMA(2,2,2)(0,0,0)[0] intercept   : AIC=inf, Time=0.20 sec
 ARIMA(0,2,0)(0,0,0)[0] intercept   : AIC=-386.808, Time=0.04 sec
 ARIMA(1,2,0)(0,0,0)[0] intercept   : AIC=-432.566, Time=0.06 sec
 ARIMA(0,2,1)(0,0,0)[0] intercept   : AIC=inf, Time=0.15 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-3

 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-227.897, Time=0.03 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-227.925, Time=0.05 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-224.883, Time=0.02 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-227.388, Time=0.14 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0] intercept
Total fit time: 0.407 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-394.469, Time=0.06 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-402.425, Time=0.03 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-400.446, Time=0.02 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-400.447, Time=0.04 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-402.552, Time=0.02 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-398.447, Time=0.06 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 0.234 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-444.212, Time=0.22 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept 

 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-480.022, Time=0.16 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0] intercept
Total fit time: 0.454 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-482.435, Time=0.15 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-488.343, Time=0.04 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-486.627, Time=0.02 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-486.655, Time=0.03 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-483.588, Time=0.02 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-486.406, Time=0.17 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0] intercept
Total fit time: 0.429 seconds
Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-438.447, Time=0.19 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-433.730, Time=0.02 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-432.621, Time=0.03 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-432.799, Time=0.04 sec
 ARIMA(0,1,0)(0,0,0)[0]           

In [16]:
df_unemp = df_unemp.reset_index(drop=True)
df_cpi = df_cpi.reset_index(drop=True)

In [17]:
df_unemp

Unnamed: 0,Date,Unemployment
0,2013-05-03,6.303333
1,2013-05-10,6.292667
2,2013-05-17,6.282000
3,2013-05-24,6.271333
4,2013-05-31,6.260666
...,...,...
580,2013-06-28,8.299803
581,2013-07-05,8.295893
582,2013-07-12,8.291982
583,2013-07-19,8.288071


In [18]:
df_cpi

Unnamed: 0,Date,CPI
0,2013-05-03,225.186928
1,2013-05-10,225.222453
2,2013-05-17,225.273635
3,2013-05-24,225.336379
4,2013-05-31,225.407087
...,...,...
580,2013-06-28,194.049252
581,2013-07-05,194.118327
582,2013-07-12,194.187760
583,2013-07-19,194.257397


**Updating it to actual Dataframe**

In [19]:
df_null

Unnamed: 0,Store,Date,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,IsHoliday
0,1,03/05/2013,66.66,3.386,2298.63,2.00,129.90,55.46,1301.04,,,False
1,1,10/05/2013,63.90,3.392,4624.61,83.88,42.38,1618.31,8144.90,,,False
2,1,17/05/2013,69.53,3.454,16170.50,92.00,8.32,4127.24,6206.97,,,False
3,1,24/05/2013,77.19,3.494,7959.89,178.00,1621.47,3152.57,2938.70,,,False
4,1,31/05/2013,78.02,3.466,2369.77,142.45,475.35,45.55,2056.84,,,False
...,...,...,...,...,...,...,...,...,...,...,...,...
580,45,28/06/2013,76.05,3.639,4842.29,975.03,3.00,2449.97,3169.69,,,False
581,45,05/07/2013,77.50,3.614,9090.48,2268.58,582.74,5797.47,1514.93,,,False
582,45,12/07/2013,79.37,3.614,3789.94,1827.31,85.72,744.84,2150.36,,,False
583,45,19/07/2013,82.84,3.737,2961.49,1047.07,204.19,363.00,1059.46,,,False


In [20]:
df_null.drop(columns= ["CPI","Unemployment"],inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_null.drop(columns= ["CPI","Unemployment"],inplace = True)


In [21]:
df_null = pd.merge(df_null, df_cpi["CPI"], left_index=True, right_index=True, how='inner')
df_null = pd.merge(df_null, df_unemp["Unemployment"], left_index=True, right_index=True, how='inner')

In [22]:
df_null['Date'] = pd.to_datetime(df_null['Date'], format='%d/%m/%Y', errors='coerce')

In [23]:
df_null

Unnamed: 0,Store,Date,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,IsHoliday,CPI,Unemployment
0,1,2013-05-03,66.66,3.386,2298.63,2.00,129.90,55.46,1301.04,False,225.186928,6.303333
1,1,2013-05-10,63.90,3.392,4624.61,83.88,42.38,1618.31,8144.90,False,225.222453,6.292667
2,1,2013-05-17,69.53,3.454,16170.50,92.00,8.32,4127.24,6206.97,False,225.273635,6.282000
3,1,2013-05-24,77.19,3.494,7959.89,178.00,1621.47,3152.57,2938.70,False,225.336379,6.271333
4,1,2013-05-31,78.02,3.466,2369.77,142.45,475.35,45.55,2056.84,False,225.407087,6.260666
...,...,...,...,...,...,...,...,...,...,...,...,...
580,45,2013-06-28,76.05,3.639,4842.29,975.03,3.00,2449.97,3169.69,False,194.049252,8.299803
581,45,2013-07-05,77.50,3.614,9090.48,2268.58,582.74,5797.47,1514.93,False,194.118327,8.295893
582,45,2013-07-12,79.37,3.614,3789.94,1827.31,85.72,744.84,2150.36,False,194.187760,8.291982
583,45,2013-07-19,82.84,3.737,2961.49,1047.07,204.19,363.00,1059.46,False,194.257397,8.288071


In [24]:
df_not_null = df_not_null.reset_index().rename(columns={'index': 'Date'})

In [25]:
df_not_null

Unnamed: 0,Date,Store,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,IsHoliday
0,2010-02-05,1,42.31,2.572,,,,,,211.096358,8.106,False
1,2010-02-12,1,38.51,2.548,,,,,,211.242170,8.106,True
2,2010-02-19,1,39.93,2.514,,,,,,211.289143,8.106,False
3,2010-02-26,1,46.63,2.561,,,,,,211.319643,8.106,False
4,2010-03-05,1,46.50,2.625,,,,,,211.350143,8.106,False
...,...,...,...,...,...,...,...,...,...,...,...,...
7600,2013-03-29,45,40.68,3.784,5444.00,,350.84,53.90,1722.11,193.442790,8.625,False
7601,2013-04-05,45,43.94,3.763,16427.83,5341.41,182.59,1523.83,1743.09,193.516047,8.335,False
7602,2013-04-12,45,57.39,3.724,8760.15,1713.11,21.08,1302.31,1380.74,193.589304,8.335,False
7603,2013-04-19,45,56.27,3.676,1399.81,39.89,44.38,60.83,1445.05,193.589304,8.335,False


In [26]:
df = pd.concat([df_null,df_not_null])
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,Store,Date,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,IsHoliday,CPI,Unemployment
0,1,2013-05-03,66.66,3.386,2298.63,2.00,129.90,55.46,1301.04,False,225.186928,6.303333
1,1,2013-05-10,63.90,3.392,4624.61,83.88,42.38,1618.31,8144.90,False,225.222453,6.292667
2,1,2013-05-17,69.53,3.454,16170.50,92.00,8.32,4127.24,6206.97,False,225.273635,6.282000
3,1,2013-05-24,77.19,3.494,7959.89,178.00,1621.47,3152.57,2938.70,False,225.336379,6.271333
4,1,2013-05-31,78.02,3.466,2369.77,142.45,475.35,45.55,2056.84,False,225.407087,6.260666
...,...,...,...,...,...,...,...,...,...,...,...,...
8185,45,2013-03-29,40.68,3.784,5444.00,,350.84,53.90,1722.11,False,193.442790,8.625000
8186,45,2013-04-05,43.94,3.763,16427.83,5341.41,182.59,1523.83,1743.09,False,193.516047,8.335000
8187,45,2013-04-12,57.39,3.724,8760.15,1713.11,21.08,1302.31,1380.74,False,193.589304,8.335000
8188,45,2013-04-19,56.27,3.676,1399.81,39.89,44.38,60.83,1445.05,False,193.589304,8.335000


In [27]:
df = df.sort_values(by=['Store', 'Date'], ascending=[True, True])

In [28]:
df

Unnamed: 0,Store,Date,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,IsHoliday,CPI,Unemployment
585,1,2010-02-05,42.31,2.572,,,,,,False,211.096358,8.106000
586,1,2010-02-12,38.51,2.548,,,,,,True,211.242170,8.106000
587,1,2010-02-19,39.93,2.514,,,,,,False,211.289143,8.106000
588,1,2010-02-26,46.63,2.561,,,,,,False,211.319643,8.106000
589,1,2010-03-05,46.50,2.625,,,,,,False,211.350143,8.106000
...,...,...,...,...,...,...,...,...,...,...,...,...
580,45,2013-06-28,76.05,3.639,4842.29,975.03,3.00,2449.97,3169.69,False,194.049252,8.299803
581,45,2013-07-05,77.50,3.614,9090.48,2268.58,582.74,5797.47,1514.93,False,194.118327,8.295893
582,45,2013-07-12,79.37,3.614,3789.94,1827.31,85.72,744.84,2150.36,False,194.187760,8.291982
583,45,2013-07-19,82.84,3.737,2961.49,1047.07,204.19,363.00,1059.46,False,194.257397,8.288071


**Saving the dataframe for further usage**

In [29]:
df.to_csv("Cleaned_Features_data_set.csv", index=False)