### ESTIMATING THE DEMAND - PRICE EQUATION

In [109]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt

In [110]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

In [111]:
df_3 = pd.read_csv('TSC_Sales_Data.csv')
print(df_3.info())
df_3.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70829 entries, 0 to 70828
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   SCRUB_ITEM      70829 non-null  int64  
 1   TIME_DIM_KEY    70829 non-null  int64  
 2   SCRUB_TRANS_NO  70829 non-null  int64  
 3   TRANS_SEQ_NO    70829 non-null  int64  
 4   UNIT_QTY        70829 non-null  float64
 5   UNIT_PRICE      70829 non-null  float64
 6   UNIT_COST       70829 non-null  float64
dtypes: float64(3), int64(4)
memory usage: 3.8 MB
None


Unnamed: 0,SCRUB_ITEM,TIME_DIM_KEY,SCRUB_TRANS_NO,TRANS_SEQ_NO,UNIT_QTY,UNIT_PRICE,UNIT_COST
0,1,20180402,1296,2,2.0,322.99,132.97
1,1,20181206,6878,1,1.0,322.99,132.97
2,1,20181210,7010,2,2.0,322.99,132.97
3,1,20190214,8380,2,1.0,322.99,132.97
4,1,20190502,10639,1,2.0,219.99,132.97


In [112]:
#Converting to a weekly level
df_3['year'] = df_3['TIME_DIM_KEY']//10000
df_3['month'] = (df_3['TIME_DIM_KEY']//100)%100
df_3['day'] = df_3['TIME_DIM_KEY']%100
df_3['date'] = df_3['year'].astype(str) +'-'+df_3['month'].astype(str) +'-'+ df_3['day'].astype(str)
df_3['date'] = pd.to_datetime(df_3['date'])
df_3['weekday'] = df_3['date'].dt.week
df_3.head()

Unnamed: 0,SCRUB_ITEM,TIME_DIM_KEY,SCRUB_TRANS_NO,TRANS_SEQ_NO,UNIT_QTY,UNIT_PRICE,UNIT_COST,year,month,day,date,weekday
0,1,20180402,1296,2,2.0,322.99,132.97,2018,4,2,2018-04-02,14
1,1,20181206,6878,1,1.0,322.99,132.97,2018,12,6,2018-12-06,49
2,1,20181210,7010,2,2.0,322.99,132.97,2018,12,10,2018-12-10,50
3,1,20190214,8380,2,1.0,322.99,132.97,2019,2,14,2019-02-14,7
4,1,20190502,10639,1,2.0,219.99,132.97,2019,5,2,2019-05-02,18


In [113]:
df_time = df_3.groupby(['year','weekday']).date.min().reset_index()
df_time = df_time[:-1]
df_time['time_id'] = 100*df_time['year'] + df_time['weekday']
df_time.head()

Unnamed: 0,year,weekday,date,time_id
0,2018,1,2018-01-01,201801
1,2018,2,2018-01-08,201802
2,2018,3,2018-01-15,201803
3,2018,4,2018-01-22,201804
4,2018,5,2018-01-29,201805


In [114]:
df_3['time_id'] = 100*df_3['year'] + df_3['weekday']
df_3['time_id'] = np.where(df_3['time_id'] == 202153, 202101, df_3['time_id'])
df_3.head()

Unnamed: 0,SCRUB_ITEM,TIME_DIM_KEY,SCRUB_TRANS_NO,TRANS_SEQ_NO,UNIT_QTY,UNIT_PRICE,UNIT_COST,year,month,day,date,weekday,time_id
0,1,20180402,1296,2,2.0,322.99,132.97,2018,4,2,2018-04-02,14,201814
1,1,20181206,6878,1,1.0,322.99,132.97,2018,12,6,2018-12-06,49,201849
2,1,20181210,7010,2,2.0,322.99,132.97,2018,12,10,2018-12-10,50,201850
3,1,20190214,8380,2,1.0,322.99,132.97,2019,2,14,2019-02-14,7,201907
4,1,20190502,10639,1,2.0,219.99,132.97,2019,5,2,2019-05-02,18,201918


In [115]:
df = df_3.groupby(['SCRUB_ITEM','time_id']).agg({'UNIT_QTY':'sum',
                                                 'UNIT_PRICE':'mean','UNIT_COST':'mean'}).reset_index()
df.head()

Unnamed: 0,SCRUB_ITEM,time_id,UNIT_QTY,UNIT_PRICE,UNIT_COST
0,1,201814,2.0,322.99,132.97
1,1,201849,1.0,322.99,132.97
2,1,201850,2.0,322.99,132.97
3,1,201901,1.0,199.99,132.97
4,1,201907,1.0,322.99,132.97


In [116]:
df['year'] = df['time_id']//100
df['week'] = df['time_id']%100

In [117]:
df = pd.merge(df,df_time, on = 'time_id', how = 'left')

In [118]:
df = df.sort_values(by = 'time_id').reset_index(drop= True)

In [119]:
df.head()

Unnamed: 0,SCRUB_ITEM,time_id,UNIT_QTY,UNIT_PRICE,UNIT_COST,year_x,week,year_y,weekday,date
0,24,201801,1.0,99.99,62.99,2018,1,2018,1,2018-01-01
1,47,201801,11.0,155.444545,118.38,2018,1,2018,1,2018-01-01
2,7,201801,1.0,149.99,128.55,2018,1,2018,1,2018-01-01
3,31,201801,16.0,69.99,48.707143,2018,1,2018,1,2018-01-01
4,71,201801,7.0,238.561429,196.52,2018,1,2018,1,2018-01-01


In [120]:
df['date'] = pd.to_datetime(df['date'])

In [121]:
#fourier features
fourier = pd.DataFrame()
fourier['date'] = df['date']
fourier = fourier.drop_duplicates()
fourier = fourier.sort_values(by = 'date')
fourier = fourier.set_index(pd.PeriodIndex(fourier['date'],freq = 'W'))
fourier['sin365'] = np.sin(2*np.pi*fourier.index.dayofyear/52)
fourier['cos365'] = np.cos(2*np.pi*fourier.index.dayofyear/52)
fourier['sin365_2'] = np.sin(4*np.pi*fourier.index.dayofyear/52)
fourier['cos365_2'] = np.cos(4*np.pi*fourier.index.dayofyear/52)


In [122]:
fourier= fourier.rename(columns = {'date':'Date'})
fourier= fourier.reset_index()
fourier = fourier.drop('date',axis =1)
fourier= fourier.rename(columns = {'Date':'date'})
fourier.head()

Unnamed: 0,date,sin365,cos365,sin365_2,cos365_2
0,2018-01-01,0.748511,0.663123,0.992709,-0.120537
1,2018-01-08,0.992709,-0.120537,-0.239316,-0.970942
2,2018-01-15,0.568065,-0.822984,-0.935016,0.354605
3,2018-01-22,-0.239316,-0.970942,0.464723,0.885456
4,2018-01-29,-0.885456,-0.464723,0.822984,-0.568065


In [123]:
fourier.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 188 entries, 0 to 187
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   date      188 non-null    datetime64[ns]
 1   sin365    188 non-null    float64       
 2   cos365    188 non-null    float64       
 3   sin365_2  188 non-null    float64       
 4   cos365_2  188 non-null    float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 7.5 KB


In [124]:
df_1 = pd.merge(df,fourier, on = ['date'])
df_1.head()

Unnamed: 0,SCRUB_ITEM,time_id,UNIT_QTY,UNIT_PRICE,UNIT_COST,year_x,week,year_y,weekday,date,sin365,cos365,sin365_2,cos365_2
0,24,201801,1.0,99.99,62.99,2018,1,2018,1,2018-01-01,0.748511,0.663123,0.992709,-0.120537
1,47,201801,11.0,155.444545,118.38,2018,1,2018,1,2018-01-01,0.748511,0.663123,0.992709,-0.120537
2,7,201801,1.0,149.99,128.55,2018,1,2018,1,2018-01-01,0.748511,0.663123,0.992709,-0.120537
3,31,201801,16.0,69.99,48.707143,2018,1,2018,1,2018-01-01,0.748511,0.663123,0.992709,-0.120537
4,71,201801,7.0,238.561429,196.52,2018,1,2018,1,2018-01-01,0.748511,0.663123,0.992709,-0.120537


In [125]:
df_1['year']=df_1['date'].dt.year
df_1['month']=df_1['date'].dt.month


In [126]:
df_1.head()
df_2=df_1.copy()

In [127]:
#Features decided using RFE Polynomial fits
df_2['col1'] = df_2['UNIT_PRICE']*df_2['month']
df_2['col2'] = df_2['UNIT_PRICE']*df_2['year']
df_2['col3'] = df_2['month']*df_2['week']
df_2['col4'] = df_2['month']*df_2['year']
df_2['col5'] = df_2['month']*df_2['year']*df_2['week']
df_2['col6'] = df_2['month']*df_2['year']*df_2['week']*df_2['UNIT_PRICE']

In [128]:
import statsmodels.api as sm  
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression

import warnings
warnings.filterwarnings('ignore')

In [129]:
#Linear Regression using best features ,and obtaining R2s
r = []
q = []
a = []
for x in list(df_2.SCRUB_ITEM.unique()):
    df_x=df_2[df_2.SCRUB_ITEM==x]
    y = df_x['UNIT_QTY']
    X = df_x[['UNIT_PRICE','sin365','col1','col2','col3','col4','col5','col6','cos365','year','month','week']]
    X = sm.add_constant(X)
    y = list(y.values)
    lm = sm.OLS(y,X).fit()
    print('r2 score for ', x, ' ',r2_score(y,lm.predict(X)))
    print('adjusted_r2 for ', x ,' ', lm.rsquared_adj)
    b = r2_score(y,lm.predict(X))
    z = lm.rsquared_adj            
    r.append(b) 
    q.append(x) 
    a.append(z)  

r2 score for  24   0.7419676261621093
adjusted_r2 for  24   0.7179646146423055
r2 score for  47   0.603366414888494
adjusted_r2 for  47   0.5278171605815405
r2 score for  7   0.42269762998438365
adjusted_r2 for  7   0.31096168740071595
r2 score for  31   0.5522236957713651
adjusted_r2 for  31   0.5204289286072018
r2 score for  71   0.7043117819968332
adjusted_r2 for  71   0.6333466096760731
r2 score for  97   0.35809687755737896
adjusted_r2 for  97   0.28742864389397116
r2 score for  33   0.15349634550870594
adjusted_r2 for  33   0.001883750674444018
r2 score for  16   0.39794323017611233
adjusted_r2 for  16   0.30411620111264925
r2 score for  75   0.40851632909789215
adjusted_r2 for  75   0.3508106051074428
r2 score for  89   0.5772473070114992
adjusted_r2 for  89   0.5432001102607473
r2 score for  49   0.6418541227596853
adjusted_r2 for  49   0.605121212273499
r2 score for  88   0.7449912565825332
adjusted_r2 for  88   0.6798826412419035
r2 score for  19   0.33681749932203553
adjuste

In [130]:
#Calculating weighted R2 and adj R2 using Unit_qty as weights
D = pd.DataFrame()
D['r2']=r
D['adj_r']=a
D['SCRUB_ITEM']=q
w = df_2.groupby(['SCRUB_ITEM'])['UNIT_QTY'].sum().reset_index()
w['UNIT_QTY_s']=w['UNIT_QTY'].sum()
w['weight']=w['UNIT_QTY']/w['UNIT_QTY_s']
D1 = pd.merge(D,w,on=['SCRUB_ITEM'],how='outer')
D1['wr']=D1['weight']*D1['r2']
D1['wa']=D1['weight']*D1['adj_r']
print(D1.wr.sum())
print(D1.wa.sum())

0.41470581058390416
0.3358436149744579


In [131]:
D[D['r2']>0.5].SCRUB_ITEM.nunique()

32

In [132]:
len(df_2.SCRUB_ITEM.unique())

97

In [133]:
df_2.columns

Index(['SCRUB_ITEM', 'time_id', 'UNIT_QTY', 'UNIT_PRICE', 'UNIT_COST',
       'year_x', 'week', 'year_y', 'weekday', 'date', 'sin365', 'cos365',
       'sin365_2', 'cos365_2', 'year', 'month', 'col1', 'col2', 'col3', 'col4',
       'col5', 'col6'],
      dtype='object')

In [134]:
#obtaining coeffs 
import statsmodels.api as sm  
from sklearn.preprocessing import PolynomialFeatures
polynomial_features= PolynomialFeatures(degree=3)
q1 = pd.DataFrame()
const_l =[]
UNIT_PRICE_l=[]
year_l=[]
sin365_l=[]
cos365_l=[]
month_l=[]
week_l=[]
scrub_l=[]
col_1_l=[]
col_2_l=[]
col_3_l=[]
col_4_l=[]
col_5_l=[]
col_6_l=[]

for x in list(df_2.SCRUB_ITEM.unique()):
    df_x=df_2[df_2.SCRUB_ITEM==x]
    y = df_x['UNIT_QTY']
    X = df_x[[  'UNIT_PRICE','year','sin365','month','cos365','week', 'col1', 'col2', 'col3', 'col4',
       'col5', 'col6']]
    X = sm.add_constant(X)
    lm = sm.OLS(y,X).fit()
    y_predict = lm.predict(X)
    A = pd.read_html(lm.summary().tables[1].as_html(),header=0,index_col=0)[0]
    const = A['coef'].values[0]
    UNIT_PRICE = A['coef'].values[1]
    year = A['coef'].values[2]
    sin365 = A['coef'].values[3]
    month = A['coef'].values[4]
    cos365 = A['coef'].values[5]
    week = A['coef'].values[6]
    col1 = A['coef'].values[7]
    col2 = A['coef'].values[8]
    col3 = A['coef'].values[9]
    col4 = A['coef'].values[10]
    col5 = A['coef'].values[11]
    col6 = A['coef'].values[12]
    print(lm.params)
    scrub_l.append(x)
    const_l.append(const)
    UNIT_PRICE_l.append(UNIT_PRICE)
    year_l.append(year)
    sin365_l.append(sin365)
    cos365_l.append(cos365)
    month_l.append(month)
    week_l.append(week)
    col_1_l.append(col1)
    col_2_l.append(col2)
    col_3_l.append(col3)
    col_4_l.append(col4)
    col_5_l.append(col5)
    col_6_l.append(col6)
    print(lm.params)

const        -148161.543624
UNIT_PRICE      1501.159103
year              73.294283
sin365             0.288921
month          -2742.678856
cos365            -0.036661
week               0.183711
col1              -0.854422
col2              -0.742586
col3              18.435446
col4               1.399809
col5              -0.009702
col6               0.000006
dtype: float64
const        -148161.543624
UNIT_PRICE      1501.159103
year              73.294283
sin365             0.288921
month          -2742.678856
cos365            -0.036661
week               0.183711
col1              -0.854422
col2              -0.742586
col3              18.435446
col4               1.399809
col5              -0.009702
col6               0.000006
dtype: float64
const         15856.040175
UNIT_PRICE      -95.385646
year             -7.815510
sin365           -0.131791
month         -1419.758009
cos365           -2.201118
week             -1.929680
col1              0.148537
col2              0.047052

dtype: float64
const        -115103.817826
UNIT_PRICE       995.996776
year              57.048629
sin365            -0.879916
month           4802.455864
cos365            -1.823555
week              -0.741136
col1               0.249929
col2              -0.493602
col3            -139.171434
col4              -2.394265
col5               0.069280
col6              -0.000002
dtype: float64
const         4.238749e+03
UNIT_PRICE   -4.594186e-01
year         -2.085049e+00
sin365        5.319296e-01
month        -2.249682e+03
cos365       -8.684680e-01
week          2.050887e-01
col1          1.368539e-03
col2          2.205999e-04
col3          5.039203e+01
col4          1.112689e+00
col5         -2.494263e-02
col6         -9.298036e-09
dtype: float64
const         4.238749e+03
UNIT_PRICE   -4.594186e-01
year         -2.085049e+00
sin365        5.319296e-01
month        -2.249682e+03
cos365       -8.684680e-01
week          2.050887e-01
col1          1.368539e-03
col2          2.205999e-

dtype: float64
const        -1.877939e+04
UNIT_PRICE    1.284105e+02
year          9.306572e+00
sin365       -4.068378e-01
month        -2.222266e+03
cos365       -5.535795e-01
week          1.599048e-01
col1         -7.510938e-02
col2         -6.362840e-02
col3          3.907726e+01
col4          1.105591e+00
col5         -1.945518e-02
col6          7.105963e-07
dtype: float64
const         1.384787e+04
UNIT_PRICE   -8.546372e+01
year         -6.846206e+00
sin365       -5.101494e-01
month         1.126221e+03
cos365       -3.992930e-01
week         -4.232657e-01
col1          5.502080e-02
col2          4.227228e-02
col3         -3.485926e+01
col4         -5.625591e-01
col5          1.741465e-02
col6         -7.061723e-07
dtype: float64
const         1.384787e+04
UNIT_PRICE   -8.546372e+01
year         -6.846206e+00
sin365       -5.101494e-01
month         1.126221e+03
cos365       -3.992930e-01
week         -4.232657e-01
col1          5.502080e-02
col2          4.227228e-02
col3      

dtype: float64
const        -5.893379e+03
UNIT_PRICE    1.977323e-02
year          2.942709e+00
sin365        1.943418e-01
month         1.452486e+03
cos365       -2.032597e-01
week          2.062906e-01
col1          1.800740e-02
col2         -4.680509e-05
col3         -2.262307e+01
col4         -7.251565e-01
col5          1.127896e-02
col6         -1.251514e-07
dtype: float64
const          -5.455196
UNIT_PRICE   -327.257230
year            0.002703
sin365         -1.190868
month          -0.597982
cos365         -1.125375
week           -0.012590
col1          -35.872959
col2            0.162162
col3          129.892740
col4            1.066677
col5           -0.000019
col6           -0.001072
dtype: float64
const          -5.455196
UNIT_PRICE   -327.257230
year            0.002703
sin365         -1.190868
month          -0.597982
cos365         -1.125375
week           -0.012590
col1          -35.872959
col2            0.162162
col3          129.892740
col4            1.066677
col5

const        -120817.681677
UNIT_PRICE       529.723591
year              59.935550
sin365             1.726665
month            381.600894
cos365             3.445192
week               0.791206
col1               0.215944
col2              -0.262846
col3             -26.544061
col4              -0.212123
col5               0.013519
col6              -0.000002
dtype: float64
const        -120817.681677
UNIT_PRICE       529.723591
year              59.935550
sin365             1.726665
month            381.600894
cos365             3.445192
week               0.791206
col1               0.215944
col2              -0.262846
col3             -26.544061
col4              -0.212123
col5               0.013519
col6              -0.000002
dtype: float64
const         1.548393e+04
UNIT_PRICE   -1.331941e+01
year         -7.663040e+00
sin365        9.512960e-01
month         1.163597e+03
cos365       -5.415977e-02
week         -1.489575e-01
col1          1.798610e-03
col2          6.592551e-03

dtype: float64
const        -154083.187329
UNIT_PRICE      1454.551685
year              76.307754
sin365            -2.185158
month         -12254.337862
cos365            -1.536185
week               1.511093
col1               0.240505
col2              -0.720366
col3             259.282955
col4               6.052986
col5              -0.128045
col6              -0.000003
dtype: float64
const        -280558.069683
UNIT_PRICE      1410.966433
year             138.870965
sin365            -0.023955
month            577.993807
cos365             0.158523
week              -0.187540
col1               0.112688
col2              -0.698383
col3              -0.188977
col4              -0.296486
col5               0.000342
col6              -0.000001
dtype: float64
const        -280558.069683
UNIT_PRICE      1410.966433
year             138.870965
sin365            -0.023955
month            577.993807
cos365             0.158523
week              -0.187540
col1               0.112688
col

dtype: float64
const        -7.437269e+04
UNIT_PRICE    5.558932e+02
year          3.683951e+01
sin365        1.660786e+00
month        -1.462463e+04
cos365       -5.629548e-01
week          1.353290e+00
col1         -6.968266e-02
col2         -2.753211e-01
col3          2.791194e+02
col4          7.245092e+00
col5         -1.382676e-01
col6          2.698849e-07
dtype: float64
const        -6.218688e+04
UNIT_PRICE    5.780500e+01
year          3.079506e+01
sin365        2.183347e-02
month         1.333863e+03
cos365        3.892187e-01
week          1.715353e-01
col1          5.880320e-03
col2         -2.862574e-02
col3         -2.382326e+01
col4         -6.633044e-01
col5          1.187702e-02
col6         -8.694314e-08
dtype: float64
const        -6.218688e+04
UNIT_PRICE    5.780500e+01
year          3.079506e+01
sin365        2.183347e-02
month         1.333863e+03
cos365        3.892187e-01
week          1.715353e-01
col1          5.880320e-03
col2         -2.862574e-02
col3      

dtype: float64
const         95515.023714
UNIT_PRICE    -3368.042652
year            -47.144025
sin365           -2.766179
month         19370.896246
cos365           -4.167468
week              1.683479
col1              2.006069
col2              1.664171
col3           -332.595365
col4             -9.632743
col5              0.165429
col6             -0.000020
dtype: float64
const            20.011709
UNIT_PRICE     3978.410485
year             -2.388900
sin365           29.289499
month        -66363.064000
cos365           31.427935
week            -11.810203
col1             -5.500399
col2             -1.957217
col3          -5955.133650
col4             33.397963
col5              2.945623
col6              0.000014
dtype: float64
const            20.011709
UNIT_PRICE     3978.410485
year             -2.388900
sin365           29.289499
month        -66363.064000
cos365           31.427935
week            -11.810203
col1             -5.500399
col2             -1.957217
col3      

In [135]:
Data=pd.DataFrame()
# df_2['col1'] = df_2['UNIT_PRICE']*df_2['month']
# df_2['col2'] = df_2['UNIT_PRICE']*df_2['year']
# df_2['col3'] = df_2['month']*df_2['week']
# df_2['col4'] = df_2['month']*df_2['year']
# df_2['col5'] = df_2['month']*df_2['year']*df_2['week']
# df_2['col6'] = df_2['month']*df_2['year']*df_2['week']*df_2['UNIT_PRICE']

Data['SCRUB_ITEM']=scrub_l
Data['UNIT_PRICE_coeff']=UNIT_PRICE_l
Data['year_coeff']=year_l
Data['sin365_coeff']=sin365_l
Data['cos365_coeff']=cos365_l
Data['month_coeff']=sin365_l
Data['week_coeff']=week_l
Data['UNIT_PRICE*month']=col_1_l
Data['UNIT_PRICE*year']=col_2_l
Data['month*week']=col_3_l
Data['month*year']=col_4_l
Data['month*year*week']=col_5_l
Data['month*year*week*UNIT_PRICE']=col_6_l

Data['constant']=const_l

Data.head()



Unnamed: 0,SCRUB_ITEM,UNIT_PRICE_coeff,year_coeff,sin365_coeff,cos365_coeff,month_coeff,week_coeff,UNIT_PRICE*month,UNIT_PRICE*year,month*week,month*year,month*year*week,month*year*week*UNIT_PRICE,constant
0,24,1501.1591,73.2943,0.2889,-0.0367,0.2889,0.1837,-0.8544,-0.7426,18.4354,1.3998,-0.0097,5.832e-06,-148200.0
1,47,-95.3856,-7.8155,-0.1318,-2.2011,-0.1318,-1.9297,0.1485,0.0471,18.7013,0.6916,-0.0089,-1.831e-06,15860.0
2,7,337.5629,23.8569,-0.5568,1.5686,-0.5568,0.6392,-0.0387,-0.1671,69.7348,2.09,-0.0346,1.82e-07,-48200.0
3,31,-772.1811,-32.1199,-1.4187,2.632,-1.4187,1.4466,-0.5315,0.3823,279.399,6.751,-0.1389,5.818e-06,64870.0
4,71,244.1711,30.504,0.1566,-0.1846,0.1566,-0.2786,-0.0421,-0.1209,-33.4638,-0.4827,0.0165,3.037e-07,-61600.0


In [136]:
df_2.SCRUB_ITEM.nunique()

97

In [137]:
#saving the dataset
Data.to_csv('Coeffs1.csv',index=False)

In [138]:
Data.columns

Index(['SCRUB_ITEM', 'UNIT_PRICE_coeff', 'year_coeff', 'sin365_coeff',
       'cos365_coeff', 'month_coeff', 'week_coeff', 'UNIT_PRICE*month',
       'UNIT_PRICE*year', 'month*week', 'month*year', 'month*year*week',
       'month*year*week*UNIT_PRICE', 'constant'],
      dtype='object')