In [51]:
# Importing the required libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import clear_output,display

from sklearn.model_selection import cross_val_score,GridSearchCV,RandomizedSearchCV
from sklearn.metrics import mean_squared_log_error,mean_squared_error
from sklearn.linear_model import LinearRegression,Lasso,Ridge

from sklearn.preprocessing import StandardScaler,MinMaxScaler

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor,BaggingRegressor

from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor

from sklearn.neighbors import KNeighborsRegressor

In [52]:
# importing the train and test datasets

train=pd.read_csv("train.csv")
test=pd.read_csv("test.csv")
sample_sub=pd.read_csv("sample_submission.csv")

In [53]:
train["Week_No"]=np.ceil(train["Day_No"]/7)
train["Year_No"]=np.ceil(train["Week_No"]/52)
train["Week_No"]=(train["Week_No"]%52)

train.loc[train["Week_No"]==0,"Week_No"]=train.loc[train["Week_No"]==0,"Week_No"].apply(lambda x: x+1)

In [54]:
test["Week_No"]=np.ceil(test["Day_No"]/7)
test["Year_No"]=np.ceil(test["Week_No"]/52)
test["Week_No"]=(test["Week_No"]%52)

test.loc[test["Week_No"]==0,"Week_No"]=test.loc[test["Week_No"]==0,"Week_No"].apply(lambda x: x+1)

In [55]:
train

Unnamed: 0,ID,Day_No,Course_ID,Course_Domain,Course_Type,Short_Promotion,Public_Holiday,Long_Promotion,User_Traffic,Competition_Metric,Sales,Week_No,Year_No
0,1,1,1,Development,Course,0,1,1,11004,0.007,81,1.0,1.0
1,2,2,1,Development,Course,0,0,1,13650,0.007,79,1.0,1.0
2,3,3,1,Development,Course,0,0,1,11655,0.007,75,1.0,1.0
3,4,4,1,Development,Course,0,0,1,12054,0.007,80,1.0,1.0
4,5,5,1,Development,Course,0,0,1,6804,0.007,41,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
512082,548023,878,600,Software Marketing,Program,0,0,1,8904,0.070,114,22.0,3.0
512083,548024,879,600,Software Marketing,Program,0,0,1,10542,0.070,145,22.0,3.0
512084,548025,880,600,Software Marketing,Program,0,0,1,13671,0.070,167,22.0,3.0
512085,548026,881,600,Software Marketing,Program,0,0,1,8904,0.070,107,22.0,3.0


In [56]:
# Imputing Missing Values

train.loc[(train["Course_ID"]==354) | (train["Course_ID"]==176),"Competition_Metric"]=train.groupby("Course_ID")["Competition_Metric"].mean().mean()
test.loc[(test["Course_ID"]==354) | (test["Course_ID"]==176),"Competition_Metric"]=train.groupby("Course_ID")["Competition_Metric"].mean().mean()

In [57]:
mean_by_course=train.groupby(["Course_ID","Week_No","Short_Promotion"])["Sales"].mean().to_frame()
mean_by_course.reset_index(inplace=True)
mean_by_course["Target_Course_ID"]=mean_by_course["Sales"]
mean_by_course.drop(columns="Sales",inplace=True)
mean_by_course

Unnamed: 0,Course_ID,Week_No,Short_Promotion,Target_Course_ID
0,1,1.0,0,57.125000
1,1,1.0,1,134.666667
2,1,2.0,0,70.125000
3,1,2.0,1,107.615385
4,1,3.0,0,66.133333
...,...,...,...,...
61195,600,49.0,1,154.125000
61196,600,50.0,0,105.333333
61197,600,50.0,1,216.500000
61198,600,51.0,0,189.166667


In [58]:
train=train.merge(mean_by_course)
test=test.merge(mean_by_course)

In [59]:
# mean_by_week=train.groupby(["Week_No"])["Sales"].mean().to_frame()
# mean_by_week.reset_index(inplace=True)
# mean_by_week["Week_No_Group"]=mean_by_week["Sales"]
# mean_by_week.drop(columns="Sales",inplace=True)
# mean_by_week

In [60]:
# train=train.merge(mean_by_week)
# test=test.merge(mean_by_week)

In [61]:
mean_UT=train.groupby(["Course_ID","Week_No","Short_Promotion"])["User_Traffic"].mean().to_frame()
mean_UT.reset_index(inplace=True)
mean_UT["Group_Traffic"]=mean_UT["User_Traffic"]
mean_UT.drop(columns="User_Traffic",inplace=True)
mean_UT

Unnamed: 0,Course_ID,Week_No,Short_Promotion,Group_Traffic
0,1,1.0,0,8266.125000
1,1,1.0,1,16387.000000
2,1,2.0,0,10085.250000
3,1,2.0,1,13919.769231
4,1,3.0,0,9888.200000
...,...,...,...,...
61195,600,49.0,1,11009.250000
61196,600,50.0,0,8561.000000
61197,600,50.0,1,14563.500000
61198,600,51.0,0,14654.500000


In [62]:
train=train.merge(mean_UT)
test=test.merge(mean_UT)

In [63]:
train.drop(columns=["ID","Year_No","User_Traffic","Course_ID","Day_No","Week_No"],inplace=True)
test.drop(columns=["ID","Year_No","Course_ID","Day_No","Week_No"],inplace=True)

In [64]:
train

Unnamed: 0,Course_Domain,Course_Type,Short_Promotion,Public_Holiday,Long_Promotion,Competition_Metric,Sales,Target_Course_ID,Group_Traffic
0,Development,Course,0,1,1,0.007,81,57.125000,8266.125
1,Development,Course,0,0,1,0.007,79,57.125000,8266.125
2,Development,Course,0,0,1,0.007,75,57.125000,8266.125
3,Development,Course,0,0,1,0.007,80,57.125000,8266.125
4,Development,Course,0,0,1,0.007,41,57.125000,8266.125
...,...,...,...,...,...,...,...,...,...
512082,Software Marketing,Program,0,0,1,0.070,166,189.166667,14654.500
512083,Software Marketing,Program,0,0,1,0.070,212,189.166667,14654.500
512084,Software Marketing,Program,0,0,1,0.070,197,189.166667,14654.500
512085,Software Marketing,Program,0,0,1,0.070,182,189.166667,14654.500


In [65]:
train["Sales"]=np.log1p(train["Sales"])

In [66]:
def export_submission(predictions,filename="Sample.csv"):
    
    ss=sample_sub.copy()
    ss["Sales"]=np.expm1(predictions)
    ss.to_csv(f"{filename}.csv",index=False)

In [67]:
train.dtypes

Course_Domain          object
Course_Type            object
Short_Promotion         int64
Public_Holiday          int64
Long_Promotion          int64
Competition_Metric    float64
Sales                 float64
Target_Course_ID      float64
Group_Traffic         float64
dtype: object

In [68]:
train=pd.get_dummies(train,drop_first=True)
test=pd.get_dummies(test,drop_first=True)

In [69]:
X_train=train.drop(columns="Sales")
y_train=train["Sales"]

In [71]:
train.corr()

Unnamed: 0,Short_Promotion,Public_Holiday,Long_Promotion,Competition_Metric,Sales,Target_Course_ID,Group_Traffic,Course_Domain_Development,Course_Domain_Finance & Accounting,Course_Domain_Software Marketing,Course_Type_Degree,Course_Type_Program
Short_Promotion,1.0,-0.01877,-0.001149,0.000182,0.372352,0.407371,0.21154,0.000253,0.000308,-0.000532,8.6e-05,0.000312
Public_Holiday,-0.01877,1.0,0.008319,-0.000491,-0.233028,-0.010931,-0.013126,-0.001631,-0.002116,0.003538,3.6e-05,-0.002436
Long_Promotion,-0.001149,0.008319,1.0,-0.119171,-0.09362,-0.121244,-0.217397,-0.11197,-0.050691,0.161533,0.001298,0.020038
Competition_Metric,0.000182,-0.000491,-0.119171,1.0,-0.017942,-0.024423,-0.158152,-0.044816,-0.126733,0.155949,-0.037263,0.140105
Sales,0.372352,-0.233028,-0.09362,-0.017942,1.0,0.800013,0.640404,-0.045996,0.03403,0.004008,0.063425,0.136004
Target_Course_ID,0.407371,-0.010931,-0.121244,-0.024423,0.800013,1.0,0.82044,-0.017059,0.031714,-0.030682,0.075764,0.146283
Group_Traffic,0.21154,-0.013126,-0.217397,-0.158152,0.640404,0.82044,1.0,0.117095,0.110001,-0.284415,0.340003,0.018001
Course_Domain_Development,0.000253,-0.001631,-0.11197,-0.044816,-0.045996,-0.017059,0.117095,1.0,-0.435165,-0.715806,-0.060719,-0.169256
Course_Domain_Finance & Accounting,0.000308,-0.002116,-0.050691,-0.126733,0.03403,0.031714,0.110001,-0.435165,1.0,-0.292044,-0.024773,-0.00151
Course_Domain_Software Marketing,-0.000532,0.003538,0.161533,0.155949,0.004008,-0.030682,-0.284415,-0.715806,-0.292044,1.0,-0.040749,0.199612


## Fitting Linear Model

In [20]:
lr=LinearRegression(normalize=True)
lr.fit(X_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=True)

In [21]:
pd.Series(lr.predict(test)).value_counts()

4.788439    6
4.959092    6
4.318944    6
4.636450    6
4.297956    6
           ..
4.681904    1
5.372385    1
4.986669    1
5.189020    1
5.158190    1
Length: 10594, dtype: int64

In [22]:
y_train_pred=lr.predict(X_train)
y_test_pred=lr.predict(test)

In [23]:
1000*np.sqrt(mean_squared_error(y_train,y_train_pred))

255.10658692928467

In [80]:
export_submission(y_test_pred,"Linear Regression Featuress")

## Grid Search For Linear Model

In [170]:
parameters={"normalize":["True","False"]}

In [171]:
search=GridSearchCV(LinearRegression(),param_grid=parameters,scoring='neg_mean_squared_error',cv=5)

In [172]:
search.fit(X_train,y_train)

GridSearchCV(cv=5, error_score=nan,
             estimator=LinearRegression(copy_X=True, fit_intercept=True,
                                        n_jobs=None, normalize=False),
             iid='deprecated', n_jobs=None,
             param_grid={'normalize': ['True', 'False']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=0)

In [173]:
search.best_params_

{'normalize': 'True'}

In [174]:
search.best_score_

-0.06520298933937438

## Grid Search For Lasso Model

In [81]:
parameters={"normalize":["True","False"],
           "alpha":[0.0000001,0.000001,0.00001,0.0001,0.001,0.01,0.1]}

In [82]:
search=GridSearchCV(Lasso(random_state=10),parameters,scoring='neg_mean_squared_error',cv=5,n_jobs=-1,verbose=2)

In [83]:
search.fit(X_train,y_train)

Fitting 5 folds for each of 14 candidates, totalling 70 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:   30.6s
[Parallel(n_jobs=-1)]: Done  70 out of  70 | elapsed:   44.5s finished


GridSearchCV(cv=5, error_score=nan,
             estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=1000, normalize=False, positive=False,
                             precompute=False, random_state=10,
                             selection='cyclic', tol=0.0001, warm_start=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'alpha': [1e-07, 1e-06, 1e-05, 0.0001, 0.001, 0.01,
                                   0.1],
                         'normalize': ['True', 'False']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=2)

In [84]:
search.best_params_

{'alpha': 1e-07, 'normalize': 'True'}

In [85]:
search.best_score_

-0.06546828080092272

In [86]:
lasso=Lasso(alpha=1e-07,normalize=True)
lasso.fit(X_train,y_train)

Lasso(alpha=1e-07, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=True, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [87]:
y_train_pred=lasso.predict(X_train)
y_test_pred=lasso.predict(test)

In [88]:
1000*np.sqrt(mean_squared_error(y_train,y_train_pred))

255.10707897732055

In [89]:
lasso.coef_

array([ 4.68227635e-02, -5.89302631e-01, -5.45977788e-03, -3.37898389e-02,
        7.51259364e-03, -2.04450712e-06, -2.29762469e-02,  4.70764531e-04,
        1.67081779e-03,  1.01580862e-01,  1.50476481e-02])

In [90]:
X_train

Unnamed: 0,Short_Promotion,Public_Holiday,Long_Promotion,Competition_Metric,Target_Course_ID,Group_Traffic,Course_Domain_Development,Course_Domain_Finance & Accounting,Course_Domain_Software Marketing,Course_Type_Degree,Course_Type_Program
0,0,1,1,0.007,57.125000,8266.125,1,0,0,0,0
1,0,0,1,0.007,57.125000,8266.125,1,0,0,0,0
2,0,0,1,0.007,57.125000,8266.125,1,0,0,0,0
3,0,0,1,0.007,57.125000,8266.125,1,0,0,0,0
4,0,0,1,0.007,57.125000,8266.125,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
512082,0,0,1,0.070,189.166667,14654.500,0,0,1,0,1
512083,0,0,1,0.070,189.166667,14654.500,0,0,1,0,1
512084,0,0,1,0.070,189.166667,14654.500,0,0,1,0,1
512085,0,0,1,0.070,189.166667,14654.500,0,0,1,0,1


In [91]:
export_submission(y_test_pred,"Lasso Regression Engineered Features")

## Grid Search For Tree Model

In [20]:
tree=DecisionTreeRegressor()

In [21]:
tree.fit(X_train,y_train)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=None, splitter='best')

In [22]:
y_train_pred=tree.predict(X_train)
y_test_pred=tree.predict(test)

In [23]:
1000*np.sqrt(mean_squared_error(y_train,y_train_pred))

6.4268327209905796e-15

In [193]:
export_submission(y_test_pred,"Tree Engineered Features")

In [25]:
tree.get_depth()

69

In [93]:
parameters={"max_depth":range(2,69)}

In [94]:
search=GridSearchCV(DecisionTreeRegressor(random_state=10),parameters,scoring='neg_mean_squared_error',cv=5,n_jobs=6,verbose=2)

In [95]:
search.fit(X_train,y_train)

Fitting 5 folds for each of 67 candidates, totalling 335 fits


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  29 tasks      | elapsed:    5.7s
[Parallel(n_jobs=6)]: Done 150 tasks      | elapsed:   40.6s
[Parallel(n_jobs=6)]: Done 335 out of 335 | elapsed:  1.7min finished


GridSearchCV(cv=5, error_score=nan,
             estimator=DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse',
                                             max_depth=None, max_features=None,
                                             max_leaf_nodes=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             presort='deprecated',
                                             random_state=10, splitter='best'),
             iid='deprecated', n_jobs=6, param_grid={'max_depth': range(2, 69)},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=2)

In [96]:
search.best_params_

{'max_depth': 7}

In [97]:
search.best_score_

-0.05472307682371338

In [98]:
tree=DecisionTreeRegressor(max_depth= 7)

In [99]:
tree.fit(X_train,y_train)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=7,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=None, splitter='best')

In [100]:
y_train_pred=tree.predict(X_train)
y_test_pred=tree.predict(test)

In [101]:
1000*np.sqrt(mean_squared_error(y_train,y_train_pred))

232.0956438112122

In [102]:
export_submission(y_test_pred,"Tree Max Depth Engineered Features")

## Random Forests

In [109]:
forest=RandomForestRegressor(max_features=None,n_estimators=400,n_jobs=8)

In [110]:
forest.fit(X_train,y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features=None, max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=400, n_jobs=8, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [111]:
y_train_pred=forest.predict(X_train)
y_test_pred=forest.predict(test)

In [112]:
1000*np.sqrt(mean_squared_error(y_train,y_train_pred))

216.13655175210727

In [113]:
export_submission(y_test_pred,"Forest Engineered Features")

In [114]:
parameters={"max_depth":range(2,20),'max_samples':[0.5,0.6,0.7,0.8]}

In [115]:
search=RandomizedSearchCV(RandomForestRegressor(random_state=10,max_features=None,n_estimators=200,n_jobs=6),parameters,scoring='neg_mean_squared_error',cv=5,n_jobs=6,verbose=2,n_iter=20)

In [116]:
search.fit(X_train,y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  29 tasks      | elapsed: 17.2min
[Parallel(n_jobs=6)]: Done 100 out of 100 | elapsed: 77.8min finished


RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=RandomForestRegressor(bootstrap=True,
                                                   ccp_alpha=0.0,
                                                   criterion='mse',
                                                   max_depth=None,
                                                   max_features=None,
                                                   max_leaf_nodes=None,
                                                   max_samples=None,
                                                   min_impurity_decrease=0.0,
                                                   min_impurity_split=None,
                                                   min_samples_leaf=1,
                                                   min_samples_split=2,
                                                   min_weight_fraction_leaf=0.0,
                                                   n_estimators=200, n_jobs=6,
                      

In [117]:
search.best_params_

{'max_samples': 0.8, 'max_depth': 7}

In [38]:
search.best_score_

-0.03500106369665605

In [119]:
tree=RandomForestRegressor(max_depth= 7, max_samples=0.8)

In [120]:
tree.fit(X_train,y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=7, max_features='auto', max_leaf_nodes=None,
                      max_samples=0.8, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [121]:
y_train_pred=tree.predict(X_train)
y_test_pred=tree.predict(test)

In [122]:
1000*np.sqrt(mean_squared_error(y_train,y_train_pred))

231.47714048793725

In [123]:
export_submission(y_test_pred,"Random Forest Tuned Engineered Features")

## LightGBM

In [21]:
regressor=LGBMRegressor()

In [22]:
regressor.fit(X_train,y_train)

LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              importance_type='split', learning_rate=0.1, max_depth=-1,
              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
              random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
              subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [23]:
y_train_pred=regressor.predict(X_train)
y_test_pred=regressor.predict(test)

In [24]:
1000*np.sqrt(mean_squared_error(y_train,y_train_pred))

195.30549317052976

In [25]:
export_submission(y_test_pred,"LGBM Default Engineered Features")

In [166]:
parameters={'learning_rate':np.arange(0.001,0.201,0.001)}

In [172]:
search=RandomizedSearchCV(LGBMRegressor(n_estimators=400,random_state=10),parameters,scoring='neg_mean_squared_error',n_jobs=-1,cv=5,verbose=2,n_iter=20)

In [173]:
search.fit(X_train,y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:   50.9s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  6.5min finished


RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=LGBMRegressor(boosting_type='gbdt',
                                           class_weight=None,
                                           colsample_bytree=1.0,
                                           importance_type='split',
                                           learning_rate=0.1, max_depth=-1,
                                           min_child_samples=20,
                                           min_child_weight=0.001,
                                           min_split_gain=0.0, n_estimators=400,
                                           n_jobs=-1, num_leaves=31,
                                           objective=None, random_state=10,
                                           reg_alpha=0.0, reg_lambda=0.0,
                                           silent=...
       0.154, 0.155, 0.156, 0.157, 0.158, 0.159, 0.16 , 0.161, 0.162,
       0.163, 0.164, 0.165, 0.166, 0.167, 0.168, 0.169, 0.17 , 0.1

In [174]:
search.best_params_

{'learning_rate': 0.017}

In [175]:
search.best_score_

-0.05427853762690869

In [170]:
search.cv_results_

AttributeError: 'RandomizedSearchCV' object has no attribute 'cv_results_'

In [176]:
booster=LGBMRegressor(n_estimators=400,learning_rate=0.017,random_state=10)

In [177]:
booster.fit(X_train,y_train)

LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              importance_type='split', learning_rate=0.017, max_depth=-1,
              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=400, n_jobs=-1, num_leaves=31, objective=None,
              random_state=10, reg_alpha=0.0, reg_lambda=0.0, silent=True,
              subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [178]:
y_train_pred=booster.predict(X_train)
y_test_pred=booster.predict(test)

In [179]:
1000*np.sqrt(mean_squared_error(y_train,y_train_pred))

230.55714438125648

In [180]:
export_submission(y_test_pred,"LGBM Optimized Engineered Features")

## KNN

In [None]:
scaler=MinMaxScaler()
X_train=scaler.fit_transform(X_train)


In [25]:
knn=KNeighborsRegressor(n_neighbors= 17,n_jobs=-1)

In [None]:
knn.fit(X_train,y_train)

In [138]:
y_train_pred=forest.predict(X_train)
y_test_pred=forest.predict(test)

In [139]:
1000*np.sqrt(mean_squared_error(y_train,y_train_pred))

216.13655175210727

In [140]:
export_submission(y_test_pred,"KNN 17 Engineered Features")

In [131]:
parameters={'n_neighbors':range(1,50,1)}

In [132]:
search=GridSearchCV(KNeighborsRegressor(n_jobs=6),parameters,scoring='neg_mean_squared_error',cv=5,verbose=2,n_jobs=6)

In [133]:
search.fit(X_train,y_train)

Fitting 5 folds for each of 49 candidates, totalling 245 fits


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  21 tasks      | elapsed:   30.4s
[Parallel(n_jobs=10)]: Done 142 tasks      | elapsed:  3.2min
[Parallel(n_jobs=10)]: Done 245 out of 245 | elapsed:  5.7min finished


GridSearchCV(cv=5, error_score=nan,
             estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30,
                                           metric='minkowski',
                                           metric_params=None, n_jobs=None,
                                           n_neighbors=5, p=2,
                                           weights='uniform'),
             iid='deprecated', n_jobs=10,
             param_grid={'n_neighbors': range(1, 50)}, pre_dispatch='2*n_jobs',
             refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=2)

In [134]:
search.best_params_

{'n_neighbors': 17}

In [135]:
search.best_score_

-0.06775012845127092

## Catboost

In [41]:
boost=CatBoostRegressor(learning_rate=0.1)

In [42]:
boost.fit(X_train,y_train)

0:	learn: 0.4298931	total: 37.1ms	remaining: 37s
1:	learn: 0.4020193	total: 71.8ms	remaining: 35.8s
2:	learn: 0.3775951	total: 112ms	remaining: 37.3s
3:	learn: 0.3566886	total: 151ms	remaining: 37.7s
4:	learn: 0.3381898	total: 190ms	remaining: 37.8s
5:	learn: 0.3222246	total: 225ms	remaining: 37.2s
6:	learn: 0.3083741	total: 261ms	remaining: 37s
7:	learn: 0.2966586	total: 296ms	remaining: 36.7s
8:	learn: 0.2864590	total: 331ms	remaining: 36.5s
9:	learn: 0.2779455	total: 371ms	remaining: 36.7s
10:	learn: 0.2705556	total: 407ms	remaining: 36.6s
11:	learn: 0.2643659	total: 441ms	remaining: 36.3s
12:	learn: 0.2591858	total: 476ms	remaining: 36.2s
13:	learn: 0.2548900	total: 511ms	remaining: 36s
14:	learn: 0.2512253	total: 545ms	remaining: 35.8s
15:	learn: 0.2481811	total: 577ms	remaining: 35.5s
16:	learn: 0.2457123	total: 609ms	remaining: 35.2s
17:	learn: 0.2435781	total: 641ms	remaining: 35s
18:	learn: 0.2418099	total: 676ms	remaining: 34.9s
19:	learn: 0.2403348	total: 713ms	remaining: 34

164:	learn: 0.2314863	total: 5.79s	remaining: 29.3s
165:	learn: 0.2314773	total: 5.83s	remaining: 29.3s
166:	learn: 0.2314755	total: 5.86s	remaining: 29.2s
167:	learn: 0.2314708	total: 5.89s	remaining: 29.2s
168:	learn: 0.2314625	total: 5.93s	remaining: 29.2s
169:	learn: 0.2314544	total: 5.98s	remaining: 29.2s
170:	learn: 0.2314496	total: 6.01s	remaining: 29.1s
171:	learn: 0.2314432	total: 6.04s	remaining: 29.1s
172:	learn: 0.2314369	total: 6.08s	remaining: 29s
173:	learn: 0.2314298	total: 6.11s	remaining: 29s
174:	learn: 0.2314219	total: 6.15s	remaining: 29s
175:	learn: 0.2314182	total: 6.18s	remaining: 28.9s
176:	learn: 0.2314126	total: 6.21s	remaining: 28.9s
177:	learn: 0.2314083	total: 6.25s	remaining: 28.8s
178:	learn: 0.2314043	total: 6.28s	remaining: 28.8s
179:	learn: 0.2313975	total: 6.32s	remaining: 28.8s
180:	learn: 0.2313921	total: 6.35s	remaining: 28.7s
181:	learn: 0.2313839	total: 6.39s	remaining: 28.7s
182:	learn: 0.2313789	total: 6.42s	remaining: 28.6s
183:	learn: 0.2313

325:	learn: 0.2306196	total: 11.5s	remaining: 23.7s
326:	learn: 0.2306085	total: 11.5s	remaining: 23.7s
327:	learn: 0.2306028	total: 11.6s	remaining: 23.7s
328:	learn: 0.2305975	total: 11.6s	remaining: 23.6s
329:	learn: 0.2305950	total: 11.6s	remaining: 23.6s
330:	learn: 0.2305890	total: 11.7s	remaining: 23.6s
331:	learn: 0.2305851	total: 11.7s	remaining: 23.5s
332:	learn: 0.2305788	total: 11.7s	remaining: 23.5s
333:	learn: 0.2305724	total: 11.8s	remaining: 23.5s
334:	learn: 0.2305676	total: 11.8s	remaining: 23.4s
335:	learn: 0.2305610	total: 11.8s	remaining: 23.4s
336:	learn: 0.2305527	total: 11.9s	remaining: 23.4s
337:	learn: 0.2305478	total: 11.9s	remaining: 23.4s
338:	learn: 0.2305409	total: 12s	remaining: 23.4s
339:	learn: 0.2305352	total: 12s	remaining: 23.3s
340:	learn: 0.2305324	total: 12.1s	remaining: 23.3s
341:	learn: 0.2305264	total: 12.1s	remaining: 23.3s
342:	learn: 0.2305242	total: 12.1s	remaining: 23.3s
343:	learn: 0.2305226	total: 12.2s	remaining: 23.2s
344:	learn: 0.23

485:	learn: 0.2299213	total: 17.3s	remaining: 18.2s
486:	learn: 0.2299180	total: 17.3s	remaining: 18.2s
487:	learn: 0.2299146	total: 17.3s	remaining: 18.2s
488:	learn: 0.2299103	total: 17.4s	remaining: 18.1s
489:	learn: 0.2299016	total: 17.4s	remaining: 18.1s
490:	learn: 0.2299004	total: 17.4s	remaining: 18.1s
491:	learn: 0.2298986	total: 17.5s	remaining: 18s
492:	learn: 0.2298946	total: 17.5s	remaining: 18s
493:	learn: 0.2298897	total: 17.6s	remaining: 18s
494:	learn: 0.2298849	total: 17.6s	remaining: 18s
495:	learn: 0.2298799	total: 17.6s	remaining: 17.9s
496:	learn: 0.2298781	total: 17.7s	remaining: 17.9s
497:	learn: 0.2298722	total: 17.7s	remaining: 17.9s
498:	learn: 0.2298658	total: 17.8s	remaining: 17.8s
499:	learn: 0.2298634	total: 17.8s	remaining: 17.8s
500:	learn: 0.2298610	total: 17.8s	remaining: 17.8s
501:	learn: 0.2298571	total: 17.9s	remaining: 17.7s
502:	learn: 0.2298483	total: 17.9s	remaining: 17.7s
503:	learn: 0.2298470	total: 18s	remaining: 17.7s
504:	learn: 0.2298451	

648:	learn: 0.2292733	total: 23.3s	remaining: 12.6s
649:	learn: 0.2292676	total: 23.3s	remaining: 12.5s
650:	learn: 0.2292634	total: 23.3s	remaining: 12.5s
651:	learn: 0.2292616	total: 23.4s	remaining: 12.5s
652:	learn: 0.2292587	total: 23.4s	remaining: 12.4s
653:	learn: 0.2292574	total: 23.4s	remaining: 12.4s
654:	learn: 0.2292544	total: 23.5s	remaining: 12.4s
655:	learn: 0.2292539	total: 23.5s	remaining: 12.3s
656:	learn: 0.2292519	total: 23.6s	remaining: 12.3s
657:	learn: 0.2292489	total: 23.6s	remaining: 12.3s
658:	learn: 0.2292472	total: 23.6s	remaining: 12.2s
659:	learn: 0.2292444	total: 23.7s	remaining: 12.2s
660:	learn: 0.2292413	total: 23.7s	remaining: 12.2s
661:	learn: 0.2292370	total: 23.8s	remaining: 12.1s
662:	learn: 0.2292329	total: 23.8s	remaining: 12.1s
663:	learn: 0.2292275	total: 23.8s	remaining: 12.1s
664:	learn: 0.2292254	total: 23.9s	remaining: 12s
665:	learn: 0.2292218	total: 23.9s	remaining: 12s
666:	learn: 0.2292135	total: 23.9s	remaining: 12s
667:	learn: 0.2292

807:	learn: 0.2287526	total: 29s	remaining: 6.89s
808:	learn: 0.2287490	total: 29s	remaining: 6.85s
809:	learn: 0.2287447	total: 29.1s	remaining: 6.82s
810:	learn: 0.2287437	total: 29.1s	remaining: 6.78s
811:	learn: 0.2287424	total: 29.1s	remaining: 6.74s
812:	learn: 0.2287335	total: 29.2s	remaining: 6.71s
813:	learn: 0.2287309	total: 29.2s	remaining: 6.67s
814:	learn: 0.2287268	total: 29.2s	remaining: 6.64s
815:	learn: 0.2287247	total: 29.3s	remaining: 6.6s
816:	learn: 0.2287198	total: 29.3s	remaining: 6.56s
817:	learn: 0.2287185	total: 29.4s	remaining: 6.53s
818:	learn: 0.2287105	total: 29.4s	remaining: 6.5s
819:	learn: 0.2287084	total: 29.4s	remaining: 6.46s
820:	learn: 0.2287060	total: 29.5s	remaining: 6.42s
821:	learn: 0.2287020	total: 29.5s	remaining: 6.38s
822:	learn: 0.2286991	total: 29.5s	remaining: 6.35s
823:	learn: 0.2286922	total: 29.6s	remaining: 6.32s
824:	learn: 0.2286902	total: 29.6s	remaining: 6.28s
825:	learn: 0.2286882	total: 29.6s	remaining: 6.24s
826:	learn: 0.2286

968:	learn: 0.2282755	total: 34.8s	remaining: 1.11s
969:	learn: 0.2282739	total: 34.9s	remaining: 1.08s
970:	learn: 0.2282714	total: 34.9s	remaining: 1.04s
971:	learn: 0.2282664	total: 34.9s	remaining: 1.01s
972:	learn: 0.2282624	total: 35s	remaining: 970ms
973:	learn: 0.2282597	total: 35s	remaining: 934ms
974:	learn: 0.2282559	total: 35s	remaining: 898ms
975:	learn: 0.2282525	total: 35.1s	remaining: 863ms
976:	learn: 0.2282523	total: 35.1s	remaining: 827ms
977:	learn: 0.2282468	total: 35.1s	remaining: 791ms
978:	learn: 0.2282454	total: 35.2s	remaining: 755ms
979:	learn: 0.2282402	total: 35.2s	remaining: 719ms
980:	learn: 0.2282370	total: 35.3s	remaining: 683ms
981:	learn: 0.2282346	total: 35.3s	remaining: 647ms
982:	learn: 0.2282339	total: 35.3s	remaining: 611ms
983:	learn: 0.2282294	total: 35.4s	remaining: 575ms
984:	learn: 0.2282283	total: 35.4s	remaining: 539ms
985:	learn: 0.2282274	total: 35.4s	remaining: 503ms
986:	learn: 0.2282248	total: 35.5s	remaining: 467ms
987:	learn: 0.2282

<catboost.core.CatBoostRegressor at 0x20897fcd4c8>

In [43]:
y_train_pred=boost.predict(X_train)
y_test_pred=boost.predict(test)

In [44]:
1000*np.sqrt(mean_squared_error(y_train,y_train_pred))

228.1863642663267

In [45]:
export_submission(y_test_pred,"Boost Cat 0.1")

In [46]:
parameters={'learning_rate':np.arange(0.01,0.1,0.01)}

In [47]:
search=GridSearchCV(CatBoostRegressor(),parameters,scoring='neg_mean_squared_error',cv=5,verbose=2,n_jobs=10)

In [48]:
search.fit(X_train,y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  21 tasks      | elapsed: 14.9min
[Parallel(n_jobs=10)]: Done  45 out of  45 | elapsed: 23.2min finished


0:	learn: 0.4581315	total: 41.1ms	remaining: 41s
1:	learn: 0.4549662	total: 81.9ms	remaining: 40.9s
2:	learn: 0.4518408	total: 124ms	remaining: 41.3s
3:	learn: 0.4487626	total: 165ms	remaining: 41.1s
4:	learn: 0.4457285	total: 202ms	remaining: 40.3s
5:	learn: 0.4427298	total: 239ms	remaining: 39.7s
6:	learn: 0.4397585	total: 279ms	remaining: 39.6s
7:	learn: 0.4368247	total: 320ms	remaining: 39.6s
8:	learn: 0.4339013	total: 358ms	remaining: 39.4s
9:	learn: 0.4310652	total: 400ms	remaining: 39.6s
10:	learn: 0.4282187	total: 440ms	remaining: 39.6s
11:	learn: 0.4254297	total: 484ms	remaining: 39.8s
12:	learn: 0.4226888	total: 523ms	remaining: 39.7s
13:	learn: 0.4199831	total: 563ms	remaining: 39.7s
14:	learn: 0.4173058	total: 604ms	remaining: 39.7s
15:	learn: 0.4146414	total: 642ms	remaining: 39.5s
16:	learn: 0.4120248	total: 685ms	remaining: 39.6s
17:	learn: 0.4094287	total: 725ms	remaining: 39.5s
18:	learn: 0.4068415	total: 763ms	remaining: 39.4s
19:	learn: 0.4043302	total: 801ms	remaini

166:	learn: 0.2483624	total: 6.48s	remaining: 32.3s
167:	learn: 0.2480952	total: 6.51s	remaining: 32.2s
168:	learn: 0.2478364	total: 6.55s	remaining: 32.2s
169:	learn: 0.2475800	total: 6.58s	remaining: 32.2s
170:	learn: 0.2473318	total: 6.62s	remaining: 32.1s
171:	learn: 0.2470841	total: 6.66s	remaining: 32.1s
172:	learn: 0.2468403	total: 6.71s	remaining: 32.1s
173:	learn: 0.2465988	total: 6.74s	remaining: 32s
174:	learn: 0.2463623	total: 6.78s	remaining: 32s
175:	learn: 0.2461301	total: 6.82s	remaining: 31.9s
176:	learn: 0.2459005	total: 6.86s	remaining: 31.9s
177:	learn: 0.2456802	total: 6.9s	remaining: 31.8s
178:	learn: 0.2454685	total: 6.94s	remaining: 31.8s
179:	learn: 0.2452494	total: 6.97s	remaining: 31.8s
180:	learn: 0.2450387	total: 7.01s	remaining: 31.7s
181:	learn: 0.2448321	total: 7.05s	remaining: 31.7s
182:	learn: 0.2446227	total: 7.08s	remaining: 31.6s
183:	learn: 0.2444227	total: 7.11s	remaining: 31.6s
184:	learn: 0.2442284	total: 7.15s	remaining: 31.5s
185:	learn: 0.244

326:	learn: 0.2338696	total: 12.2s	remaining: 25.1s
327:	learn: 0.2338512	total: 12.2s	remaining: 25.1s
328:	learn: 0.2338335	total: 12.3s	remaining: 25s
329:	learn: 0.2338171	total: 12.3s	remaining: 25s
330:	learn: 0.2338011	total: 12.3s	remaining: 24.9s
331:	learn: 0.2337844	total: 12.4s	remaining: 24.9s
332:	learn: 0.2337676	total: 12.4s	remaining: 24.9s
333:	learn: 0.2337504	total: 12.5s	remaining: 24.8s
334:	learn: 0.2337341	total: 12.5s	remaining: 24.8s
335:	learn: 0.2337202	total: 12.5s	remaining: 24.7s
336:	learn: 0.2337040	total: 12.6s	remaining: 24.7s
337:	learn: 0.2336880	total: 12.6s	remaining: 24.7s
338:	learn: 0.2336744	total: 12.6s	remaining: 24.6s
339:	learn: 0.2336622	total: 12.7s	remaining: 24.6s
340:	learn: 0.2336474	total: 12.7s	remaining: 24.6s
341:	learn: 0.2336335	total: 12.8s	remaining: 24.5s
342:	learn: 0.2336195	total: 12.8s	remaining: 24.5s
343:	learn: 0.2336064	total: 12.8s	remaining: 24.5s
344:	learn: 0.2335939	total: 12.9s	remaining: 24.4s
345:	learn: 0.23

487:	learn: 0.2327487	total: 18s	remaining: 18.8s
488:	learn: 0.2327467	total: 18s	remaining: 18.8s
489:	learn: 0.2327445	total: 18s	remaining: 18.8s
490:	learn: 0.2327426	total: 18.1s	remaining: 18.7s
491:	learn: 0.2327380	total: 18.1s	remaining: 18.7s
492:	learn: 0.2327347	total: 18.1s	remaining: 18.7s
493:	learn: 0.2327317	total: 18.2s	remaining: 18.6s
494:	learn: 0.2327275	total: 18.2s	remaining: 18.6s
495:	learn: 0.2327241	total: 18.3s	remaining: 18.6s
496:	learn: 0.2327225	total: 18.3s	remaining: 18.5s
497:	learn: 0.2327200	total: 18.3s	remaining: 18.5s
498:	learn: 0.2327153	total: 18.4s	remaining: 18.4s
499:	learn: 0.2327138	total: 18.4s	remaining: 18.4s
500:	learn: 0.2327112	total: 18.4s	remaining: 18.4s
501:	learn: 0.2327083	total: 18.5s	remaining: 18.3s
502:	learn: 0.2327068	total: 18.5s	remaining: 18.3s
503:	learn: 0.2327031	total: 18.5s	remaining: 18.3s
504:	learn: 0.2327013	total: 18.6s	remaining: 18.2s
505:	learn: 0.2326992	total: 18.6s	remaining: 18.2s
506:	learn: 0.2326

649:	learn: 0.2324147	total: 23.8s	remaining: 12.8s
650:	learn: 0.2324136	total: 23.8s	remaining: 12.8s
651:	learn: 0.2324114	total: 23.9s	remaining: 12.8s
652:	learn: 0.2324095	total: 23.9s	remaining: 12.7s
653:	learn: 0.2324072	total: 24s	remaining: 12.7s
654:	learn: 0.2324057	total: 24s	remaining: 12.6s
655:	learn: 0.2324042	total: 24s	remaining: 12.6s
656:	learn: 0.2324024	total: 24.1s	remaining: 12.6s
657:	learn: 0.2324017	total: 24.1s	remaining: 12.5s
658:	learn: 0.2324000	total: 24.1s	remaining: 12.5s
659:	learn: 0.2323998	total: 24.2s	remaining: 12.5s
660:	learn: 0.2323984	total: 24.2s	remaining: 12.4s
661:	learn: 0.2323956	total: 24.3s	remaining: 12.4s
662:	learn: 0.2323941	total: 24.3s	remaining: 12.3s
663:	learn: 0.2323929	total: 24.3s	remaining: 12.3s
664:	learn: 0.2323917	total: 24.4s	remaining: 12.3s
665:	learn: 0.2323905	total: 24.4s	remaining: 12.2s
666:	learn: 0.2323888	total: 24.4s	remaining: 12.2s
667:	learn: 0.2323866	total: 24.5s	remaining: 12.2s
668:	learn: 0.2323

812:	learn: 0.2321869	total: 29.9s	remaining: 6.88s
813:	learn: 0.2321862	total: 29.9s	remaining: 6.84s
814:	learn: 0.2321854	total: 30s	remaining: 6.8s
815:	learn: 0.2321831	total: 30s	remaining: 6.76s
816:	learn: 0.2321816	total: 30s	remaining: 6.73s
817:	learn: 0.2321802	total: 30.1s	remaining: 6.69s
818:	learn: 0.2321795	total: 30.1s	remaining: 6.65s
819:	learn: 0.2321781	total: 30.1s	remaining: 6.62s
820:	learn: 0.2321767	total: 30.2s	remaining: 6.58s
821:	learn: 0.2321757	total: 30.2s	remaining: 6.54s
822:	learn: 0.2321742	total: 30.2s	remaining: 6.5s
823:	learn: 0.2321723	total: 30.3s	remaining: 6.47s
824:	learn: 0.2321708	total: 30.3s	remaining: 6.43s
825:	learn: 0.2321698	total: 30.3s	remaining: 6.39s
826:	learn: 0.2321691	total: 30.4s	remaining: 6.36s
827:	learn: 0.2321681	total: 30.4s	remaining: 6.32s
828:	learn: 0.2321668	total: 30.4s	remaining: 6.28s
829:	learn: 0.2321664	total: 30.5s	remaining: 6.24s
830:	learn: 0.2321656	total: 30.5s	remaining: 6.21s
831:	learn: 0.232164

973:	learn: 0.2320068	total: 35.6s	remaining: 951ms
974:	learn: 0.2320050	total: 35.7s	remaining: 914ms
975:	learn: 0.2320036	total: 35.7s	remaining: 878ms
976:	learn: 0.2320015	total: 35.7s	remaining: 841ms
977:	learn: 0.2319988	total: 35.8s	remaining: 805ms
978:	learn: 0.2319965	total: 35.8s	remaining: 768ms
979:	learn: 0.2319962	total: 35.8s	remaining: 731ms
980:	learn: 0.2319943	total: 35.9s	remaining: 695ms
981:	learn: 0.2319933	total: 35.9s	remaining: 658ms
982:	learn: 0.2319918	total: 35.9s	remaining: 621ms
983:	learn: 0.2319915	total: 36s	remaining: 585ms
984:	learn: 0.2319908	total: 36s	remaining: 548ms
985:	learn: 0.2319897	total: 36.1s	remaining: 512ms
986:	learn: 0.2319895	total: 36.1s	remaining: 475ms
987:	learn: 0.2319894	total: 36.1s	remaining: 439ms
988:	learn: 0.2319876	total: 36.1s	remaining: 402ms
989:	learn: 0.2319864	total: 36.2s	remaining: 366ms
990:	learn: 0.2319854	total: 36.2s	remaining: 329ms
991:	learn: 0.2319847	total: 36.3s	remaining: 293ms
992:	learn: 0.23

GridSearchCV(cv=5, error_score=nan,
             estimator=<catboost.core.CatBoostRegressor object at 0x0000020897FE7EC8>,
             iid='deprecated', n_jobs=10,
             param_grid={'learning_rate': array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09])},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=2)

In [49]:
search.best_params_

{'learning_rate': 0.01}

In [50]:
search.best_score_

-0.05416058943637192

## Ensembling

In [73]:
data=pd.read_csv("Compilation.csv")
data

Unnamed: 0,ID,Sales1,Sales2,Sales3,Sales4,Sales5,Sales6,Sales7
0,883,105.990876,105.239625,104.097388,104.222740,105.020512,104.804514,104.875000
1,884,105.996883,105.239625,104.097388,104.222740,105.020512,104.804514,104.875000
2,885,106.002878,105.239625,104.097388,104.222740,105.020512,104.804514,104.875000
3,886,106.008874,105.239625,104.097388,104.222740,105.020512,104.804514,104.875000
4,887,75.422744,63.134849,62.789850,63.612532,63.823020,63.891782,65.000000
...,...,...,...,...,...,...,...,...
35995,548083,124.841513,148.763835,149.246618,148.908758,149.372686,148.502798,149.000000
35996,548084,139.184502,129.395834,128.282598,129.333983,129.693824,129.534670,129.666667
35997,548085,139.192373,129.395834,128.282598,129.333983,129.693824,129.534670,129.666667
35998,548086,139.200228,129.395834,128.282598,129.333983,129.693824,129.534670,129.666667


In [77]:
data["Simple_Mean"]=(data["Sales1"]+data["Sales2"]+data["Sales3"]+data["Sales4"]+data["Sales5"]+data["Sales6"]+data["Sales7"])/7

In [79]:
data["Simple_Mean"]

0        104.892951
1        104.893809
2        104.894665
3        104.895522
4         65.382111
            ...    
35995    145.519458
35996    130.727440
35997    130.728564
35998    130.729686
35999    130.730808
Name: Simple_Mean, Length: 36000, dtype: float64

In [83]:
def export_submission(predictions,filename="Sample.csv"):
    
    ss=sample_sub.copy()
    ss["Sample"]=predictions
    ss.to_csv(f"{filename}.csv",index=False)

In [84]:
data["Simple_Mean"]

0        104.892951
1        104.893809
2        104.894665
3        104.895522
4         65.382111
            ...    
35995    145.519458
35996    130.727440
35997    130.728564
35998    130.729686
35999    130.730808
Name: Simple_Mean, Length: 36000, dtype: float64

In [85]:
export_submission(data["Simple_Mean"],"Simple_Mean_All")