In [3]:
import pandas as pd
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn import compose
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

In [4]:
df = pd.read_csv('instacart.csv')
df.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,aisle,department,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order
0,2,33120,1,1,Organic Egg Whites,86,16,eggs,dairy eggs,202279,prior,3,5,9,8.0
1,2,28985,2,1,Michigan Organic Kale,83,4,fresh vegetables,produce,202279,prior,3,5,9,8.0
2,2,9327,3,0,Garlic Powder,104,13,spices seasonings,pantry,202279,prior,3,5,9,8.0
3,2,45918,4,1,Coconut Butter,19,13,oils vinegars,pantry,202279,prior,3,5,9,8.0
4,2,30035,5,0,Natural Sweetener,17,13,baking ingredients,pantry,202279,prior,3,5,9,8.0


In [5]:
df_c=df[:10000]

In [6]:
X_train=df_c.loc[(df.eval_set == 'prior')][['product_id','reordered','aisle_id','department_id','order_number',
                                          'order_dow','order_hour_of_day','days_since_prior_order']]
y_train=df_c.loc[(df.eval_set == 'prior')]['add_to_cart_order']

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [8]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.experimental import enable_iterative_imputer
from sklearn import impute
from sklearn import preprocessing

In [9]:
def make_pipelines():
    """Create a pipeline for each of the following algorithms:
    1. Logistic Regression
    2. k-nearest neighbors (KNN) 
    3. Naive Bayes (Guassian)
    4. Support Vector Machines (SVM)
    5. Random Forest™ 
    
    If appropriate, apply StandardScaler before the algorithm.   
    Use default hyperparameters.
    If an algorithm takes random_state then random_state=42 
    
    Return a list of all the pipelines.
    """ 
    numeric_features=['product_id','reordered','aisle_id','department_id','order_number',
                                          'order_dow','order_hour_of_day','days_since_prior_order']
    numeric_transformer = Pipeline(steps=[
    ('imputer', impute.SimpleImputer(strategy='median'))])
    
    preprocessor = compose.ColumnTransformer(transformers=[
        ('num', numeric_transformer, numeric_features)])
    
    LR=Pipeline([('preprocessor', preprocessor),('LogisticRegression', LogisticRegression(random_state=42))])
    KNN=Pipeline([('preprocessor', preprocessor),('scaler', StandardScaler()),('KNeighborsClassifier', KNeighborsClassifier())])
    NB=Pipeline([('preprocessor', preprocessor),('GaussianNB', GaussianNB())])
    SVM=Pipeline([('preprocessor', preprocessor),('scaler', StandardScaler()), ('SVC', SVC(random_state=42))])
    RF=Pipeline([ ('preprocessor', preprocessor),('RandomForestClassifier', RandomForestClassifier(random_state=42))])
    
    

    pipelines=[LR,KNN,NB,SVM,RF]
    return pipelines

In [10]:
pipelines = make_pipelines()

In [11]:
for pipe in pipelines:
    pipe.fit(X_train, y_train)



In [12]:
from sklearn.metrics import classification_report

for pipe in pipelines:
    print(pipe.steps[-1][0],'\n',classification_report(y_train, pipe.predict(X_train)))

  'precision', 'predicted', average, warn_for)


LogisticRegression 
               precision    recall  f1-score   support

           1       0.10      0.74      0.17       977
           2       0.10      0.27      0.15       940
           3       0.07      0.02      0.03       895
           4       0.00      0.00      0.00       831
           5       0.00      0.00      0.00       768
           6       0.00      0.00      0.00       697
           7       0.00      0.00      0.00       627
           8       0.00      0.00      0.00       555
           9       0.00      0.00      0.00       492
          10       0.00      0.00      0.00       433
          11       0.00      0.00      0.00       369
          12       0.00      0.00      0.00       309
          13       0.00      0.00      0.00       280
          14       0.00      0.00      0.00       248
          15       0.00      0.00      0.00       212
          16       0.00      0.00      0.00       185
          17       0.00      0.00      0.00       164
      

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


KNeighborsClassifier 
               precision    recall  f1-score   support

           1       0.26      0.77      0.39       977
           2       0.26      0.56      0.36       940
           3       0.30      0.46      0.36       895
           4       0.28      0.34      0.31       831
           5       0.29      0.27      0.28       768
           6       0.32      0.24      0.28       697
           7       0.30      0.20      0.24       627
           8       0.38      0.19      0.25       555
           9       0.32      0.14      0.19       492
          10       0.35      0.12      0.18       433
          11       0.36      0.09      0.14       369
          12       0.34      0.09      0.15       309
          13       0.34      0.06      0.11       280
          14       0.47      0.06      0.11       248
          15       0.38      0.06      0.10       212
          16       0.24      0.03      0.06       185
          17       0.12      0.01      0.02       164
    

  'precision', 'predicted', average, warn_for)
