In [1]:

import pandas as pd

# Path of the training flights CSV that this notebook reads.
TRAIN_CSV = '/project_data/data_asset/train_flights.csv'

# Read the training set and preview its first rows.
df = pd.read_csv(TRAIN_CSV)
df.head()


Unnamed: 0,MONTH,DAY,DAY_OF_WEEK,ORIGIN_AIRPORT,DESTINATION_AIRPORT,DEPARTURE_DELAY,TAXI_OUT,DISTANCE,DELAYED
0,1,11,7,ABQ,DFW,2.0,11.0,569,0
1,1,12,1,SFO,DFW,-4.0,16.0,1464,0
2,1,2,5,DTW,PHX,-4.0,38.0,1671,1
3,1,2,5,MIA,SJU,19.0,31.0,1045,1
4,1,3,6,MIA,LAS,30.0,20.0,2174,1


### Separate features and label

In [2]:
# Features: every column of the frame except the DELAYED target.
X = df.drop(columns=['DELAYED'])
# Label: the DELAYED column on its own.
y = df['DELAYED']

#### Separate categorical and numerical columns

In [3]:
# Columns handled as categorical features.
cat = [
    'MONTH',
    'DAY',
    'DAY_OF_WEEK',
    'ORIGIN_AIRPORT',
    'DESTINATION_AIRPORT',
]

# Columns handled as numeric features.
numeric = [
    'DEPARTURE_DELAY',
    'TAXI_OUT',
    'DISTANCE',
]

### Create preprocessor for categorical and numerical columns

In [4]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Numeric columns are standardised.
numeric_step = ('num', StandardScaler(), numeric)
# Categorical columns are one-hot encoded; handle_unknown='ignore' means a
# category not seen during fit does not raise at transform time.
categorical_step = ('cat', OneHotEncoder(handle_unknown='ignore'), cat)

# Transformer order is kept as (num, cat) so the output column layout is unchanged.
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])



## Create a pipeline with the preprocessor and an estimator

In [5]:
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingClassifier

# Fix the estimator's seed so repeated fits (and the cross-validation score)
# are reproducible: gradient boosting permutes features at every split, so an
# unseeded model can differ between runs when several splits tie.
clf = GradientBoostingClassifier(random_state=42)

# Chain preprocessing and the classifier so both are fitted together and the
# same transformations are applied at prediction time.
pipe = Pipeline(steps=[('preprocessor', preprocessor),
                       ('classifier', clf)])

### K-fold cross-validation

In [9]:
# For Bixi as Model Developer
from sklearn.model_selection import cross_val_score

# Evaluate the whole pipeline with 5-fold cross-validation and display the
# mean of the per-fold scores.
scores = cross_val_score(estimator=pipe, X=X, y=y, cv=5)
scores.mean()

0.8839898740877361

### Fit the model 

In [10]:
# Train the pipeline on the full data set. Pipeline.fit returns the fitted
# pipeline itself, so `model` refers to the same object as `pipe`.
pipe.fit(X, y)
model = pipe

## Store the model

In [13]:
# Create a WML client
import os

from watson_machine_learning_client import WatsonMachineLearningAPIClient

# The access token and service URL are taken from the runtime's environment variables.
access_token = os.environ['USER_ACCESS_TOKEN']
service_url = os.environ['RUNTIME_ENV_APSX_URL']

wml_credentials = {
    "token": access_token,
    "instance_id": "wml_local",
    "url": service_url,
    "version": "3.0.0",
}
wml_client = WatsonMachineLearningAPIClient(wml_credentials)

# Make the current project the client's default; the call's return value is
# displayed as the cell output.
wml_client.set.default_project(os.environ['PROJECT_ID'])

'SUCCESS'

### Provide model metadata
- Feel free to change the model name.

In [14]:
# Metadata passed along when the model is stored: its name, runtime and type.
meta_names = wml_client.repository.ModelMetaNames

model_props = {
    meta_names.NAME: "airline-sklearn-mak",
    meta_names.RUNTIME_UID: "scikit-learn_0.22-py3.6",
    meta_names.TYPE: "scikit-learn_0.22",
}

#### Store the model

In [16]:
# Store the fitted pipeline in the repository together with its metadata and
# the training features/target, then display the details returned by the call.
published_model_details = wml_client.repository.store_model(
    model=model,
    meta_props=model_props,
    training_data=X,
    training_target=y,
)
published_model_details

{'metadata': {'name': 'airline-sklearn-mak',
  'guid': '5942b54e-558e-486a-b146-59acd931a15d',
  'id': '5942b54e-558e-486a-b146-59acd931a15d',
  'project_id': 'f85bdd81-2055-4122-abd3-1ea2df204933',
  'modified_at': '2020-11-12T17:34:59.002Z',
  'created_at': '2020-11-12T17:34:57.002Z',
  'owner': '1000331009',
  'href': '/v4/models/5942b54e-558e-486a-b146-59acd931a15d?project_id=f85bdd81-2055-4122-abd3-1ea2df204933'},
 'entity': {'name': 'airline-sklearn-mak',
  'project': {'id': 'f85bdd81-2055-4122-abd3-1ea2df204933',
   'href': '/v2/projects/f85bdd81-2055-4122-abd3-1ea2df204933'},
  'training_data_references': [{'location': {'bucket': 'not_applicable'},
    'type': 'fs',
    'connection': {'access_key_id': 'not_applicable',
     'secret_access_key': 'not_applicable',
     'endpoint_url': 'not_applicable'},
    'schema': {'id': '1',
     'type': 'DataFrame',
     'fields': [{'name': 'MONTH', 'type': 'int64'},
      {'name': 'DAY', 'type': 'int64'},
      {'name': 'DAY_OF_WEEK', 'type