# Create project

## Set up and configure connection to SDK

In [2]:
import os
import pickle
from datetime import datetime

import datarobot as dr #needs 2.25 or later
import pandas as pd

In [3]:
# Connect to DataRobot. The credentials file location can be overridden with the
# DATAROBOT_CONFIG_FILE environment variable so the notebook is not tied to one
# machine's drive layout; the original path is kept as the fallback, so behavior
# is unchanged when the variable is not set.
DR_CONFIG_PATH = os.environ.get(
    'DATAROBOT_CONFIG_FILE',
    '/Volumes/GoogleDrive/My Drive/Configurations/drconfig.yaml',
)
dr.Client(config_path=DR_CONFIG_PATH)

<datarobot.rest.RESTClientObject at 0x11b4b9040>

## Load datasets

In [4]:
# Load the mapping of dataset handles used below (looked up by the keys
# 'primary' and 'secondary'). The context manager closes the file even if
# unpickling raises, which the manual open()/close() pair did not guarantee.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load a datasets.pickle that you created yourself.
with open('datasets.pickle', 'rb') as datasetsPickle:
    datasets = pickle.load(datasetsPickle)

## Start with primary dataset

In [5]:
# Give every run a unique project name by appending a timestamp to the base name.
# For a fixed name, use instead:
#   projName = 'Fault prediction from sensor data Orsted - Project set up'
run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
projName = 'Fault prediction from sensor data Orsted ' + run_stamp
print(projName)

Fault prediction from sensor data Orsted 20211123-171143


In [6]:
# Create the project from the primary dataset, using the name built above.
primary_dataset = datasets['primary']
project = dr.Project.create_from_dataset(
    primary_dataset.id,
    project_name=projName,
)

In [7]:
# Show the project ID so this project can be reloaded later with dr.Project.get.
project_banner = 'Working with project ' + project.id
print(project_banner)

Working with project 619d12c0762d4fac6f2a303e


You can also load a previous project:

In [8]:
# Uncomment (and substitute your own project ID) to reuse an existing project
# instead of the one created above.
#project = dr.Project.get(project_id='619d12c0762d4fac6f2a303e')

## Connect the secondary dataset to the project

In [9]:
# Name shared by the secondary dataset definition (its `identifier`) and the
# relationship (its `dataset2_identifier`), which is how the two are tied together.
secondary_dataset_identifier = 'biSecondlyReadings'

In [10]:
# Describe the secondary dataset: which catalog entry and version to use, and
# which of its columns is the time index ('timestamp').
secondary_dataset = datasets['secondary']
secondary_dataset_definition = dr.DatasetDefinition(
    identifier=secondary_dataset_identifier,
    catalog_id=secondary_dataset.id,
    catalog_version_id=secondary_dataset.version_id,
    snapshot_policy='latest',
    primary_temporal_key='timestamp',
)

In [11]:
# Relationship between the two datasets. No dataset1_identifier is passed;
# NOTE(review): presumably that makes dataset1 the project's primary dataset -- confirm
# against the DataRobot Relationship docs.
join_keys = ['product_id']  # same column name on both sides of the join
datasets_relationship = dr.Relationship(
    dataset2_identifier=secondary_dataset_identifier,
    dataset1_keys=join_keys,
    dataset2_keys=list(join_keys),
    # Feature derivation window: from -3 to 0, in HOUR units.
    feature_derivation_window_time_unit='HOUR',
    feature_derivation_window_start=-3,
    feature_derivation_window_end=0,
    # Prediction point rounding: 1, in SECOND units.
    prediction_point_rounding_time_unit='SECOND',
    prediction_point_rounding=1,
)

In [12]:
# Register the dataset definition and the relationship as one relationships
# configuration; its id is passed to set_target later.
dataset_definitions = [secondary_dataset_definition]
relationships = [datasets_relationship]
relationship_config = dr.RelationshipsConfiguration.create(
    dataset_definitions=dataset_definitions,
    relationships=relationships,
)

## Create features and perform supervised Feature Reduction

_Supervised feature reduction (SFR) is enabled by default; it is set explicitly here only as an example of how to configure it through the API._

https://app.datarobot.com/docs/data/transform-data/feature-discovery/fd-gen.html#feature-reduction

In [13]:
# Turn supervised feature reduction on explicitly, to show where the switch
# lives in the API.
sfr_enabled = True
advanced_options = dr.AdvancedOptions(
    feature_discovery_supervised_feature_reduction=sfr_enabled,
)

Feature discovery and feature reduction are started by setting the target:

In [None]:
# Settings for the target step, collected first so they can be read at a glance.
manual_target_settings = dict(
    target='qc_fail',
    relationships_configuration_id=relationship_config.id,
    # The prediction point is defined here: the 'timestamp' column.
    feature_engineering_prediction_point='timestamp',
    advanced_options=advanced_options,
    # Manual mode is requested here; the cloned project further down uses FULL_AUTO.
    mode=dr.AUTOPILOT_MODE.MANUAL,
)
project.set_target(**manual_target_settings)

After the target is set, the project goes through the following steps:
- Setting target feature
- Loading Secondary Datasets
- Discovering Features
- Selecting Features
- Generating Features
- Exploratory Data Analysis for New Features
- Creating CV and Holdout partitions (100%)
- Characterizing target variable
- Loading dataset and preparing data
- Saving target and partitioning information
- Analyzing features
- Generating blueprints

## Save project

In [15]:
# Persist the project handle to disk. The context manager guarantees the file
# is flushed and closed even if pickling raises, which the manual
# open()/close() pair did not.
filename = 'project.pickle'
with open(filename, 'wb') as outfile:
    pickle.dump(project, outfile)

## Notes

You can also load an existing project or clone one. The cells below clone the project created above and run the clone in full Autopilot mode:

In [19]:
# Clone the project under a new name; the clone is the one run in full Autopilot mode.
auto_project_name = 'Fault prediction from sensor data Orsted AUTO'
projectAUTO = project.clone_project(new_project_name=auto_project_name)

In [None]:
# Same target settings as the manual project, except for the Autopilot mode.
auto_target_settings = dict(
    target='qc_fail',
    relationships_configuration_id=relationship_config.id,
    # The prediction point is defined here: the 'timestamp' column.
    feature_engineering_prediction_point='timestamp',
    advanced_options=advanced_options,
    mode=dr.AUTOPILOT_MODE.FULL_AUTO,
)
projectAUTO.set_target(**auto_target_settings)

In [21]:
# Number of workers requested for this project (the progress log shows up to
# 20 jobs in progress at once).
AUTOPILOT_WORKER_COUNT = 20
projectAUTO.set_worker_count(AUTOPILOT_WORKER_COUNT)

Project(Fault prediction from sensor data Orsted AUTO)

In [22]:
# Wait for Autopilot on the cloned project; progress lines ("In progress / queued")
# are printed while waiting. In the recorded run this took roughly six minutes.
projectAUTO.wait_for_autopilot()

In progress: 2, queued: 81 (waited: 0s)
In progress: 20, queued: 63 (waited: 2s)
In progress: 20, queued: 63 (waited: 3s)
In progress: 20, queued: 63 (waited: 5s)
In progress: 20, queued: 63 (waited: 8s)
In progress: 20, queued: 63 (waited: 11s)
In progress: 20, queued: 63 (waited: 16s)
In progress: 20, queued: 63 (waited: 24s)
In progress: 18, queued: 57 (waited: 38s)
In progress: 19, queued: 51 (waited: 59s)
In progress: 19, queued: 41 (waited: 81s)
In progress: 20, queued: 35 (waited: 102s)
In progress: 20, queued: 23 (waited: 124s)
In progress: 17, queued: 17 (waited: 145s)
In progress: 19, queued: 9 (waited: 166s)
In progress: 15, queued: 0 (waited: 187s)
In progress: 13, queued: 0 (waited: 208s)
In progress: 13, queued: 0 (waited: 229s)
In progress: 13, queued: 0 (waited: 250s)
In progress: 12, queued: 0 (waited: 271s)
In progress: 11, queued: 0 (waited: 292s)
In progress: 7, queued: 0 (waited: 313s)
In progress: 2, queued: 0 (waited: 333s)
In progress: 2, queued: 0 (waited: 354s)

In [23]:
# Unlock the holdout partition of the cloned project.
# NOTE(review): presumably this makes holdout scores available on its models -- confirm
# against the DataRobot Project.unlock_holdout docs.
projectAUTO.unlock_holdout()

Project(Fault prediction from sensor data Orsted AUTO)

In [24]:
# Persist the Autopilot project handle to disk. The context manager guarantees
# the file is flushed and closed even if pickling raises, which the manual
# open()/close() pair did not.
filename = 'projectAUTO.pickle'
with open(filename, 'wb') as outfile:
    pickle.dump(projectAUTO, outfile)