In [1]:
# @hidden_cell
# The project token is an authorization token that is used to access project resources like data sources, connections, and used by platform APIs.
from project_lib import Project
project = Project(project_id='6221f375-420b-4076-8477-a0bff673d5ec', project_access_token='p-2+2RwuBPB/I9ewKGUj50cv/A==;6DCgZtW7nP7M2AV2OAu7LQ==:G3TKkaQx04LZPts/xSow9dBl/ur1bTKecYSWFYDYN2Atu1KNE6kR9HalYL7qnnxBM/uy3DprYfbdPzVXETUtTxmLLR5+KTuTzQ==')
pc = project.project_context

from ibm_watson_studio_lib import access_project_or_space
wslib = access_project_or_space({'token':'p-2+2RwuBPB/I9ewKGUj50cv/A==;6DCgZtW7nP7M2AV2OAu7LQ==:G3TKkaQx04LZPts/xSow9dBl/ur1bTKecYSWFYDYN2Atu1KNE6kR9HalYL7qnnxBM/uy3DprYfbdPzVXETUtTxmLLR5+KTuTzQ=='})


## !TODO! Insert the project token

### 1. Create the project token
- Open the watsonx project in a new tab. Click on the "Manage" tab, then on "Access control".
- Click on "Access tokens", then on "New access token".
- Select the role "Editor".

### 2. Insert the project token
- Select the first cell of the notebook (just above).
- Click on the three vertical dots in the top right menu bar, then click on "Insert project token".
- The code to access the project using the token should appear. Run the cell (Click on it, then click the "Run" button).

# Feature Engineering to Predict OEE from Scenario 1

From the data simulating Scenario 1, prepare the dataset before training a Machine Learning model to predict OEE.

**Objective:** Predict OEE one hour ahead (to anticipate a drop in OEE)

**Methodology:** Classify OEE into 2 categories:
- Correct OEE: >= 0.90
- Low OEE: < 0.90

**Model Output:** The model will use current trends to determine whether the OEE will be low or correct in one hour.


## !TODO! Import Scenario 1 Data
1. Click on the next cell  
2. Click on the "</>" icon in the top right corner  
3. Click on "Read data", then "Select data from project", then on the left tab "Data assets"  
4. Select the file "scenario_1.csv", then choose the "pandas DataFrame" option  
5. In the last two lines, replace "df_1" with "df"  
6. Run the cell below


In [2]:

import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.

cos_client = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='t63P23nA3U7xrwc47W1Ztaw8Sd-0Ek7DrC91_HFuI2R6',
    ibm_auth_endpoint="https://iam.cloud.ibm.com/identity/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.direct.us-south.cloud-object-storage.appdomain.cloud')

bucket = 'yu4hackthon2-donotdelete-pr-e4fo84tnniy9mr'
object_key = 'scenario_1.csv'

body = cos_client.get_object(Bucket=bucket,Key=object_key)['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df = pd.read_csv(body)
df.head(10)


Unnamed: 0.1,Unnamed: 0,v_tag_key1,v_oee,v_tag_humidity,v_var_created_at,v_tag_key2,v_vibration
0,0,0,0.949472,0.800045,1653483856,0,1.992423
1,1,0,0.950118,0.799701,1653483857,0,2.000835
2,2,0,0.949966,0.799325,1653483858,0,1.974526
3,3,0,0.949514,0.799993,1653483859,0,1.97272
4,4,0,0.949811,0.799729,1653483860,0,1.965522
5,5,0,0.950299,0.800444,1653483861,0,1.964132
6,6,0,0.949767,0.801006,1653483862,0,1.98105
7,7,0,0.950033,0.800413,1653483863,0,1.990696
8,8,0,0.950019,0.800783,1653483864,0,2.01848
9,9,0,0.949815,0.801114,1653483865,0,2.00748


## Preprocessing

### Create the variable "v_oee_h+1", representing OEE in one hour

In [3]:
df = df.copy().set_index('Unnamed: 0')

df['v_oee_h+1'] = df['v_oee'].shift(-3600)

# Remove rows with missing values (the last hour)
df = df.dropna().copy()
df

Unnamed: 0_level_0,v_tag_key1,v_oee,v_tag_humidity,v_var_created_at,v_tag_key2,v_vibration,v_oee_h+1
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,0.949472,0.800045,1653483856,0,1.992423,0.954757
1,0,0.950118,0.799701,1653483857,0,2.000835,0.954741
2,0,0.949966,0.799325,1653483858,0,1.974526,0.954743
3,0,0.949514,0.799993,1653483859,0,1.972720,0.955255
4,0,0.949811,0.799729,1653483860,0,1.965522,0.954845
...,...,...,...,...,...,...,...
39595,0,0.872743,0.675210,1653523451,1,6.508386,0.848102
39596,0,0.873170,0.675698,1653523452,0,6.528411,0.847573
39597,0,0.872498,0.674050,1653523453,0,6.500480,0.847732
39598,1,0.873659,0.675377,1653523454,0,6.523229,0.847873


### Categorize OEE
- 0 <=> Low OEE (<0.90)
- 1 <=> Correct OEE (>=0.90)

In [4]:
df.loc[:, 'v_oee_h+1_cat'] = 0
df.loc[df['v_oee_h+1']>=0.90, 'v_oee_h+1_cat'] = 1
df

Unnamed: 0_level_0,v_tag_key1,v_oee,v_tag_humidity,v_var_created_at,v_tag_key2,v_vibration,v_oee_h+1,v_oee_h+1_cat
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0,0.949472,0.800045,1653483856,0,1.992423,0.954757,1
1,0,0.950118,0.799701,1653483857,0,2.000835,0.954741,1
2,0,0.949966,0.799325,1653483858,0,1.974526,0.954743,1
3,0,0.949514,0.799993,1653483859,0,1.972720,0.955255,1
4,0,0.949811,0.799729,1653483860,0,1.965522,0.954845,1
...,...,...,...,...,...,...,...,...
39595,0,0.872743,0.675210,1653523451,1,6.508386,0.848102,0
39596,0,0.873170,0.675698,1653523452,0,6.528411,0.847573,0
39597,0,0.872498,0.674050,1653523453,0,6.500480,0.847732,0
39598,1,0.873659,0.675377,1653523454,0,6.523229,0.847873,0


### Feature Engineering
Create new variables by aggregating the key1 and key2 variables by minute  <br>  
The variable `"v_tag_key1_1m"` contains the sum of the `"v_tag_key1"` variable from the previous minute.  <br>  
The same logic applies to the variable `"v_tag_key2_1m"`


In [5]:
# Create the agregate variable from key1
df['v_tag_key1_1m'] = df['v_tag_key1'].rolling(60, min_periods=1).sum()

# Create the agregate variable from key1
df['v_tag_key2_1m'] = df['v_tag_key2'].rolling(60, min_periods=1).sum()

### Export the training data to the project

In [6]:
train_data = df.drop({'v_oee', 'v_oee_h+1', 'v_var_created_at'}, axis=1)
project.save_data('scenario_1_training_data.csv', train_data.to_csv(index=False), overwrite=True)

project_lib is deprecated and will no longer be available in IBM Runtime 25.1.
Please upgrade to ibm_watson_studio_lib instead.


{'file_name': 'scenario_1_training_data.csv',
 'message': 'File saved to project storage.',
 'bucket_name': 'yu4hackthon2-donotdelete-pr-e4fo84tnniy9mr',
 'asset_id': '8ab4a8a9-9530-4673-949d-c8a39d5d98ce'}