# Model Training

We will be training and building the model, based on the feature engineered data that is stored in the table. The entire training 
is done using Snowpark optimized warehouse and using stored procedure. Hence all the operations will be natively executed in Snowflake.

In [57]:
from IPython.display import display, HTML, Image , Markdown
from snowflake.snowpark.session import Session
import snowflake.snowpark.types as T
import snowflake.snowpark.functions as F
import os ,configparser ,json ,logging

# Import the commonly defined utility scripts using
# dynamic path include
import sys
sys.path.append('../python/lutils')
import sflk_base as L

display(Markdown("### Initialization"))
logging.basicConfig(stream=sys.stdout, level=logging.ERROR)

# Source various helper functions
%run ./scripts/notebook_helpers.py

# Define the project home directory, this is used for locating the config.ini file
PROJECT_HOME_DIR = '../../'
config = L.get_config(PROJECT_HOME_DIR)
sp_session = L.connect_to_snowflake(PROJECT_HOME_DIR)

if(sp_session == None):
    raise Exception(f'Unable to connect to snowflake. Validate connection information ')

sp_session.use_role(f'''{config['APP_DB']['role']}''')
sp_session.use_schema(f'''{config['APP_DB']['database']}.{config['APP_DB']['schema']}''')
sp_session.use_warehouse(f'''{config['APP_DB']['snow_opt_wh']}''')

df = sp_session.sql('select current_user() ,current_role() ,current_database() ,current_schema();').to_pandas()
display(df)

### Initialization

Unnamed: 0,CURRENT_USER(),CURRENT_ROLE(),CURRENT_DATABASE(),CURRENT_SCHEMA()
0,BECKY,PUBLIC,INDSOL_DICOM_DB,PUBLIC


In [61]:
df = sp_session.table('images_formatted').sample(n=5).to_pandas()
display(df)

Unnamed: 0,IMAGE_FILE_PATH,CLASS_LABEL,IMAGE_ARRAY,IMAGE_ARRAY_SHAPE_0,IMAGE_ARRAY_SHAPE_1,NORMALIZED_IMAGE_ARRAY,PARSING_EXECPTION,STATUS,RESIZED_FEATURE,CLASS_LABEL_NUM
0,@data_stg/NORMAL/NORMAL2-IM-1152-0001-0001.jpeg,NORMAL,"""[0, 0, 2, 14, 27, 39, 54, 66, 76, 83, 89, 94,...",150,150,"""[0.0, 0.0, 0.00784313725490196, 0.05490196078...",,True,"""[0.0, 0.0, 0.00784313725490196, 0.05490196078...",1
1,@data_stg/NORMAL/NORMAL2-IM-0777-0001.jpeg,NORMAL,"""[65, 64, 62, 63, 65, 78, 129, 128, 115, 114, ...",150,150,"""[0.2549019607843137, 0.25098039215686274, 0.2...",,True,"""[0.2549019607843137, 0.25098039215686274, 0.2...",1
2,@data_stg/NORMAL/NORMAL2-IM-0776-0001.jpeg,NORMAL,"""[78, 84, 88, 99, 114, 118, 125, 133, 143, 146...",150,150,"""[0.3058823529411765, 0.32941176470588235, 0.3...",,True,"""[0.3058823529411765, 0.32941176470588235, 0.3...",1
3,@data_stg/PNEUMONIA/person770_virus_1398.jpeg,PNEUMONIA,"""[0.38100050255911144, 0.38885678734159373, 0....",150,150,"""[0.0014941196178788684, 0.0015249285778101715...",,True,"""[0.0014941196178788684, 0.0015249285778101715...",0
4,@data_stg/PNEUMONIA/person381_bacteria_1731.jpeg,PNEUMONIA,"""[9, 10, 10, 11, 12, 13, 14, 14, 15, 16, 20, 2...",150,150,"""[0.03529411764705882, 0.0392156862745098, 0.0...",,True,"""[0.03529411764705882, 0.0392156862745098, 0.0...",0


In [62]:
import time

image_count = 10*1000
epochs = 3
display(Markdown("Model training ..."))

t = time.process_time()
stmt = f''' call train_pneumonia_identification_model(
        {image_count} 
        ,'@model_stg' 
        ,'{config['APP_DB']['model_flname']}' 
        ,{epochs}); '''
print(stmt)
out_df = sp_session.sql(stmt).collect()
elapsed_time = (time.process_time() - t) #/60

print(f'Total execution time for training: {elapsed_time} minutes')
print(out_df)

Model training ...

 call train_pneumonia_identification_model(
        10000 
        ,'@model_stg' 
        ,'pneumonia_model.joblib' 
        ,3); 
Total execution time for training: 0.038557999999998316 minutes
[Row(TRAIN_PNEUMONIA_IDENTIFICATION_MODEL='{\n  "ac": 0.33604336043360433,\n  "cf": {\n    "Normal (Class 1)": {\n      "f1-score": 0.45796460176991155,\n      "precision": 0.296987087517934,\n      "recall": 1,\n      "support": 207\n    },\n    "Pneumonia (Class 0)": {\n      "f1-score": 0.14335664335664336,\n      "precision": 1,\n      "recall": 0.07721280602636535,\n      "support": 531\n    },\n    "accuracy": 0.33604336043360433,\n    "macro avg": {\n      "f1-score": 0.30066062256327747,\n      "precision": 0.648493543758967,\n      "recall": 0.5386064030131826,\n      "support": 738\n    },\n    "name": "DICOM_Training_2023-02-07 01:46:39.545122",\n    "weighted avg": {\n      "f1-score": 0.23160033900914542,\n      "precision": 0.8028134513769816,\n      "recall": 0.33604336043360433,

In [52]:
import pandas as pd

rows = sp_session.sql(f''' list @model_stg; ''').collect()
data = []
for r in rows:
    data.append({
        'name': r['name']
        ,'size': r['size']
        ,'last_modified': r['last_modified']
    })

df = pd.json_normalize(data)
display(df)



Unnamed: 0,name,size,last_modified
0,model_stg/pneumonia_model.joblib,95047952,"Tue, 7 Feb 2023 08:56:19 GMT"


--- 
### Closeout

    With that we are finished this section of the demo setup

In [54]:
sp_session.close()
print('Finished!!!')

Finished!!!
