In [None]:
!pip install -q kaggle

In [None]:
!mkdir ~/.kaggle

In [None]:
# Create the (initially empty) Kaggle credentials file in the config directory.
!touch ~/.kaggle/kaggle.json

In [None]:
import os
from getpass import getpass

# Credentials must never be stored in the notebook: anything committed here is
# readable by everyone with access to the file and has to be regenerated.
# Read them from the environment and fall back to an interactive prompt.
api_token = {
    "username": os.environ.get("KAGGLE_USERNAME") or input("Kaggle username: "),
    "key": os.environ.get("KAGGLE_KEY") or getpass("Kaggle API key: "),
}

In [None]:
import json
import os

# Write the token to the same ~/.kaggle/kaggle.json that the shell cells
# create and chmod, instead of assuming the home directory is /root.
kaggle_config_path = os.path.expanduser('~/.kaggle/kaggle.json')
os.makedirs(os.path.dirname(kaggle_config_path), exist_ok=True)

with open(kaggle_config_path, 'w') as file:
    json.dump(api_token, file)

In [None]:
# Restrict the credentials file to owner read/write only (mode 600).
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the "jakeshbohaju/brain-tumor" dataset from Kaggle with the CLI.
# A later cell unpacks ./brain-tumor.zip, the archive this command is expected
# to leave in the working directory.
# NOTE(review): --force presumably re-downloads even when a local copy
# exists -- confirm against `kaggle datasets download --help`.

!kaggle datasets download -d jakeshbohaju/brain-tumor --force

In [None]:
import zipfile
# Unpack the downloaded archive into ./data.
with zipfile.ZipFile('./brain-tumor.zip', mode='r') as archive:
    archive.extractall(path='./data')

In [None]:
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
from PIL import Image

# Read the CSV that lists every image name together with its class label.
df = pd.read_csv('./data/Brain Tumor.csv')

image_names = df['Image']
classes = df['Class']

# Map image name -> class label; looked up below by the file name without
# its extension.
name_to_class = dict(zip(image_names, classes))

# Create one output directory per class; exist_ok keeps the cell re-runnable.
for class_dir in ('./data/tumor', './data/not_tumor'):
    os.makedirs(class_dir, exist_ok=True)

# Resize every source image and save it into the folder of its class.
folder = './data/Brain Tumor/Brain Tumor/*.jpg'
img_paths = glob.glob(folder)
for path in img_paths:
    # basename/splitext is separator-agnostic and only strips the final
    # extension, unlike splitting on '/' and on the first '.'.
    img_name = os.path.splitext(os.path.basename(path))[0]
    # Label 0 means "no tumor"; every other label is treated as "tumor".
    class_name = 'not_tumor' if name_to_class[img_name] == 0 else 'tumor'

    # The context manager closes the source file handle; resize() returns an
    # independent image, so it can be saved after the source is closed.
    with Image.open(path) as raw_img:
        resized_img = raw_img.resize((224, 224))  # resize to (224, 224)
    resized_img.save(f'./data/{class_name}/{img_name}' + '.jpg')
    

In [None]:
!pip install -q split-folders

In [None]:
import splitfolders

# Split the class folders under ./data into ./output with a fixed seed.
# NOTE(review): the tuple is presumably (train, val, test) fractions --
# confirm against the split-folders documentation.
split_ratio = (.9, 0.0, 0.1)
splitfolders.ratio('./data', output="output", seed=1337, ratio=split_ratio)

In [None]:
import shutil
import os

# Remove the unsplit working copy and the 'val' split, then give the split
# output its final name.
for obsolete_dir in ('data', 'output/val'):
    shutil.rmtree(obsolete_dir)
os.rename('output', 'brain_tumor_data')


In [None]:
import glob
import os
import pandas as pd

folder='./brain_tumor_data/*/*/*.jpg'


def describe_image_path(path):
    """Return ``(dataset type, label)`` for one image path.

    The path must end in ``<split>/<class>/<file>``, which is the layout the
    ``folder`` glob matches. Only those two directory names are inspected, so
    text elsewhere in the path (for example a file name containing "train")
    cannot change the result.

    Args:
        path: Path to an image, with at least three components.

    Returns:
        A tuple ``(dataset_type, condition)``: ``dataset_type`` is "train"
        when the split directory is named "train" and "test" otherwise;
        ``condition`` is "not_tumor" when the class directory is named
        "not_tumor" and "tumor" otherwise.
    """
    parts = os.path.normpath(path).split(os.sep)
    split_dir, class_dir = parts[-3], parts[-2]
    dataset_type = "train" if split_dir == "train" else "test"
    condition = "not_tumor" if class_dir == "not_tumor" else "tumor"
    return dataset_type, condition


all_files = glob.glob(folder)

# Parallel lists, one entry per image file.
category=[]
brain_condition=[]
filenames=[]

for filename in all_files:
    dataset_type, condition = describe_image_path(filename)
    category.append(dataset_type)
    brain_condition.append(condition)
    filenames.append(filename)

all_data_df = pd.DataFrame({"dataset type": category,"x-ray result":brain_condition, "filename":filenames})
print(all_data_df.head())


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# One count plot per dataset type, showing how many images each class has.
g = sns.catplot(x="x-ray result", col="dataset type", kind="count", palette="ch:.150", data=all_data_df, legend=True)

# Write the count above every bar. Iterating over the axes the grid actually
# created (instead of a fixed range(2)) also works when the frame holds only
# one dataset type, or more than two.
for ax in g.axes.flat:
    for p in ax.patches:
        ax.text(p.get_x()+0.3, p.get_height()*1.05, '{0:.0f}'.format(p.get_height()), color='black', rotation='horizontal', size='large' )
plt.show()

In [82]:
# S3 bucket, AWS region and ARN used by the rest of the notebook, echoed for
# the run log.
# NOTE(review): despite its name, roleArn holds an S3 bucket ARN
# ("arn:aws:s3:::..."), not an IAM role ARN.
bucket, region, roleArn = 'medical-ai-tumor', 'eu-central-1', 'arn:aws:s3:::medical-ai-tumor'

print("bucket:{}".format(bucket))
print("region:{}".format(region))
print("roleArn:{}".format(roleArn))

bucket:medical-ai-tumor
region:eu-central-1
roleArn:arn:aws:s3:::medical-ai-tumor


In [None]:
import os

# Export the bucket name so shell commands started from the notebook can read
# it as $DEFAULT_S3_BUCKET.
os.environ.update({"DEFAULT_S3_BUCKET": bucket})

In [83]:
from sagemaker import get_execution_role

# Look up the execution role through the SageMaker SDK and echo it; it is
# passed as `role=` to the Estimator and Model in later cells.
role = get_execution_role()
print(role)

arn:aws:iam::634002663774:role/service-role/AmazonSageMaker-ExecutionRole-20230126T005057


In [None]:
# Upload the training split to s3://<bucket>/brain_tumor_data/ so that the
# bucket holds one folder per class:
  # |--class a
  #       |--abc.jpg
  #       |--def.jpg
  # |--class b
  #       |--ghi.jpg
  #       |--jkl.jpg
# Only ./brain_tumor_data/train is synced here; the test split stays local.

!aws s3 sync ./brain_tumor_data/train s3://${DEFAULT_S3_BUCKET}/brain_tumor_data/

In [None]:
# define the training model

from sagemaker import image_uris, model_uris, script_uris, hyperparameters
from sagemaker.estimator import Estimator

# Model to fine-tune and the instance type to train on.
# NOTE(review): "*" presumably selects the latest available version -- confirm.
model_id = "tensorflow-ic-efficientnet-b3-classification-1"
model_version = "*"
training_instance_type = "ml.p2.xlarge"

# Docker image used for the training job.
train_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    model_id=model_id,
    model_version=model_version,
    image_scope="training",
    instance_type=training_instance_type,
)

# Location of the training script bundle.
train_source_uri = script_uris.retrieve(
    model_id=model_id,
    model_version=model_version,
    script_scope="training",
)

# Location of the pretrained model tarball used for transfer learning.
train_model_uri = model_uris.retrieve(
    model_id=model_id,
    model_version=model_version,
    model_scope="training",
)

# Default fine-tuning hyper-parameters for this model.
# Note: this rebinds `hyperparameters` from the imported module to the
# returned defaults; the Estimator cell relies on that name.
hyperparameters = hyperparameters.retrieve_default(
    model_id=model_id,
    model_version=model_version,
)
print(hyperparameters)

In [None]:
# S3 prefix holding the uploaded training images, and the prefix under which
# the training job stores its output.
training_dataset_s3_path = f"s3://{bucket}/brain_tumor_data/"
s3_output_location = f"s3://{bucket}/models/image_model_effiecentnet"

# Settings for the SageMaker Estimator, collected in one place.
estimator_settings = dict(
    role=role,
    image_uri=train_image_uri,
    source_dir=train_source_uri,
    model_uri=train_model_uri,
    entry_point="transfer_learning.py",
    instance_count=1,
    instance_type=training_instance_type,
    max_run=360000,
    hyperparameters=hyperparameters,
    output_path=s3_output_location,
)

# Create the SageMaker Estimator instance.
tf_ic_estimator = Estimator(**estimator_settings)

In [None]:
# Launch the SageMaker training job on the uploaded images; the data is
# supplied through the "training" channel and job logs are streamed here.
training_channels = {"training": training_dataset_s3_path}
tf_ic_estimator.fit(training_channels, logs=True)

In [None]:
# Remember the name of the training job most recently launched by the estimator.
training_job_name = tf_ic_estimator.latest_training_job.job_name

In [84]:
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.tensorflow import TensorFlowModel

inference_instance_type = "ml.p2.xlarge"

# Retrieve the inference docker container uri
deploy_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="inference",
    model_id=model_id,
    model_version=model_version,
    instance_type=inference_instance_type,
)

# Retrieve the inference script uri
deploy_source_uri = script_uris.retrieve(
    model_id=model_id, model_version=model_version, script_scope="inference"
)

endpoint_name = 'Image-classifier-Tumor'

# Artefact of an earlier training job, pinned explicitly so the endpoint can
# be (re)created without re-running the training cells in this kernel.
model_url = 's3://medical-ai-tumor/models/image_model_effiecentnet/sagemaker-jumpstart-2023-02-06-00-36-52-371/output/model.tar.gz'

# predictor_cls makes deploy() return a Predictor bound to the endpoint;
# without it deploy() returns None and `finetuned_predictor` is unusable.
model = Model(image_uri=deploy_image_uri,
              model_data=model_url,
              entry_point="inference.py",
              source_dir=deploy_source_uri,
              role=role,
              predictor_cls=Predictor)

# Deploy the stored model artefact (not the in-memory estimator) to a
# SageMaker endpoint.
# NOTE(review): entry_point / image_uri / source_dir repeat what was already
# given to Model(...) and are presumably redundant here -- kept unchanged.
finetuned_predictor = model.deploy(
    initial_instance_count=1,
    instance_type=inference_instance_type,
    entry_point="inference.py",
    image_uri=deploy_image_uri,
    source_dir=deploy_source_uri,
    endpoint_name=endpoint_name,
)

--------------!

In [85]:
from sagemaker.predictor import Predictor
# Attach a client to the deployed endpoint by name and show the object.
predictor = Predictor(endpoint_name='Image-classifier-Tumor')
print(predictor)

<sagemaker.predictor.Predictor object at 0x7f2ce757e790>


In [86]:
import json
from ast import literal_eval

# Smoke test: send one image from the local test split to the endpoint.
file_name = 'brain_tumor_data/test/not_tumor/Image1038.jpg'

with open(file_name, "rb") as file:
    img = file.read()

query_response = predictor.predict(
    img, {"ContentType": "application/x-image", "Accept": "application/json;verbose"})
# The response is requested as JSON, so parse it as JSON: literal_eval only
# understands Python literals and fails on JSON true / false / null.
pred = json.loads(query_response.decode('utf-8'))
print(type(pred["predicted_label"]))

<class 'str'>


In [87]:
import glob
import json
import numpy as np
from ast import literal_eval

# Every image of the local test split, whatever its class folder.
file_path = 'brain_tumor_data/test/*/*.jpg'
file_paths = glob.glob(file_path)

# Filled by make_pred(): ground-truth and predicted labels
# (0 = not_tumor, 1 = tumor), one entry per test image.
y_true, y_pred = [], []

def make_pred(paths=None, client=None, true_labels=None, pred_labels=None):
    """Classify images through the endpoint and record true/predicted labels.

    Called without arguments it behaves as before: it reads the notebook
    globals ``file_paths`` and ``predictor`` and appends to ``y_true`` and
    ``y_pred``.

    Args:
        paths: Image paths laid out as ``.../<class>/<file>``; defaults to the
            global ``file_paths``.
        client: Object with a ``predict(data, args)`` method returning the
            endpoint's JSON response as bytes; defaults to the global
            ``predictor``.
        true_labels: List that receives the ground-truth labels; defaults to
            the global ``y_true``.
        pred_labels: List that receives the predicted labels; defaults to the
            global ``y_pred``.

    Returns:
        The tuple ``(true_labels, pred_labels)``. Labels are 0 for
        "not_tumor" and 1 for anything else.
    """
    import os

    paths = file_paths if paths is None else paths
    client = predictor if client is None else client
    true_labels = y_true if true_labels is None else true_labels
    pred_labels = y_pred if pred_labels is None else pred_labels

    request_args = {"ContentType": "application/x-image", "Accept": "application/json;verbose"}

    for path in paths:
        with open(path, "rb") as file:
            img = file.read()
        query_response = client.predict(img, request_args)
        # The response is JSON; json.loads also handles true/false/null,
        # which ast.literal_eval rejects.
        pred = json.loads(query_response.decode('utf-8'))

        # Ground truth comes from the class folder holding the image, not from
        # a substring search over the whole path.
        class_dir = os.path.basename(os.path.dirname(path))

        # Record both labels only after the request succeeded, so a failing
        # call can never leave the two lists with different lengths.
        true_labels.append(0 if class_dir == "not_tumor" else 1)
        pred_labels.append(0 if pred["predicted_label"] == "not_tumor" else 1)

    return true_labels, pred_labels


# Run the evaluation over the test split, then print the ground-truth labels
# followed by the predicted labels.
make_pred()
for label_list in (y_true, y_pred):
    print(label_list)
                

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

In [88]:
from sklearn.metrics import confusion_matrix

# Cross-tabulate the ground-truth labels against the endpoint's predictions.
confusion_matrix(y_true=y_true, y_pred=y_pred)

array([[202,   6],
       [ 16, 153]])

In [89]:
from sklearn.metrics import classification_report
# Per-class precision / recall / F1 summary for the test-set predictions.
report_text = classification_report(y_true, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.93      0.97      0.95       208
           1       0.96      0.91      0.93       169

    accuracy                           0.94       377
   macro avg       0.94      0.94      0.94       377
weighted avg       0.94      0.94      0.94       377

