In [14]:
import sagemaker
from sagemaker.tensorflow import TensorFlow
import os 
from sagemaker import get_execution_role

# IAM role that the training job will assume.
role = get_execution_role()

# One shared session for the rest of the notebook; the region is read from its boto session.
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_session.region_name


## 数据预处理
将视频数据转换成 NumPy 数组，并保存为 `.npz` 文件（`np-datasets/train_data.npz`）。

In [158]:
import codecs
import os
from os import *  # NOTE: this rebinds the builtin `open` to `os.open` in this notebook
# import tqdm

import numpy as np
from keras.utils import np_utils
from tqdm import *

import videoto3d

def loaddata(video_dir, vid3d, nclass, result_dir, color=False, skip=True):
    """Load the videos in ``video_dir`` into one array and index their class labels.

    Every entry of ``video_dir`` (except ``.DS_Store``) is passed to
    ``vid3d.get_UCF_classname`` to obtain its class label and to
    ``vid3d.video3d`` to obtain its array. Only the first ``nclass`` distinct
    labels encountered are kept; files belonging to any further class are
    skipped without being decoded. The kept labels are written, one per line
    and in first-seen order, to ``<result_dir>/classes.txt``.

    Args:
        video_dir: Directory containing the video files.
        vid3d: Object providing ``get_UCF_classname(filename)`` and
            ``video3d(path, color=..., skip=...)``.
        nclass: Maximum number of distinct classes to keep.
        result_dir: Directory that receives ``classes.txt``; created if missing.
        color: Forwarded to ``vid3d.video3d``; also selects the 5-axis
            transpose instead of the 4-axis one.
        skip: Forwarded to ``vid3d.video3d``.

    Returns:
        Tuple ``(X, labels)``. ``X`` is the stacked per-video arrays with
        original axis 1 moved to the last position. ``labels`` is a list of
        integer class indices (positions in ``classes.txt``), one per video.

    Raises:
        ValueError: If no video was loaded from ``video_dir``.
    """
    X = []
    labels = []
    labellist = []

    # Iterating through tqdm closes the progress bar even if a video fails to load.
    for filename in tqdm(os.listdir(video_dir)):
        if filename == '.DS_Store':
            continue
        print('filename is ', filename)
        label = vid3d.get_UCF_classname(filename)
        if label not in labellist:
            if len(labellist) >= nclass:
                # Class limit reached: ignore videos of any further class.
                continue
            labellist.append(label)
        # Store the class index directly instead of remapping names afterwards.
        labels.append(labellist.index(label))
        X.append(vid3d.video3d(os.path.join(video_dir, filename),
                               color=color, skip=skip))

    if not X:
        raise ValueError('no videos were loaded from {!r}'.format(video_dir))

    print('result_dir is ', result_dir)
    if result_dir:
        os.makedirs(result_dir, exist_ok=True)
    # os.path.join works whether or not result_dir ends with a separator.
    fpath = os.path.join(result_dir, 'classes.txt')
    print('labels length is ', len(labellist))
    # codecs.open is used on purpose: `from os import *` in this notebook
    # rebinds the builtin `open`. Mode 'w' keeps the file in sync with this
    # run instead of appending duplicate class names on every re-run.
    with codecs.open(fpath, 'w', 'utf-8') as fp:
        for label in labellist:
            fp.write('{}\n'.format(label))

    # assumes vid3d.video3d returns arrays whose first axis is the frame
    # axis, so these transposes move it last — TODO confirm against videoto3d.
    if color:
        return np.array(X).transpose((0, 2, 3, 4, 1)), labels
    return np.array(X).transpose((0, 2, 3, 1)), labels

        
def process(nclass=8, depth=15, skip=False, color=True,
            img_rows=32, img_cols=32,
            fname_npz='np-datasets/train_data.npz',
            output='default-output/',
            videos='dataset/'):
    """Build the training arrays from the videos, or load them from the cache.

    If ``fname_npz`` already exists, its ``X`` and ``Y`` arrays are loaded
    as-is. The cache is not checked against the arguments, so delete the file
    after changing any of them. Otherwise the videos under ``videos`` are
    converted with ``loaddata``, ``X`` is reshaped to
    ``(n, img_rows, img_cols, depth, channel)`` as float32, the labels are
    one-hot encoded over ``nclass`` classes, and both arrays are saved to
    ``fname_npz``. The shapes of ``X`` and ``Y`` are printed in either case.

    Args:
        nclass: Number of classes to keep and to one-hot encode over.
        depth: Number of frames per video passed to ``Videoto3D``.
        skip: Forwarded to ``loaddata``.
        color: Forwarded to ``loaddata``; selects 3 channels instead of 1.
        img_rows: Frame height passed to ``Videoto3D`` and used in the reshape.
        img_cols: Frame width passed to ``Videoto3D`` and used in the reshape.
        fname_npz: Path of the cached ``.npz`` dataset.
        output: Directory handed to ``loaddata`` for ``classes.txt``.
        videos: Directory containing the source videos.

    Returns:
        None. The result is the ``.npz`` file on disk.
    """
    frames = depth
    channel = 3 if color else 1

    if os.path.exists(fname_npz):
        # Close the archive once the arrays are read instead of leaking the handle.
        with np.load(fname_npz) as loadeddata:
            X, Y = loadeddata["X"], loadeddata["Y"]
        print('Loaded cached dataset from {}.'.format(fname_npz))
    else:
        vid3d = videoto3d.Videoto3D(img_rows, img_cols, frames)
        if output:
            os.makedirs(output, exist_ok=True)
        x, y = loaddata(videos, vid3d, nclass, output, color, skip)
        # NOTE(review): for color input loaddata transposes with (0, 2, 3, 4, 1),
        # which puts original axis 1 last; this reshape then reinterprets the two
        # trailing axes as (frames, channel). If axis 1 is the frame axis, that
        # reorders the data rather than the axes — confirm against videoto3d.video3d.
        X = x.reshape((x.shape[0], img_rows, img_cols, frames, channel))
        Y = np_utils.to_categorical(y, nclass)

        X = X.astype('float32')
        npz_dir = os.path.dirname(fname_npz)
        if npz_dir:
            os.makedirs(npz_dir, exist_ok=True)
        np.savez(fname_npz, X=X, Y=Y)
        print('Saved dataset to {}.'.format(fname_npz))
    print('X_shape:{}\nY_shape:{}'.format(X.shape, Y.shape))


In [159]:
# Build np-datasets/train_data.npz (or load it if it already exists) and print the array shapes.
process()

Saved dataset to dataset.npz.
X_shape:(15, 32, 32, 15, 3)
Y_shape:(15, 8)


## 数据准备
将视频转换为 NumPy 数组的代码已在上一节独立出来；本节只负责把生成的数据上传到 S3。

开始上传数据：


In [160]:
# Alternative 1: upload with the AWS CLI
# aws s3 cp video_sample s3://sagemaker-studio-517141035927-qdeikgx1x88/ --recursive
# Alternative 2: upload from Python; see the S3 demo at
# https://github.com/aws-samples/aws-python-sample/blob/master/s3_sample.py

In [161]:
# Upload the local `np-datasets` directory under the `dataset/np` key prefix,
# reusing the session created at the top of the notebook instead of building
# a second one. `inputs` is the resulting S3 URI and is displayed below.
inputs = sagemaker_session.upload_data(path='np-datasets', key_prefix='dataset/np')
inputs

's3://sagemaker-us-west-2-517141035927/dataset/np'

In [None]:
# Hyperparameters handed to the training script as command-line arguments.
#
# Reference: argparse options noted from the training script (possibly out of
# date — verify against sagemaker-3dcnn.py):
#   --batch   int   default 128
#   --epoch   int   default 100
#   --videos  str   default 'UCF101'   directory where videos are stored
#   --nclass  int   default 101
#   --output  str   required
#   --color   bool  default False
#   --skip    bool  default True
#   --depth   int   default 10
#
# `data_dir` is the in-container path of the `training` input channel.
hyperparameters = dict(
    epoch=3,
    data_dir='/opt/ml/input/data/training',
    batch=3,
    nclass=8,
    output='/opt/ml/output',
)


从 GitHub 读取训练代码的方式（通过 `git_config`，目前已注释掉）：

In [163]:
# git_config = {'repo': 'https://github.com/VerRan/3DCNN.git', 'branch': 'script'}
# estimator = TensorFlow(entry_point='sagemaker-3dcnn.py',
# #                        source_dir='char-rnn-tensorflow',
#                        git_config=git_config,
#                        train_instance_type='ml.c4.xlarge', # Executes training in a ml.c4.xlarge instance
#                        train_instance_count=1,
#                        hyperparameters=hyperparameters,
#                        role=sagemaker.get_execution_role(),
#                        framework_version='1.15.2',
#                        py_version='py3',
#                        script_mode=True)

本地调试

In [168]:
# Local smoke test of the entry point: same batch/epoch/nclass as the training job,
# but reading from the local `np-datasets` directory and writing to `default-output`.
# NOTE(review): the recorded run of this cell fails inside the script (line 53):
# `json` is used without being imported, and that same line reads the SM_HOSTS
# environment variable, which is presumably unset outside a SageMaker container.
# Fix the script before relying on this cell.
! python3 sagemaker-3dcnn.py --batch 3 --data_dir np-datasets --epoch 3 --output default-output  --nclass 8

Using TensorFlow backend.
Traceback (most recent call last):
  File "sagemaker-3dcnn.py", line 110, in <module>
    main()
  File "sagemaker-3dcnn.py", line 53, in main
    parser.add_argument('--hosts', type=list, default=json.loads(os.environ.get('SM_HOSTS')))
NameError: name 'json' is not defined


In [169]:
# Script-mode TensorFlow estimator that runs `sagemaker-3dcnn.py` on a single
# ml.c4.xlarge instance with the hyperparameters defined above.
# The IAM role resolved at the top of the notebook is reused rather than
# looked up a second time.
# NOTE(review): `train_instance_type`, `train_instance_count` and `script_mode`
# are SageMaker Python SDK v1 argument names; confirm the installed SDK version
# before renaming them to the v2 equivalents.
estimator = TensorFlow(entry_point='sagemaker-3dcnn.py',
                       train_instance_type='ml.c4.xlarge',
                       train_instance_count=1,
                       hyperparameters=hyperparameters,
                       role=role,
                       framework_version='1.15.2',
                       py_version='py3',
                       script_mode=True)

In [170]:
# Start the training job with the uploaded dataset as the `training` channel.
# The channel name matches the last path component of hyperparameters['data_dir']
# ('/opt/ml/input/data/training').
estimator.fit({'training': inputs})

2020-12-16 13:34:17 Starting - Starting the training job...
2020-12-16 13:34:19 Starting - Launching requested ML instances......
2020-12-16 13:35:29 Starting - Preparing the instances for training......
2020-12-16 13:36:42 Downloading - Downloading input data...
[0m
[34m2020-12-16 13:37:31,001 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2020-12-16 13:37:31,006 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-12-16 13:37:31,531 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-12-16 13:37:31,548 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-12-16 13:37:31,563 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-12-16 13:37:31,574 sagemaker-containers INFO     Invoking user script
[0m
[34mTraining Env:
[0m
[34m{
    "additional_framework_parameters": {},
    "chann