In [1]:
%matploblib inline

In [2]:
import os
from shutil import copyfile, rmtree
import numpy as np
import pandas as pd

In [3]:
# Base directory that the image paths below are re-rooted under.
ROOT_DIR = "traindata_raw"

In [4]:
# Read the fixed-width listing file that lives under ROOT_DIR. Columns 5 and 6
# are combined and parsed as a single datetime column (pandas names it '5_6').
# The path is built from ROOT_DIR instead of repeating the directory literal.
df = pd.read_fwf(os.path.join(ROOT_DIR, "ls.txt"), header=None, parse_dates=[[5, 6]])

In [5]:
# Keep only the combined datetime column and the image path column. Take an
# explicit copy: later cells assign new columns into this frame, and writing
# into a slice of the parsed frame can trigger SettingWithCopyWarning.
df = df[['5_6', 8]].copy()

In [6]:
# Preview the first five rows of the two selected columns.
df.head(5)

Unnamed: 0,5_6,8
0,2018-01-23 13:38:53.291493678,traindata/0000de19e9884c5da9653431d993f68e.jpg
1,2018-01-27 12:55:09.268084268,traindata/00094b84bd7041f69e86438840aa0fd5.jpg
2,2018-01-23 13:19:47.332931328,traindata/0009cf38cb104cb19d548c89a3e15070.jpg
3,2018-01-23 13:40:39.031361015,traindata/000fd23f8c5c42f69760f615e5db88e1.jpg
4,2018-01-27 12:51:26.078418871,traindata/00118a9d61894582875dbccb60d856b5.jpg


In [7]:
# Give the two columns descriptive names (positional: datetime first, path second).
df.columns = pd.Index(['datetime', 'path_img'])

In [8]:
# Re-root every listed path under ROOT_DIR, keeping the second '/'-separated
# component of the listed path (the file name in the rows shown above).
df['path_img'] = [
    os.path.join(ROOT_DIR, listed.split('/')[1])
    for listed in df['path_img']
]

In [9]:
# Make sure the timestamp column has a datetime dtype.
df = df.assign(datetime=pd.to_datetime(df['datetime']))

In [11]:
# Order the frames chronologically; the gap and sequence logic below relies on it.
df = df.sort_values(by=['datetime'])

In [12]:
# Seconds elapsed since the previous row (NaN for the first row).
df['seconds_from_prev'] = df['datetime'].diff().dt.total_seconds()

In [13]:
# Preview the first five rows after sorting and computing the gaps.
df.head(5)

Unnamed: 0,datetime,path_img,seconds_from_prev
9967,2018-01-17 17:52:19.756352384,traindata_raw/a3d795e2cf68449ba36e71b5715aaaba...,
3781,2018-01-17 17:53:20.057303596,traindata_raw/3e427666742b4d03a460480d2db795ca...,60.300951
12085,2018-01-17 17:53:21.257250053,traindata_raw/c76f5be6591f4d7b8ee7d04f0694720e...,1.199946
5310,2018-01-17 17:53:22.767184935,traindata_raw/58c4af7195b742f784052456bb746888...,1.509935
8649,2018-01-17 17:53:24.197125598,traindata_raw/8e643d52dfd84f6382800fb85046f8e7...,1.429941


In [14]:
# Start with every row flagged 0; the next cell sets the flag to 1 on large gaps.
df = df.assign(session=0)

In [15]:
# Flag rows that follow a gap of more than two minutes.
df.loc[df['seconds_from_prev'].gt(2 * 60), 'session'] = 1

In [16]:
# Session id = number of gaps longer than two minutes seen so far. It is
# derived from `seconds_from_prev` rather than from the current `session`
# column, so re-running this cell gives the same ids instead of applying
# cumsum() to an already accumulated column.
df['session'] = (df['seconds_from_prev'] > 2 * 60).cumsum()
# 1-based position of each frame inside its session. `df` is sorted by
# datetime, so cumcount() follows time order. Unlike rank().astype(int), whose
# default 'average' method yields fractional ranks for tied timestamps, it
# never produces duplicate or skipped numbers (which would overwrite files and
# break the numbered image sequence).
df['seq'] = df.groupby('session').cumcount() + 1
df['seq'] = df['seq'].apply(lambda x: 'image%05d.jpg' % x)

In [17]:
# Preview the last five rows, including the session id and sequence file name.
df.tail(5)

Unnamed: 0,datetime,path_img,seconds_from_prev,session,seq
386,2018-01-27 13:21:27.211498320,traindata_raw/05ede3814d72412e868330cf8448e7f1...,0.839997,17,image00883.jpg
5868,2018-01-27 13:21:28.011495792,traindata_raw/61977714bf1c437da1ab9578f01c506d...,0.799997,17,image00884.jpg
13461,2018-01-27 13:21:29.011492633,traindata_raw/de3be6aa3c084016ae1e9a827d814c9d...,0.999997,17,image00885.jpg
9453,2018-01-27 13:21:30.111489160,traindata_raw/9b6f7e9a9ea04dd09a32f870329e8041...,1.099997,17,image00886.jpg
4124,2018-01-27 13:21:31.251485563,traindata_raw/445356394493430fb5f155350e45c0eb...,1.139996,17,image00887.jpg


In [18]:
def save_sess(df_sess, overwrite=False, sessions_root='traindata_raw/sessions'):
    """Copy one session's images into its own directory under sequence names.

    Parameters
    ----------
    df_sess : pandas.DataFrame
        Rows of a single session. Must contain the columns ``session`` (the
        first row's value names the directory), ``path_img`` (source file)
        and ``seq`` (destination file name inside the session directory).
    overwrite : bool, default False
        If True and the session directory already exists, it is deleted and
        recreated empty before copying. If False, an existing directory is
        kept and files are copied on top of whatever it already contains.
    sessions_root : str, default 'traindata_raw/sessions'
        Directory under which the per-session directories are created.

    Returns
    -------
    str
        Path of the session directory that was written to.
    """
    session = df_sess['session'].iloc[0]
    sess_dir = '{0}/{1}'.format(sessions_root, session)
    if overwrite and os.path.isdir(sess_dir):
        rmtree(sess_dir)
    if not os.path.isdir(sess_dir):
        # makedirs rather than mkdir: also creates `sessions_root` when it
        # does not exist yet, instead of failing on the missing parent.
        os.makedirs(sess_dir)
    print("Writing to %s" % sess_dir)
    # Plain loop: the copies are pure side effects, so there is no result
    # worth collecting through DataFrame.apply.
    for src_path, seq_name in zip(df_sess['path_img'], df_sess['seq']):
        copyfile(src_path, os.path.join(sess_dir, seq_name))
    return sess_dir

In [19]:
# Walk the groups explicitly instead of using groupby().apply(): apply() may
# call the function more than once for the first group (the recorded output
# shows sessions/0 being written twice), which repeats the delete-and-copy
# side effects. `dirs` becomes a list of session directories in session order.
dirs = [save_sess(sess_rows, overwrite=True) for _, sess_rows in df.groupby('session')]

Writing to traindata_raw/sessions/0
Writing to traindata_raw/sessions/0
Writing to traindata_raw/sessions/1
Writing to traindata_raw/sessions/2
Writing to traindata_raw/sessions/3
Writing to traindata_raw/sessions/4
Writing to traindata_raw/sessions/5
Writing to traindata_raw/sessions/6
Writing to traindata_raw/sessions/7
Writing to traindata_raw/sessions/8
Writing to traindata_raw/sessions/9
Writing to traindata_raw/sessions/10
Writing to traindata_raw/sessions/11
Writing to traindata_raw/sessions/12
Writing to traindata_raw/sessions/13
Writing to traindata_raw/sessions/14
Writing to traindata_raw/sessions/15
Writing to traindata_raw/sessions/16
Writing to traindata_raw/sessions/17


In [20]:
import subprocess

In [21]:
# Encode each session's numbered frames into <session dir>/movie.mp4.
# check_call() waits for ffmpeg to exit and raises CalledProcessError on a
# non-zero exit status. The previous Popen() call returned immediately, so
# all encoders ran at once, failures went unnoticed, and later cells could
# open a movie that was not finished yet. Passing an argument list (no shell)
# also keeps the paths from being interpreted by a shell.
for d in dirs:
    print(d)
    subprocess.check_call([
        "/usr/local/bin/ffmpeg",
        "-f", "image2",
        "-r", "50",
        "-i", "{dir}/image%05d.jpg".format(dir=d),
        "-vcodec", "mpeg4",
        "-y", "{dir}/movie.mp4".format(dir=d),
    ])

traindata_raw/sessions/0
traindata_raw/sessions/1
traindata_raw/sessions/2
traindata_raw/sessions/3
traindata_raw/sessions/4
traindata_raw/sessions/5
traindata_raw/sessions/6
traindata_raw/sessions/7
traindata_raw/sessions/8
traindata_raw/sessions/9
traindata_raw/sessions/10
traindata_raw/sessions/11
traindata_raw/sessions/12
traindata_raw/sessions/13
traindata_raw/sessions/14
traindata_raw/sessions/15
traindata_raw/sessions/16
traindata_raw/sessions/17


In [22]:
%pylab inline 
import cv2

Populating the interactive namespace from numpy and matplotlib


In [23]:
from IPython.display import clear_output

In [24]:
def display_video(vidfile, frames=10):
    """Show up to `frames` frames of a video, one after another, in the cell output.

    Parameters
    ----------
    vidfile
        Passed unchanged to ``cv2.VideoCapture``; this notebook passes the
        path of an ``.mp4`` file.
    frames : int, default 10
        Maximum number of frames to read and display. Playback stops earlier
        if no further frame can be read.

    Notes
    -----
    The capture is always released and "Released Video Resource" is printed
    when playback ends, whether it stopped because the frames ran out, the
    frame limit was reached, or the kernel was interrupted. A
    ``KeyboardInterrupt`` stops playback without being re-raised.
    """
    # Local import so the function does not depend on the names that
    # `%pylab inline` star-imports into the notebook namespace.
    import matplotlib.pyplot as plt

    vid = cv2.VideoCapture(vidfile)
    try:
        for _ in range(frames):
            # Read the next frame; `ret` is false when none could be read.
            ret, frame = vid.read()
            if not ret:
                break
            # OpenCV delivers BGR; matplotlib expects RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            plt.axis('off')
            plt.title("Input Stream")
            plt.imshow(frame)
            plt.show()
            # Keep the current frame visible until the next one is drawn.
            clear_output(wait=True)
    except KeyboardInterrupt:
        # Interrupting the kernel is the intended way to stop playback early.
        pass
    finally:
        # Release on every exit path, not only on end-of-stream or interrupt.
        vid.release()
        print("Released Video Resource")

In [28]:
# Play back (at most 1000 frames of) the movie encoded for session 17.
display_video(vidfile="traindata_raw/sessions/17/movie.mp4", frames=1000)

Released Video Resource
