In [1]:
from __future__ import print_function

import os
import sys
# import main()

import cv2 as cv
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf

import video

In [2]:
# Enables the per-frame debug print of the tracked pixel's magnitude/angle.
printFlag = True

# Colour used for the flow overlay drawn on BGR images (pure green).
GREEN = (0, 255, 0)

In [3]:
def draw_flow(img, flow, step=16):
    """Overlay a sparse grid of flow vectors on a grayscale frame.

    The flow field is sampled every ``step`` pixels (starting ``step/2`` in
    from the top-left corner). For each sample a line is drawn from the
    sample point to the point displaced by the flow, plus a dot at the
    sample point itself.

    Args:
        img: single-channel image; it is converted with ``COLOR_GRAY2BGR``.
        flow: array indexed as ``flow[row, col]`` yielding an ``(fx, fy)`` pair.
        step: grid spacing in pixels.

    Returns:
        A BGR copy of ``img`` with the vectors drawn in ``GREEN``.
    """
    height, width = img.shape[:2]
    half = step/2
    grid = np.mgrid[half:height:step, half:width:step]
    rows, cols = grid.reshape(2, -1).astype(int)

    dx, dy = flow[rows, cols].T
    # One (start, end) point pair per sample, rounded to the nearest pixel.
    segments = np.vstack([cols, rows, cols + dx, rows + dy]).T.reshape(-1, 2, 2)
    segments = np.int32(segments + 0.5)

    canvas = cv.cvtColor(img, cv.COLOR_GRAY2BGR)
    cv.polylines(canvas, segments, 0, GREEN)  # 0 -> open polylines
    for (start_x, start_y), _end in segments:
        cv.circle(canvas, (start_x, start_y), 1, GREEN, -1)
    return canvas

In [4]:
def draw_hsv(flow):
    """Render a dense flow field as a colour image.

    Hue encodes the flow direction (``arctan2(fy, fx) + pi`` scaled by
    ``180 / (2*pi)``), saturation is fixed at 255, and value is the flow
    magnitude min-max normalised to 0..255.

    Args:
        flow: array of shape ``(h, w, 2+)`` whose last axis holds ``fx, fy``.

    Returns:
        The BGR image produced by ``cv.cvtColor(..., COLOR_HSV2BGR)``.
    """
    rows, cols = flow.shape[:2]
    dx = flow[:, :, 0]
    dy = flow[:, :, 1]

    direction = np.arctan2(dy, dx) + np.pi
    speed = np.sqrt(dx*dx+dy*dy)

    hsv = np.zeros((rows, cols, 3), np.uint8)
    hsv[..., 0] = direction * (180/np.pi/2)
    hsv[..., 1] = 255
    # Alternative fixed-gain mapping: hsv[..., 2] = np.minimum(speed*4, 255)
    hsv[..., 2] = cv.normalize(speed, None, 0, 255, cv.NORM_MINMAX)
    return cv.cvtColor(hsv, cv.COLOR_HSV2BGR)

In [5]:
# Path of the clip to analyse.
vid = "./video/stoneturntable1.mp4"

# Location of the single pixel whose flow magnitude/angle is tracked.
pixel_x = 0
pixel_y = 0

# Initial state of the optional visualisations.
show_hsv = True
show_glitch = False

# Interactive alternative to the hard-coded values above:
# vid = input("video_path: ")
# pixel_x, pixel_y = map(int, input('pixel location: ').split())
# show_hsv = map(bool, input("Show HSV?: "))
# show_glitch = map(bool, input("Show glitch?: "))

In [6]:
# Open the input clip, read its properties, create the output writers and
# initialise the state consumed by the processing loop.
cam = video.create_capture(vid)  # cv.VideoCapture()
if not cam.isOpened():
    print("Camera open failed!")
    sys.exit()

# Read the capture's properties.
w = round(cam.get(cv.CAP_PROP_FRAME_WIDTH))
h = round(cam.get(cv.CAP_PROP_FRAME_HEIGHT))
fps = cam.get(cv.CAP_PROP_FPS)  # depending on the source this may be invalid
if not (fps > 0):
    # Covers 0, negative and NaN: fall back to a nominal rate so that the
    # delay computation and the writers below stay usable.
    fps = 30.0
# Get the fourcc code; * unpacks the string: *'DIVX' == 'D', 'I', 'V', 'X'
fourcc = cv.VideoWriter_fourcc(*'DIVX')
# Interval between one frame and the next, in milliseconds.
delay = round(1000/fps)
print(fps)

# Create the cv.VideoWriter objects using the properties read above.
# Only the file name of the input is reused (without its extension):
# str.rstrip('.mp4') strips any trailing '.', 'm', 'p', '4' characters rather
# than the suffix, and keeping the input's directory part would point the
# writers at a nested folder that may not exist.
out_dir = './recorded'
os.makedirs(out_dir, exist_ok=True)
out_stem = os.path.splitext(os.path.basename(vid))[0]
flowout = cv.VideoWriter(os.path.join(out_dir, out_stem + '_flow.avi'), fourcc, fps, (w, h))
hsvout = cv.VideoWriter(os.path.join(out_dir, out_stem + '_hsv.avi'), fourcc, fps, (w, h))

# _ret = whether a frame was obtained, prev = the first frame of the sequence.
_ret, prev = cam.read()
if not _ret:
    print("Failed to read the first frame!")
    cam.release()
    sys.exit()
# Optical flow only needs luminance, so work on a grayscale copy.
prevgray = cv.cvtColor(prev, cv.COLOR_BGR2GRAY)

# Accept both real booleans and the strings 'True'/'False'. Comparing a bool
# directly with the string 'True' is always False and silently disabled both.
show_hsv = (str(show_hsv) == 'True')
show_glitch = (str(show_glitch) == 'True')
cur_glitch = prev.copy()
magnitude_array = np.array([])
loopcnt = 0
# NOTE(review): shaped (w, h) while frames and flow are (h, w); not used by
# any visible cell - confirm the intended orientation before relying on it.
pixelflow = np.zeros((w, h))
print(np.shape(pixelflow))

29.97002997002997
(1280, 720)


In [7]:
# Per-frame processing: dense Farneback optical flow -> polar form -> one
# complex column per frame, collected into the matrix `stacked`
# (pixels x frames) that later cells feed to librosa.istft.
MAX_FRAMES = 150  # stop once this many frames have been processed
DEBUG_FRAME = 10  # frame at which the intermediate arrays are dumped


def _as_complex(real_part, imag_part):
    """Combine two equally shaped real arrays into one complex128 array.

    Vectorised stand-in for ``np.vectorize(complex)(real_part, imag_part)``,
    which calls ``complex()`` once per element in Python.
    """
    out = np.empty(np.shape(real_part), dtype=np.complex128)
    out.real = real_part
    out.imag = imag_part
    return out


# One (n_pixels, 1) column per frame; joined once after the loop instead of
# re-copying the whole growing matrix on every iteration.
columns = []

while True:
    # _ret = whether a frame was obtained, frame = the current video frame
    _ret, frame = cam.read()
    if not _ret:
        # End of the clip (or a read error): stop instead of failing in cvtColor.
        break
    loopcnt += 1

    # prints image size
    # print(np.shape(frame))

    # Convert each frame to grayscale, as was done for the first frame.
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    # Calculates dense optical flow by Farneback method
    # https://docs.opencv.org/3.0-beta/modules/video/doc/motion_analysis_and_object_tracking.html#calcopticalflowfarneback

    # prev, next: previous and current images
    # flow: computed optical flow
    # pyr_scale: downscale ratio used when building the image pyramid
    # levels: number of pyramid levels
    # winsize: averaging window size
    # iterations: number of iterations at each pyramid level
    # poly_n: pixel neighbourhood size for the polynomial expansion, usually 5 or 7
    # poly_sigma: Gaussian standard deviation, usually 1.1 for poly_n = 5 and 1.5 for poly_n = 7
    # flags: 0, cv2.OPTFLOW_USE_INITIAL_FLOW, cv2.OPTFLOW_FARNEBACK_GAUSSIAN.
    # flow = cv.calcOpticalFlowFarneback(prevgray, gray, None, pyr_scale = 0.5, \
    #                                    levels = 3, winsize = 15, iterations = 3, \
    #                                    poly_n = 5, poly_sigma = 1.2, flags = 0)
    flow = cv.calcOpticalFlowFarneback(prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    prevgray = gray

    ##### magnitude and angle #####

    # Raw Cartesian flow components. Despite the names these are channel 0
    # and channel 1 of `flow` (the same two arrays passed to cartToPolar
    # below), not a magnitude and an angle.
    magnitudeInCart = flow[..., 0]
    angleInCart = flow[..., 1]
    # if(printFlag):
        # print(magnitudeInCart, angleInCart)

    # Computes the magnitude and angle (in degrees) of the 2D vectors
    magnitude, angle = cv.cartToPolar(flow[..., 0], flow[..., 1], angleInDegrees = True)
    polarFlow = np.stack([magnitude, angle], axis = 2)

    # Generating complex arrays for sound generation

    # magnitude + j*angle (only used by the debug dump below)
    comp_polarFlow = _as_complex(polarFlow[..., 0], polarFlow[..., 1])
    flat_comp_polarFlow = np.ravel(comp_polarFlow, order='C')

    # magnitude*cos(angle) + j*magnitude*sin(angle)
    ampphase = np.zeros_like(polarFlow)
    ampphase[..., 0] = polarFlow[..., 0] * np.cos(np.deg2rad(polarFlow[...,1]))
    ampphase[..., 1] = polarFlow[..., 0] * np.sin(np.deg2rad(polarFlow[...,1]))
    comp_ampphase = _as_complex(ampphase[..., 0], ampphase[..., 1])
    print("asdf: ", comp_ampphase.dtype)
    flat_comp_ampphase = np.ravel(comp_ampphase, order='C')
    # print(flat_comp_ampphase.ndim)
    # if(printFlag):
        # print("magnitude: ", magnitude, "angle: ", angle)
        # print("shape: ", np.shape(magnitude), np.shape(angle))

    # Polar magnitude and angle at the tracked pixel. Image arrays are
    # indexed [row, col], i.e. [y, x].
    pixel_magnitude = magnitude[pixel_y, pixel_x]
    pixel_angle = angle[pixel_y, pixel_x]
    if(printFlag):
        print(pixel_magnitude, pixel_angle)
    magnitude_array = np.append(magnitude_array, pixel_magnitude)

    # Average polar magnitude and angle over the frame
    avg_magnitude = np.average(magnitude)
    avg_angle = np.average(angle)
    # if printFlag:
        # print(avg_magnitude, avg_angle)

    reshaped = flat_comp_ampphase.reshape(-1, 1)
    columns.append(reshaped)

    ##### print #####
    if loopcnt == DEBUG_FRAME:
        print(np.shape(flow))
        print(flow)
        print(np.shape(magnitude))
        print(magnitude)
        print(np.shape(angle))
        print(angle)
        print(np.shape(polarFlow))
        print(polarFlow)
        print(np.shape(comp_polarFlow))
        print(comp_polarFlow)
        print(np.shape(comp_ampphase))
        print(comp_ampphase)
        print(np.shape(flat_comp_polarFlow))
        print(flat_comp_polarFlow)
        print(np.shape(flat_comp_ampphase))
        print(flat_comp_ampphase)
    if loopcnt == MAX_FRAMES:
        break
    # flowout.write(draw_flow(gray, flow))
    # hsvout.write(draw_hsv(flow))
    # cv.imshow('flow', draw_flow(gray, flow))
    # if show_hsv:
    #     cv.imshow('flow HSV', draw_hsv(flow))
    # if show_glitch:
    #     cur_glitch = warp_flow(cur_glitch, flow)
    #     cv.imshow('glitch', cur_glitch)
    ch = cv.waitKey(1)
    if ch == ord('q'):
        break
    if ch == ord('1'):
        show_hsv = not show_hsv
        print('HSV flow visualization is', ['off', 'on'][show_hsv])
    if ch == ord('2'):
        show_glitch = not show_glitch
        if show_glitch:
            cur_glitch = frame.copy()
        print('glitch is', ['off', 'on'][show_glitch])

if columns:
    # (n_pixels, n_frames) complex128 matrix, one column per processed frame.
    stacked = np.concatenate(columns, axis = 1)
    del columns  # release the per-frame references
    if loopcnt == MAX_FRAMES:
        print(stacked)
        print(np.shape(stacked))
else:
    print("No frames were processed.")

asdf:  complex128
1.1618836e-06 92.01804
asdf:  complex128
0.0 0.0
asdf:  complex128
0.0 0.0
asdf:  complex128
0.0 0.0
asdf:  complex128
0.0016608866 63.25926
asdf:  complex128
7.0399015e-05 252.54869
asdf:  complex128
6.993815e-05 60.95809
asdf:  complex128
0.00037311026 71.85928
asdf:  complex128
0.00073671946 344.79584
asdf:  complex128
0.0010376511 71.644714
(720, 1280, 2)
[[[ 3.2670156e-04  9.8487863e-04]
  [ 2.8298420e-04  1.3450724e-03]
  [ 1.9393172e-05  1.8411303e-03]
  ...
  [-2.1924010e-04  6.0775448e-05]
  [-1.0671787e-04  3.4641191e-05]
  [-3.2187054e-05  2.1207381e-05]]

 [[ 3.3001264e-04  1.0308698e-03]
  [ 3.5755799e-04  1.4247770e-03]
  [ 1.2491620e-04  1.9209594e-03]
  ...
  [-3.3879458e-04  8.2267310e-05]
  [-1.9091440e-04  4.9294515e-05]
  [-7.6501477e-05  2.8699333e-05]]

 [[ 2.8942598e-04  9.7102392e-04]
  [ 3.5168757e-04  1.3558674e-03]
  [ 1.7965621e-04  1.8257141e-03]
  ...
  [-3.5402746e-04  8.8736720e-05]
  [-2.0303483e-04  5.5265420e-05]
  [-8.3037979e-05  3

In [27]:
# Magnitude of every complex entry of `stacked`, followed by its extremes.
# The absolute value is taken once and reused for all three prints.
stacked_abs = np.abs(stacked)
print(stacked_abs)
print(np.min(stacked_abs))
print(np.max(stacked_abs))
del stacked_abs  # the temporary is as large as `stacked`; drop it right away

[[1.16188364e-06 0.00000000e+00 0.00000000e+00 ... 4.35137563e-05
  1.09875049e-16 1.86168687e-04]
 [1.59498714e-06 0.00000000e+00 0.00000000e+00 ... 1.26076327e-04
  2.48706652e-16 3.65674038e-04]
 [2.27608646e-06 0.00000000e+00 0.00000000e+00 ... 1.56227769e-04
  3.96908499e-16 4.50417991e-04]
 ...
 [2.20435591e-06 2.20435591e-06 2.20435591e-06 ... 1.90146751e-03
  2.29284818e-05 1.17670229e-03]
 [2.00291966e-06 2.00291966e-06 2.00291966e-06 ... 1.05144306e-03
  2.61808368e-05 7.05854758e-04]
 [1.67181457e-06 1.67181457e-06 1.67181457e-06 ... 4.02853034e-04
  2.57450122e-05 3.11650486e-04]]
0.0
inf


In [42]:
# Treat `stacked` as an STFT matrix and invert it to a time-domain signal.
istfted = librosa.istft(stacked)
print(istfted)

# Replace NaN/inf samples in place (copy=False), then show the cleaned signal.
np.nan_to_num(istfted, copy=False)
print(istfted.shape)
print(istfted)

[            nan             nan             nan ...  9.96077300e-05
 -4.55936707e-05  7.92578123e-05]
(68659051,)
[ 0.00000000e+00  0.00000000e+00  0.00000000e+00 ...  9.96077300e-05
 -4.55936707e-05  7.92578123e-05]


In [74]:
# Build a short clip from the strictly positive samples of `istfted`:
# skip the first `offset_s` seconds' worth of positive samples, keep the next
# `duration_s` seconds' worth, then write and play the result.
fs = 22050        # sample rate in Hz
duration_s = 10   # length of the clip, in seconds of positive samples
offset_s = 10     # positive samples to skip first, in seconds

ccropped = np.where(istfted>0)
print(ccropped)
print(np.shape(ccropped))
print(type(ccropped))
# Named `positive_idx` rather than `list`, which would shadow the builtin
# for every later cell in the kernel.
positive_idx = np.asarray(ccropped)
print(positive_idx)
print(np.shape(positive_idx))
print(type(positive_idx))
change = positive_idx.reshape(-1)
print(change)
print(np.shape(change))
print(type(change))

# Vectorised form of the per-sample copy loop. If fewer positive samples
# exist than requested, the tail of `cropped` stays zero instead of raising
# an IndexError.
first = fs * offset_s
picked = change[first:first + fs * duration_s]
cropped = np.zeros(fs * duration_s)
cropped[:picked.size] = istfted[picked]

print(cropped)
print(np.shape(cropped))
sf.write('test.wav', cropped, fs)
ipd.Audio(cropped, rate=fs)
# librosa.display.waveshow(cropped, sr=fs)

(array([ 4147192,  4147194,  4147195, ..., 68659047, 68659048, 68659050]),)
(1, 3455695)
<class 'tuple'>
[[ 4147192  4147194  4147195 ... 68659047 68659048 68659050]]
(1, 3455695)
<class 'numpy.ndarray'>
[ 4147192  4147194  4147195 ... 68659047 68659048 68659050]
(3455695,)
<class 'numpy.ndarray'>
[5.66877979e-06 2.00183679e-05 1.90947073e-05 ... 1.18874533e-06
 2.49925044e-06 1.61216824e-06]
(220500,)


In [None]:
# Write, play and plot the full reconstructed signal.
# NOTE: this writes to the same 'test.wav' as the cropped clip and will
# overwrite it.
# fs = 460800
# fs = 691200
fs = 22050
sf.write('test.wav', istfted, fs)
# Only a cell's last expression is rendered automatically, so the player has
# to be displayed explicitly or it never appears.
ipd.display(ipd.Audio(istfted, rate=fs))
librosa.display.waveshow(istfted, sr=fs)
# sr = 30
# D = np.abs(stacked)
# S_dB = librosa.power_to_db(D, ref=np.max)
# img = librosa.display.specshow(S_dB, y_axis='linear', x_axis='time', hop_length=1, sr=30)
# plt.colorbar(format='%2.0f dB')
# plt.show()

In [None]:
# Release the capture and both writers, then report what was collected.
for handle in (cam, flowout, hsvout):
    handle.release()

# Sketch for turning the tracked magnitudes into a tone:
# fs = loopcnt
# freq = magnitude_array
# x1 = np.sin(2*np.pi*freq*np.arange(freq)/fs)
# ipd.Audio(x1, rate = fs)
print(magnitude_array)
print(loopcnt)
print('Done')