
minor edits

boris-kz committed Jun 27, 2018
1 parent d1cb48c commit 3cb9fd1dd97ce3be2027831537db272504468220
Showing with 112 additions and 61 deletions.
  1. +38 −8 frame_dblobs.py
  2. +5 −5 line_POC.py
  3. +69 −48 video_draft.py
frame_dblobs.py
@@ -3,7 +3,36 @@
from time import time
from collections import deque
''' An updated version of frame_blobs with only one blob type: dblob, to ease debugging '''
''' An updated version of frame_blobs with only one blob type: dblob, to ease debugging
frame() is my core algorithm of levels 1 + 2, modified for 2D: segmentation of image into blobs, then search within and between blobs.
frame_blobs() is frame() limited to definition of initial blobs per each of 4 derivatives, vs. per 2 gradients in current frame().
In my code, Le denotes level of encoding,
prefix '_' denotes higher-line variable or pattern, vs. same-type lower-line variable or pattern,
postfix '_' denotes array name, vs. same-name elements of that array,
y per line below is shown as relative to y of current line, which is incremented with top-down input within a frame
frame_blobs() performs several steps of encoding, incremental per scan line defined by vertical coordinate y:
1Le, line y: x_comp(p_): lateral pixel comparison -> tuple t ) array t_
2Le, line y- 1: y_comp(t_): vertical pixel comp -> 2D tuple t2 ) array t2_
3Le, line y- 1+ rng*2: form_P(t2) -> 1D pattern P ) P_
4Le, line y- 2+ rng*2: scan_P_(P, _P) -> _P, fork_, root_: downward and upward connections between Ps of adjacent lines
5Le, line y- 3+ rng*2: form_blob(_P, blob) -> blob: merge connected Ps into non-forking blob segments
6Le, line y- 4+ rng*2+ + blob depth: term_blob, form_net -> net: merge connected segments into network of terminated forks
These functions are tested through form_P, I am currently debugging scan_P_.
All 2D functions (ycomp, scan_P_, etc.) input two lines: higher and lower, convert elements of lower line
into elements of new higher line, and displace elements of old higher line into some higher function.
Higher-line elements include additional variables, derived while they were lower-line elements.
Pixel comparison in 2D forms lateral and vertical derivatives: 2 matches and 2 differences per pixel.
They are formed on the same level because average lateral match ~ average vertical match.
Each vertical and horizontal derivative forms separate blobs, suppressing overlapping orthogonal representations.
They can also be summed to estimate diagonal or hypot derivatives, for blob orientation to maximize primary derivatives.
Orientation increases primary dimension of blob to maximize match, and decreases secondary dimension to maximize difference.
'''
def lateral_comp(pixel_): # comparison over x coordinate: between min_rng of consecutive pixels within each line
@@ -55,11 +84,12 @@ def vertical_comp(ders_, ders2__, _dP_, dframe):
ders2_[index] = (_p, d, m, dy, my)
elif x > min_coord and y > min_coord: # or min y is increased by x_comp on line y=0?
_v = _m - ave
vy = my + _my - ave
ders2 = _p, _d, _v, dy + _dy, vy
_v = _m - abs(d)/4 - ave # _m - abs(d)/8: projected match is cancelled by negative d/4
vy = my + _my - abs(dy)/4 - ave
ders2 = _p, _d, _v, dy + _dy, vy
dP, dP_, dbuff_, _dP_, dframe = form_P(ders2, x, dP, dP_, dbuff_, _dP_, dframe)
index += 1
ders2_.appendleft((p, d, m, 0, 0)) # initial dy and my = 0, new ders2 replaces completed t2 in vertical ders2_ via maxlen
@@ -68,15 +98,15 @@ def vertical_comp(ders_, ders2__, _dP_, dframe):
return new_ders2__, dP_, dframe # extended in scan_P_; net_s are packed into frames
def form_P(ders2, x, P, P_, buff_, _P_, frame): # terminates, initializes, accumulates 1D pattern: dP | vP | dyP | vyP
def form_P(ders2, x, P, P_, buff_, _P_, frame): # initializes, accumulates, and terminates 1D pattern: dP | vP | dyP | vyP
p, d, v, dy, vy = ders2 # 2D tuple of derivatives per pixel, "y" for vertical dimension:
s = 1 if d > 0 else 0 # core = 0 is negative: no selection?
if s == P[0] or x == rng * 2: # s == pri_s or initialized pri_s: P is continued, else terminated:
pri_s, I, D, Dy, V, Vy, ders2_ = P
else:
if y == rng * 2: # first line of Ps -> P_, _P_ is empty till vertical comp returns P_:
if y == rng * 2: # first line of Ps -> P_, _P_ is empty till vertical_comp returns P_:
P_.append((P, x-1, [])) # empty _fork_ in the first line of _Ps, x-1: delayed P displacement
else:
P_, buff_, _P_, frame = scan_P_(x-1, P, P_, buff_, _P_, frame) # scans higher-line Ps for contiguity
@@ -117,8 +147,8 @@ def scan_P_(x, P, P_, _buff_, _P_, frame): # P scans shared-x-coordinate _Ps in
if _x > ix: # x overlap between _P and next P: _P is buffered for next scan_P_, else included in blob_:
buff_.append((_P, _x, _fork_, root_))
else:
if y > rng * 2 + 1 and x < X - 99 and len(_fork_) == 1 and _fork_[0][0][5] == 1: # no fork blob if x < X - len(fork_P[6])?
# if blob _fork_ == 1 and _fork roots == 1, always > 0?
if len(_fork_) == 1 and _fork_[0][0][5] == 1 and y > rng * 2 + 1 and x < X - 99: # no fork blob if x < X - len(fork_P[6])?
# if blob _fork_ == 1 and _fork roots == 1, always > 0, must be improperly incremented outside of scan_P_
blob = form_blob(_fork_[0], _P, _x) # y-2 _P is packed in y-3 _fork_[0] blob + __fork_
else:
ave_x = _x - len(_P[6]) / 2 # average x of P: always integer?
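
For orientation, a minimal stand-alone sketch of the 1Le fuzzy lateral comparison described in the docstring above (simplified names and arbitrary filter values; not the exact lateral_comp in this file):

from collections import deque

min_rng = 2   # comparison range, illustrative
ave = 15      # average-match filter, illustrative

def lateral_comp_sketch(pixel_):
    # compare each pixel to min_rng prior pixels; d and m accumulate over that range
    ders_ = []                          # completed (pri_p, d, m) tuples
    rng_ders_ = deque(maxlen=min_rng)   # incomplete tuples, awaiting rightward comparands
    for p in pixel_:
        for index, (pri_p, d, m) in enumerate(rng_ders_):
            d += p - pri_p              # difference accumulates over the range
            m += min(p, pri_p)          # match accumulates over the range
            rng_ders_[index] = (pri_p, d, m)
        if len(rng_ders_) == min_rng:   # leftmost tuple is now complete
            ders_.append(rng_ders_[0])
        rng_ders_.append((p, 0, 0))     # new pixel starts its own incomplete tuple
    return ders_
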
line_POC.py
@@ -17,11 +17,11 @@
def recursive_comparison(x, p, pri_p, d, v, pri_d, pri_m, dP, vP, dP_, vP_, X, redun, rng):
# incremental-range comp within vPs or incremental-derivation comp within dPs,
# called from pre_recursive_comp(), which is called from form_P
# called from pre_recursive_comp(), which is called from form_pattern
d += p - pri_p # fuzzy d accumulates differences between p and all prior and subsequent ps in extended rng
m = min(p, pri_p)
v += m + pri_m - abs(d + pri_d)/4 - ave*2 # fuzzy v accumulates deviation of match within bilateral extended rng
v += m + pri_m - abs(d + pri_d) /4 - ave *2 # fuzzy v accumulates deviation of match within bilateral extended rng
dP, dP_ = form_pattern(0, dP, dP_, pri_p, d, v, x, X, redun, rng)
vP, vP_ = form_pattern(1, vP, vP_, pri_p, d, v, x, X, redun, rng)
@@ -47,7 +47,7 @@ def pre_recursive_comp(typ, element_, redun, rng): # pre-processing for comp re
pri_p, d, v = element_[x-rng]
pri_d, pri_m, d, v, dP, vP, dP_, vP_ = recursive_comparison(x, p, pri_p, d, v, pri_d, pri_m, dP, vP, dP_, vP_, X, redun, rng)
else: # comparison derivation increment within element_ = d_ of dP:
else: # comparison derivation increment within element_ = d_ of dP:
pri_p = element_[0]
d, v = 0, 0
for x in range(1, X):
@@ -109,8 +109,8 @@ def comparison(x, p, pri_d, pri_m, rng_ders_, dP, vP, dP_, vP_, X): # pixel is
elif x > min_rng * 2 - 1: # ders are accumulated over full bilateral rng: before and rng after displaced pixel
v = (m + pri_m) - abs(d + pri_d) /4 - ave * min_rng *2 # m - abs(d)/4: bilateral projected match reduced by neg d/2
# predictive value of match, sign for inclusion into positive | negative vP
v = (m + pri_m) - abs(d + pri_d) /4 - ave * min_rng *2 # m - abs(d)/4: bilateral projected match is reduced by neg d/2
# predictive value of match, sign determines inclusion into positive | negative vP
# completed tuple (pri_p, d, v) of summation range = rng (maxlen in rng_t_) transferred to form_pattern,
# to form difference pattern dP: span of pixels with same-sign d, or value pattern vP: span of pixels with same-sign v:
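
To make the dP / vP comment above concrete, a toy sketch of same-sign span segmentation (simplified pattern fields; not this file's form_pattern):

def form_patterns_sketch(ders_, use_v=False):
    # ders_: list of (p, d, v) tuples; a pattern is a span of consecutive ders
    # with the same sign of the core variable: d for dP, v for vP
    patterns, P, pri_s = [], None, None
    for p, d, v in ders_:
        core = v if use_v else d
        s = 1 if core > 0 else 0
        if s != pri_s and P is not None:   # sign change terminates current pattern
            patterns.append(P)
            P = None
        if P is None:
            P = [s, 0, 0, 0, 0]            # sign, L, I, D, V
        P[1] += 1; P[2] += p; P[3] += d; P[4] += v
        pri_s = s
    if P is not None:
        patterns.append(P)
    return patterns
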
video_draft.py
@@ -3,19 +3,24 @@
from time import time
from collections import deque
''' Comparison over a sequence frames in a video, currently only initial pixel tuple formation:
''' This file is currently just a stab.
Comparison over a sequence frames in a video, currently only initial ders-per-pixel tuple formation:
immediate pixel comparison to rng consecutive pixels over lateral x, vertical y, temporal t coordinates,
resulting 3D tuples (p, d, m, dy, my, dt, mt) per pixel are combined into
then resulting 3D tuples (p, dx, mx, dy, my, dt, mt) per pixel are combined into
incremental-dimensionality patterns: 1D Ps ) 2D blobs ) TD durables, not oriented for inclusion?
evaluated for orientation, re-composition, incremental-dimensionality comparison, and its recursion?
2D blob synq: bottom-up and right-to-left, potential overlap by max / min coord (from form_network),
ave coord comp -> match, evaluated before computing specific overlap by cross-comparing blob segments,
orientation in 2D only, time is neutral unless mapped to depth?
'''
recursive input scope unroll: .multiple ( integer ( binary, accessed if hLe match * lLe total,
comp power = depth of content: normalized by hLe pwr miss if hLe diff * hLe match * lLe total
3rd comp to 3rd-level ff -> filter pattern: longer-range nP forward, binary trans-level fb:
complemented P: longer-range = higher-level ff & higher-res fb, recursion eval for positive Ps?
colors will be defined as color / sum-of-colors, and single-color patterns are within grey-scale patterns:

@Twenkid

Twenkid Aug 16, 2018

Collaborator

"colors will be defined as color / sum-of-colors, and single-color patterns are within grey-scale patterns:
primary white patterns ( sub-patterns per relative color, not cross-compared: already complementary?"

That's easy to compute, but is it correct?
What about HSV color space:
https://en.wikipedia.org/wiki/HSL_and_HSV

Since you're using cv2 to load images:
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

Also, the BW formula for human-like color perception is: 0.21 R + 0.72 G + 0.07 B
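
For illustration, a minimal sketch of both conversions (the path is a placeholder; note that OpenCV's own grayscale conversion uses the Rec. 601 weights ~0.299 R + 0.587 G + 0.114 B, while the 0.21/0.72/0.07 weights above are Rec. 709-style):

import cv2
import numpy as np

frame = cv2.imread('input.png')                  # BGR, uint8; placeholder path
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)     # hue / saturation / value
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)   # Rec. 601 luma weights
b, g, r = cv2.split(frame.astype(np.float32))
luma_709 = 0.21 * r + 0.72 * g + 0.07 * b        # the weights quoted above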

@boris-kz

boris-kz via email Aug 16, 2018

Owner

@Twenkid

Twenkid Aug 17, 2018

Collaborator

Human ratios are meant to map human sight and ranges more directly, since that will be the initial benchmark.

In human perception, BW is also more informative (in TV and video encoding, luma usually gets higher frequency/bandwidth than chroma).

HSV/HSL are more optimal representations for operating with color parameters.

As for the stable ratios - how would you achieve it that way? Maybe it requires active adjustment with variables.

Light also has a color (color temperature), which changes the color of objects; for color-constant vision there's a running white-balance adjustment.

@boris-kz

boris-kz Aug 17, 2018

Owner

Human ratios are meant to map human sight and ranges more directly, since that will be the initial benchmark.

Ok, that may help, but it's pretty far down the road.

HSV/HSL are more optimal representations for operating with color parameters.

For human vision. As far as I can tell, this separation into subjectively distinct hues is probably due to some pattern of wiring between retina cones of different color. Do you know of any objective reason for that?

As for the stable ratios - how would you achieve it that way? Maybe it requires active adjustment with variables.
Light also has a color (color temperature), which changes the color of objects; for color-constant
vision there's a running white-balance adjustment.

I meant that light color is relatively more stable than intensity. For example, intensity changes a lot more than color with shading and the angle of illumination.
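
For example, a surface at roughly RGB (60, 120, 180) that falls into half shade becomes roughly (30, 60, 90): intensity drops by half while the 1:2:3 ratios stay the same.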

@Twenkid

Twenkid Aug 17, 2018

Collaborator

I meant that light color is relatively more stable than intensity. For example, intensity changes a lot more than color with shading and the angle of illumination.

Now that you repeat that - color changes less as a hue, one out of several buckets. However, the RGB triplets for different hues and values (brightness levels) are quite different, and technically there are 16M colors in 24-bit mode. If you reduced the number of steps in BW, it would also change less.
0, 3, 8 - very dark blue
31, 156, 247 - very light blue
0, 0, 255 is also blue

31,94,25 - some middle green
190, 228, 171 - light green-yellowish
0, 255, 0 is also green
0, 1, 0 is also green

It's related to ratios, but it's not color/sum_of_colors, because color information would be lost.
The sum is correlated with the brightness/lightness.
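
For concreteness, here is what color / sum-of-colors computes for the example values above (a quick sketch; whether these ratios keep enough color information is exactly the question here):

import numpy as np

rgb = np.float32([[0, 3, 8],        # very dark blue
                  [31, 156, 247],   # very light blue
                  [0, 0, 255]])     # pure blue
ratios = rgb / rgb.sum(axis=1, keepdims=True)   # color / sum-of-colors per pixel
print(ratios.round(2))
# ~ [[0.00 0.27 0.73]
#    [0.07 0.36 0.57]
#    [0.00 0.00 1.00]]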

HSV/HSL are more optimal representations for operating with color parameters.
For human vision. As far as I can tell, this separation into subjectively distinct hues is probably due to some pattern of wiring between retina cones of different color. Do you know of any objective reason for that?

I don't, but I may speculate that the number of "main" colors may be related to some architecturally optimal number of classes for a given bandwidth - as word length is related to some vocal/sound "working memory" capacity and breathing. In magnitude it doesn't seem much different from the magic number 7 +- 2. Maybe it's related to some sweet spot: enough relative constancy of color to keep a measurement of continuity going long enough to perceive an object as a whole, yet enough contrast to distinguish one object from another, in some good-enough model of an "average environment".

I can speculate about the BW weights: blue has a share of 0.07 maybe because the sky is brighter than the landscape, so in the BW sum it could outweigh the other components. Green has the highest sensitivity in order to see better in the forest and to detect more details on a grass field. Red is in the middle - for the sunrise and the sunset, which were important borderline periods when the light color was red-orange.

These reasons could match the R-G-B division as well, thinking of them not for capturing the intermediate nuances, but to distinguish the main classes of inputs - the main natural colored light sources (spectrum peaks) - B and R and the sky - and the most frequent environmental color - G.

...

Actually, HSV/HSL take into account changes in the "illumination" (light, dark, ... color) while keeping the more general color class constant.

As for non-human vision - if you mean that the other representation would be better, I don't know how much it matters at that low level. After converting to HSV, for example, it's much easier to track colored objects or do chroma keying ("green screen").

Nonhuman - a "better" (whatever it means) superhuman vision - what do you mean?

Optimizations, more efficient/simple representations (lossless?)? As for CogAlg, I assume that such representations could develop for/per particular patterns, environments etc., to include parameters about light, surface properties, color of the surface, etc., but that would be sort of a higher-level or bigger-range property, like the "materials" in 3D; it won't be just "color" anymore.

Or, at another extreme, some "variables" within the patterns which locally have no direct/simple mapping to "color". In DL, Andrew Ng used to say (approximately) that phonemes and other hand-crafted features didn't exist, that they were theoretical constructs of the linguists. Let the algorithm discover what representation is good for its goals.

OK, it discovers it, using a huge amount of labeled data; however, this ends up with thousands and millions of intertwined parameters dependent on the whole training set, while the hand-crafted features are usually local and defined by a handful of independent variables, like the normal color.

@boris-kz

boris-kz Aug 18, 2018

Owner

I meant that light color is relatively more stable than intensity.
For example, intensity changes a lot more than color with shading and the angle of illumination.

Now that you repeat that - color changes less as a hue, one out of several buckets.

Because hue is a mixed color. Information is destroyed, so there is less to change.

It's related to ratios, but it's not color/sum_of_colors, because color information would be lost.

No, it's not. Don't know where you got that.

The sum is correlated with the brightness/lightness.

HSV/HSL are more optimal representations for operating with color parameters.
For human vision. As far as I can tell, this separation into subjectively distinct hues is probably due to some pattern of wiring between retina cones of different color. Do you know of any objective reason for that?

These reasons could match the R-G-B division as well,

RGB is primary input, I don't have any choice over that.
But hues are compound, in what seems to me a dirty and arbitrary fashion.
I already have a compound over the whole spectrum: BW.

Actually, HSV/HSL take into account changes in the "illumination" (light, dark, ... color)
while keeping the more general color class constant.

There is nothing inherently general about hues, I think it's just a quirk of human vision.

Nonhuman - a "better" (whatever it means) superhuman vision - what do you mean?

I want to model reality, not human brain or human eye.
RGB is real, hues seem to be a convoluted artifact with no objective justification.

@Twenkid

Twenkid via email Aug 18, 2018

Collaborator

@boris-kz

boris-kz via email Aug 18, 2018

Owner

@Twenkid

Twenkid via email Aug 18, 2018

Collaborator

@boris-kz

boris-kz via email Aug 18, 2018

Owner

@Twenkid

Twenkid Aug 18, 2018

Collaborator

This is objective data, all post-processing only makes it palatable to human eye.

It's more often about computationally efficient ways to store or process the image data, to store it with less loss, etc. It's for the computers and the electronics rather than for the human eye.

They usually exploit the limitations of human sight in order to save on bandwidth where that's desirable; it doesn't make things prettier when post-processing is required and artifacts appear, such as pale colors, noise and low contrast.

...

Regarding cameras: when teaching your algorithm on random "long" videos that keep switching between different scenes within the same recording, if it doesn't know about camera parameters external to the images, it would be confused by scenes of changing light or aperture/shutter speed/sensitivity.

Cameras or operators adjust these parameters, which maps to your underflow/overflow cases.

However, this information is not stored in "normal random" third-party videos (e.g. downloaded from Youtube), except indirectly, through events of heavy under- or overexposure followed by gradual adjustment (or staying in that state, with big areas at maximum or minimum brightness).

Your algorithm may notice that these events happen when there's over- or underexposure, but it couldn't correctly get the magnitude of the adjustments; it could only guess it from the amount of under/overexposure and the time needed for adjustment, which sometimes suggests it for some cameras.

Similarly for focus and DOF, but there it would notice that some objects get sharp while others get blurry, and could infer "focused" and "out of focus" classes and their degree.

@Twenkid

Twenkid Aug 18, 2018

Collaborator

Also, if the sensors' data are the "objective" source, then start with "RAW", such as sensor voltage readings and sub-pixel data... The visible output image from cameras is not raw; it's processed, denoised, white-balance adjusted, compressed, ...

https://en.wikipedia.org/wiki/Raw_image_format

@boris-kz

boris-kz via email Aug 18, 2018

Owner

@Twenkid

Twenkid via email Aug 19, 2018

Collaborator

primary white patterns ( sub-patterns per relative color, not cross-compared: already complementary?
'''
def lateral_comp(pixel_): # comparison over x coordinate: between min_rng of consecutive pixels within each line
@@ -68,7 +73,7 @@ def vertical_comp(ders_, rng_ders2__): # comparison between rng vertically cons
return new_ders2__, ders2_ # no laterally incomplete tuples?
def temporal_comp(ders2_, rng_ders3__, _dP_, sequence):
def temporal_comp(ders2_, rng_ders3__, _dP_, _blob_, sequence):
# ders2_: a frame of 2D tuples, all scan lines are spliced into one array
# rng_ders3__: an older frame of 3D tuple arrays, all scan lines are spliced into one array
@@ -96,46 +101,46 @@ def temporal_comp(ders2_, rng_ders3__, _dP_, sequence):
ders3_[index] = (_p, d, m, dy, my, dt, mt)
elif x > min_coord and y > min_coord and t > min_coord:
_v = _m - ave
_vy = _my - ave
vt = mt +_mt - ave
ders3 = _p, _d, _v, _dy, _vy, dt + _dt, vt
_v = _m - abs(d)/4 - ave # _m - abs(d)/4: projected match is cancelled by negative d/2
_vy = _my - abs(dy)/4 - ave
vt = mt +_mt - abs(dt)/4 - ave
ders3 = _p, _d, _v, _dy, _vy, dt + _dt, vt
dP, dP_, dbuff_, _dP_, sequence = form_P(0, ders3, x, y, dP, dP_, dbuff_, _dP_, sequence)
# start form 1D, but blob_buff? # generic form_P( dP)
index += 1
index += 1 # start form 1D, but blob_buff? # generic form_P( dP)
rng_ders3__.appendleft((p, d, m, dy, my, 0, 0)) # initial dt and mt = 0, new ders3 replaces completed ders3 in temporal ders3_ via maxlen
new_ders3__.append((ders3_, dt, mt)) # temporally-incomplete 2D array of tuples, converted to ders3__ for next-frame comp
return new_ders3__, dP_, sequence # extended in scan_P_; net_s are packed into frames
return new_ders3__, dP_, _blob_, sequence # extended in scan_P_; net_s are packed into frames
def form_P(typ, ders3, x, y, P, P_, buff_, _P_, sequence): # terminates, initializes, accumulates 1D pattern: dP | vP | dyP | vyP
p, dx, vx, dy, vy, dt, vt = ders3 # 3D tuple of derivatives per pixel, "x" for lateral D, "y" for vertical D, "t" for temporal D:
if typ == 0: core = dx; alt1 = vx; alt2 = dy; alt3 = dt; alt4 = vy; alt5 = vt
elif typ == 1: core = vx; alt1 = dx; alt2 = vy; alt3 = vt; alt4 = dy; alt5 = dt
elif typ == 2: core = dy; alt1 = vy; alt2 = dx; alt3 = dt; alt4 = vx; alt5 = vt
elif typ == 3: core = vy; alt1 = dy; alt2 = vx; alt3 = vt; alt4 = dx; alt5 = dt
elif typ == 4: core = dt; alt1 = vt; alt2 = dx; alt3 = dy; alt4 = vx; alt5 = vy
else: core = vt; alt1 = dt; alt2 = vx; alt3 = vy; alt4 = dx; alt5 = dy
if typ == 0: core = dx; alt0 = vx; alt1 = dy; alt2 = dt; alt3 = vy; alt4 = vt
elif typ == 1: core = vx; alt0 = dx; alt1 = vy; alt2 = vt; alt3 = dy; alt4 = dt
elif typ == 2: core = dy; alt0 = vy; alt1 = dx; alt2 = dt; alt3 = vx; alt4 = vt
elif typ == 3: core = vy; alt0 = dy; alt1 = vx; alt2 = vt; alt3 = dx; alt4 = dt
elif typ == 4: core = dt; alt0 = vt; alt1 = dx; alt2 = dy; alt3 = vx; alt4 = vy
else: core = vt; alt0 = dt; alt1 = vx; alt2 = vy; alt3 = dx; alt4 = dy
# core: variable that defines current type of pattern, alt cores define overlapping alternative-type patterns:
# core: variable that defines current type of pattern, 5 alt cores define overlapping alternative-type patterns:
# alt derivative, alt direction, alt 2nd direction, alt derivative + direction, alt 2nd derivative + direction
s = 1 if core > 0 else 0 # core = 0 is negative: no selection?
if s == P[0] or x == rng*2 or y == rng*2: # s == pri_s or initialized pri_s: P is continued, else terminated:
pri_s, I, Dx, Dy, Dt, Vx, Vy, Vt, Alt1, Alt2, Alt3, Alt4, Alt5, ders3_ = P
pri_s, I, Dx, Dy, Dt, Vx, Vy, Vt, Alt0, Alt1, Alt2, Alt3, Alt4, ders3_ = P
else:
if t == rng*2: # first frame of Ps, _P_ is empty till temporal comp returns P_:
P_.append((P, x-1, y-1, [])) # empty _fork_ in the first frame of _Ps, x-1 and y-1: delayed P displacement
else:
P_, buff_, _P_, sequence = scan_P_(typ, x-1, y-1, P, P_, buff_, _P_, sequence)
# scans prior-frame Ps for contiguity
I, Dx, Dy, Dt, Vx, Vy, Vt, Alt1, Alt2, Alt3, Alt4, Alt5, ders3_ = 0,0,0,0,0,0,0,0,0,0,0,0,[] # P initialization
I, Dx, Dy, Dt, Vx, Vy, Vt, Alt0, Alt1, Alt2, Alt3, Alt4, ders3_ = 0,0,0,0,0,0,0,0,0,0,0,0,[] # P initialization
I += p # summed input and derivatives are accumulated as P and alt_P parameters, continued or initialized:
Dx += dx # lateral D
@@ -144,57 +149,71 @@ def form_P(typ, ders3, x, y, P, P_, buff_, _P_, sequence): # terminates, initia
Vx += vx # lateral V
Vy += vy # vertical V
vt += vt # temporal V
Alt1 += abs(alt1) # abs Alt cores indicate value of redundant alt-core Ps, to compute P redundancy rate
Alt2 += abs(alt2) # vs. specific overlaps: cost >> gain in precision?
Alt0 += abs(alt0) # abs Alt cores indicate value of redundant alt-core Ps, to compute P redundancy rate
Alt1 += abs(alt1) # vs. specific overlaps: cost >> gain in precision?
Alt2 += abs(alt2)
Alt3 += abs(alt3)
Alt4 += abs(alt4)
Alt5 += abs(alt5)
ders3_.append(ders3) # ders3 is buffered for oriented rescan and incremental range | derivation comp
P = s, I, Dx, Dy, Dt, Vx, Vy, Vt, Alt1, Alt2, Alt3, Alt4, Alt5, ders3_
P = s, I, Dx, Dy, Dt, Vx, Vy, Vt, Alt0, Alt1, Alt2, Alt3, Alt4, ders3_
return P, P_, buff_, _P_, sequence # accumulated within a frame
''' color: primarily white, internal sub-patterns per relative color, not cross-compared because already complementary?
recursive access of compositionally-lower levels of pattern: normalized for comp if min d(dim) -> r(dim)?
scope hierarchy: ...multiple ( integer ( binary, unrolled if hLe match * lLe total,
power = depth of content, norm if hLe diff * hLe match * lLe total
'''
to be added:
scan_P_, form_blob, term_blob, form_net, term_net,
then sequential dimension add, from root() at frame end:
scan_blob_: 2D synq: bottom-up and right-to-left, potential overlap by max / min coord (from form_network),
ave coord comp -> match, evaluated before computing specific overlap by cross-comparing blob segments,
hier contig eval: possible cont -> ave_coord comp -> dim & ave comp -> exact cont & comp?
orientation in 2D only, time is neutral unless mapped to depth?
but orient eval after persistence term: for comp over sequence?
'''
def sequence_to_durables(f): # postfix '_' distinguishes array vs. element, prefix '_' distinguishes higher-line vs. lower-line variable
def sequence_to_durables(f): # currently only a draft
# postfix '_' denotes array vs. element, prefix '_' denotes prior- pixel, line, or frame variable
_P_ = deque() # higher-line same- d-, v-, dy-, vy- sign 1D patterns
_P_ = deque() # higher line of same- d- | v- | dy- | vy- sign 1D patterns
_blob_ = deque() # prior frame of same-sign 2D blobs
frame = 0, 0, 0, 0, 0, 0, 0, [] # Dxf, Lf, If, Df, Dyf, Vf, Vyf, net_
global t; t = 0 # temporal coordinate of current frame
ders2_ = deque(maxlen=rng) # vertical buffer of incomplete quadrant tuples, for fuzzy ycomp
ders2_ = deque(maxlen=rng) # vertical buffer of incomplete ders2s, for fuzzy y_comp init
# ders3_ = deque(maxlen=rng) # temporal buffer of incomplete ders3s, for fuzzy t_comp init?
rng_ders2__= [] # vertical buffer + horizontal line: 2D array of 2D tuples, deque for speed?
rng_ders3__= [] # temporal buffer per pixel of a frame: 3D tuples in 3D -> 2D array
rng_ders3__= deque() # temporal buffer per pixel of a frame: 3D tuples in 3D -> 2D array
pixel_ = f[0, :] # first line of pixels
ders_ = lateral_comp(pixel_) # after part_comp (pop, no t_.append) while x < rng?
# initialization:
line_ = f[0] # first frame of lines?
pixel_= line_[0, :] # first line of pixels
ders_ = lateral_comp(pixel_) # after part_comp (pop, no ders_.append) while x < rng?
for (p, d, m) in ders_:
ders2 = p, d, m, 0, 0 # dy, my initialized at 0
ders2_.append(ders2) # only one tuple per first-line t2_
ders2_.append(ders2) # only one tuple per first-line ders2_
rng_ders2__.append((ders2_, 0, 0)) # _dy, _my initialized at 0
for y in range(1, Y): # or Y-1: default term_blob in scan_P_ at y = Y?
for y in range(1, Y): # or Y-1: default term_blob in scan_P_ at y = Y? or no comp, 1st frame initialization only?
pixel_ = f[y, :] # vertical coordinate y is index of new line p_
pixel_ = f[y, :] # vertical coordinate y is index of new line pixel_
ders_ = lateral_comp(pixel_) # lateral pixel comparison
ders2__, rng_ders2__ = vertical_comp(ders_, rng_ders2__) # vertical pixel comparison
for t in range(1, T):
for t in range(1, T): # actual processing
pixel_ = f[t, :] # vertical coordinate y is index of new line p_
line_ = f[t, :] # temporal coordinate t is index of new frame line_
pixel_ = f[t, :]
ders_ = lateral_comp(pixel_) # lateral pixel comparison
ders2__, rng_ders2__ = vertical_comp(ders_, rng_ders2__) # vertical pixel comparison
new_ders3__, dP_, sequence = temporal_comp(ders2_, rng_ders3__, _dP_, sequence) # temporal pixel comparison
new_ders3__, P_, blob_, sequence = temporal_comp(ders2_, rng_ders3__, _P_, _blob_, sequence) # temporal pixel comparison
# frame ends, last vertical rng of incomplete t2__ is discarded,
# but vertically incomplete P_ patterns are still inputted in scan_P_?
# sequence ends, incomplete ders3__ discarded, but vertically incomplete blobs are still inputted in scan_blob_?
return frame # frame of 2D patterns is outputted to level 2
@@ -209,6 +228,8 @@ def sequence_to_durables(f): # postfix '_' distinguishes array vs. element, pre
arguments = vars(argument_parser.parse_args())
# read image as 2d-array of pixels (gray scale):
# this is wrong for video, just a placeholder
image = cv2.imread(arguments['image'], 0).astype(int)
Y, X = image.shape # image height and width
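
Since the read above is acknowledged as a placeholder, one possible sketch of reading a video as grayscale frames (the path and the per-frame conversion are assumptions, not part of this commit):

import cv2
import numpy as np

capture = cv2.VideoCapture('input.avi')   # hypothetical path; a real 'video' argument would go here
frame_ = []
while capture.isOpened():
    ok, frame = capture.read()
    if not ok:
        break
    frame_.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).astype(int))
capture.release()
f = np.array(frame_)        # shape (T, Y, X): temporal, vertical, lateral
T, Y, X = f.shape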
