In [160]:
%matplotlib qt

import matplotlib.pyplot as plt
import scipy.optimize as opt
import pandas as pd 
import numpy as np 
import cv2

In [161]:
# paths
df_false_positives = pd.read_csv("/media/ivan/Ivan/data_20_2_24/data_false_positives_production_day_first_condition.csv")
df_small_red = pd.read_csv("/media/ivan/Ivan/data_20_2_24/data_small_red.csv")
df_big_red_meat = pd.read_csv("/media/ivan/Ivan/data_20_2_24/data_big_red_on_meat.csv")
df_big_red_fat = pd.read_csv("/media/ivan/Ivan/data_20_2_24/data_big_red_on_fat.csv")

# take out first column (positions)
df_false_positives = df_false_positives.drop(df_false_positives.columns[0], axis='columns')  
df_small_red = df_small_red.drop(df_small_red.columns[0], axis='columns')
df_big_red_meat = df_big_red_meat.drop(df_big_red_meat.columns[0], axis='columns')
df_big_red_fat = df_big_red_fat.drop(df_big_red_fat.columns[0], axis='columns') 

In [162]:
# shapes/sizes of data
print("============ Data ============")
print(f'False positives: {df_false_positives.shape}')
print(f'Small red: {df_small_red.shape}')
print(f'Big red on meat: {df_big_red_meat.shape}')
print(f'Big red on fat: {df_big_red_fat.shape}')

False positives: (9622, 184)
Small red: (7820, 184)
Big red on meat: (66798, 184)
Big red on fat: (59152, 184)


# Working with only 2 points (25, 80)

In [163]:
# We take points/spectral bands of our interest
df_false_positives_m = df_false_positives.iloc[:, [25,80,154]]
df_small_red_m = df_small_red.iloc[:, [25,80,154]]
df_big_red_meat_m = df_big_red_meat.iloc[:, [25,80,154]]
df_big_red_fat_m = df_big_red_fat.iloc[:, [25,80,154]]

# into numpy arrays (only points of interest)
false_positives = np.array(df_false_positives_m)
small_red = np.array(df_small_red_m)
big_red_meat = np.array(df_big_red_meat_m)
big_red_fat = np.array(df_big_red_fat_m)

In [164]:
def f(x, data_array):
    
    # Number of samples (data)
    m = len(data_array)
    
    for i in range(len(data_array)):
        
        # Values of each point at each iteration
        value25, value80, value154 = data_array[i]

        # Operation 
        calc = (1/2*m)*(pow((value154*x[0]-value25),2)+pow((value154*x[1]-value80),2))

        return calc

In [165]:
# Optimization using gradient descent
start = np.array([0, 0])    # starting point

# https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin_cg.html
result_g = opt.fmin_cg(f, start, args=(small_red,))
print(result_g)

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 2
         Function evaluations: 15
         Gradient evaluations: 5
[4.14753658 3.09279443]


In [169]:
# Check if it works
y1, y2 = result_g
samples = small_red.shape[0]
count = 0

for i in range(samples):
    value25, value80, value154 = small_red[i]
    test = (1/4)*(pow((value154*y1-value25), 2) + pow(value154*y2-value80, 2))  # 4 --> 2*m(m=number of points used)
    # print(test)
    if test < 0.0001:
        count += 1

print(f'Working on: {count} samples out of a total of {samples} (false positive samples)')

Working on: 882 samples out of a total of 7820 (false positive samples)


# PLOTTING 

In [None]:
# Entire spectrum into numpy arrays
false_positives_e = np.array(df_false_positives)
small_red_e = np.array(df_small_red)
big_red_meat_e = np.array(df_big_red_meat)
big_red_fat_e = np.array(df_big_red_fat)

Plot entire spectrum

In [None]:
# # for larger datasets --> taking 10k random samples
# random_big_red_meat = np.array(df_big_red_meat.sample(n=10000))
# random_big_red_fat = np.array(df_big_red_fat.sample(n=10000))

# for _, sample_signal in enumerate(small_red_e):
#     plt.plot(sample_signal)

# plt.title('Big red on fat (10k random samples)')
# plt.xlabel('x')
# plt.ylabel('y')
# plt.legend()
# plt.show()

Plot comparison small red - false positive

In [None]:
random_sample_small_red = df_small_red.sample(n=1)
s1 = random_sample_small_red.values.tolist()[0] 

random_sample_fp = df_false_positives.sample(n=1)
s2 = random_sample_fp.values.tolist()[0] 

# plot
plt.clf()
plt.plot(s1, label='Small red', color='red')
plt.plot(s2, label='False positive', color='blue')

# Points values
plt.text(25, s1[25]+0.015, f'Point 25: {s1[25]}', fontsize=10, color='red')
plt.text(80, s1[80]+0.015, f'Point 80: {s1[80]}', fontsize=10, color='red')
plt.text(154, s1[154]+0.015, f'Point 154: {s1[154]}', fontsize=10, color='red')
plt.text(25, s2[25]-0.015, f'Point 25: {s2[25]}', fontsize=10, color='blue')
plt.text(80, s2[80]-0.015, f'Point 80: {s2[80]}', fontsize=10, color='blue')
plt.text(154, s2[154]-0.015, f'Point 154: {s2[154]}', fontsize=10, color='blue')

plt.title('Comparison small red - false positive')
plt.xlabel('Index')
plt.ylabel('Value')
plt.legend()
plt.show()

Plot comparison small red - false positive --> NORMALIZED

In [None]:
random_sample_small_red = df_small_red.sample(n=1)
s1 = random_sample_small_red.values.tolist()[0] 

random_sample_fp = df_false_positives.sample(n=1)
s2 = random_sample_fp.values.tolist()[0] 

# Normalization using max
# max_s1 = max(s1)
# max_s2 = max(s2)

# Normalization using value 154
max_s1 = s1[154]
max_s2 = s2[154]
s1_normalized = [(x / max_s1) for x in s1]
s2_normalized = [(x / max_s2) for x in s2]
# s1_normalized = [(x-min(s1) / max_s1-min(s1)) for x in s1]
# s2_normalized = [(x-min(s2) / max_s2-min(s2)) for x in s2]

# plot
plt.clf()
plt.plot(s1_normalized, label='Small red', color='red')
plt.plot(s2_normalized, label='False positive', color='blue')

# Points values
plt.text(25, s1[25]+0.25, f'Point 25: {s1[25]}', fontsize=10, color='red')
plt.text(80, s1[80]+0.25, f'Point 80: {s1[80]}', fontsize=10, color='red')
plt.text(154, s1[154]+0.25, f'Point 154: {s1[154]}', fontsize=10, color='red')
plt.text(25, s2[25]-0.25, f'Point 25: {s2[25]}', fontsize=10, color='blue')
plt.text(80, s2[80]-0.25, f'Point 80: {s2[80]}', fontsize=10, color='blue')
plt.text(154, s2[154]-0.25, f'Point 154: {s2[154]}', fontsize=10, color='blue')

plt.title('Comparison small red - false positive (NORMALIZED)')
plt.xlabel('Index')
plt.ylabel('Normalized Value')
plt.legend()
plt.show()

# Working with more than 2 points 

In [None]:
# We take points/spectral bands of our interest
df_false_positives_m_v2 = df_false_positives.iloc[:, [23,24,25,80,154]]
df_small_red_m_v2 = df_small_red.iloc[:, [23,24,25,80,154]]

# into numpy arrays (only points of interest)
false_positives_v2 = np.array(df_false_positives_m_v2)
small_red_v2 = np.array(df_small_red_m_v2)

In [None]:
def f(x, data_array):
    
    # Number of samples (data)
    m = len(data_array)
    
    for i in range(len(data_array)):
        
        # Values of each point at each iteration
        value23, value24, value25, value80, value154 = data_array[i]

        # Operation 
        calc = (1/2*m)*(pow((value154*x[0]-value23),2)+pow((value154*x[1]-value24),2)+pow((value154*x[2]-value25),2)+pow((value154*x[3]-value80),2))

        return calc

In [None]:
# Optimization using gradient descent
start = np.zeros(df_false_positives_m_v2.shape[1]-1, dtype=int) # starting point

# https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin_cg.html
result_g = opt.fmin_cg(f, start, args=(small_red_v2,))
print(result_g)

In [None]:
# Check if it works
y0, y1, y2, y3 = result_g
samples = false_positives_v2.shape[0]
count = 0

for i in range(samples):
    value23, value24, value25, value80, value154 = false_positives_v2[i]
    test = (1/4)*(pow((value154*y0-value23),2) + pow((value154*y1-value24),2) + pow((value154*y2-value25), 2) + pow(value154*y3-value80, 2))  # 4 --> 2*m(m=number of points used)
    # print(test)
    if test < 0.0001:
        count += 1

print(f'Working on: {count} samples out of a total of {samples} (false positive samples)')

# Working with many points

In [None]:
# We take points/spectral bands of our interest
df_false_positives_m_v3 = df_false_positives.iloc[:, 0:26].join(df_false_positives.iloc[:, [80,154]])
df_small_red_m_v3 = df_small_red.iloc[:, 0:26].join(df_small_red.iloc[:, [80,154]])

# into numpy arrays (only points of interest)
false_positives_v3 = np.array(df_false_positives_m_v3)
small_red_v2 = np.array(df_small_red_m_v3)

In [None]:
def f(x, data_array):
    
    # Number of samples (data)
    m = len(data_array)
    
    for i in range(len(data_array)):
        
        total = 0
        value154 = data_array[i][data_array.shape[1]]
        for j in range(data_array.shape[1]):
            # Operation 
            total = total + pow((value154*x[j]-data_array[j]),2)    # FIXME fix lambdas

        return total

In [None]:
# Optimization using gradient descent
start = np.zeros(df_false_positives_m_v3.shape[1], dtype=int) # starting point

# https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin_cg.html
result_g = opt.fmin_cg(f, start, args=(small_red_v2,))
print(result_g)