In [1]:
import numpy as np
import pandas as pd
import cv2
from matplotlib import pyplot as plt
from collections import defaultdict
import seaborn as sns
from tqdm import tqdm 

In [14]:
# Two random integer tables: 1000 rows x 2 columns, values drawn from [600, 700).
A = np.random.randint(600, 700, size=(1000, 2))
B = np.random.randint(600, 700, size=(1000, 2))

nrows, ncols = A.shape

# Describe one whole row as a single structured record (fields f0, f1, ...)
# so that np.intersect1d compares complete rows instead of single scalars.
field_names = ['f%d' % col for col in range(ncols)]
dtype = dict(names=field_names, formats=[A.dtype] * ncols)

rows_a = A.view(dtype)
rows_b = B.view(dtype)
C = np.intersect1d(rows_a, rows_b)

# Turn the structured result back into a plain (n_common, ncols) array.
C = C.view(A.dtype).reshape(-1, ncols)

In [15]:
len(C)

81

In [99]:
# Read the CSV and discard every row that contains a missing value.
csv_file = "data/PISSS_ID_003_Approach Two Gaze-Vergence.csv"
df_data = pd.read_csv(csv_file).dropna()

# Two-column array taken from the frame, in the order ("Y Pos", "X Pos").
x = df_data.loc[:, ["Y Pos", "X Pos"]].values

In [18]:
# Regions of interest, keyed by name. Every entry is [first, second, kind]:
#   kind "c" (circle):    first = (cx, cy) centre,        second = radius
#   kind "r" (rectangle): first = (top-left x, y) corner, second = (bottom-right x, y) corner
label = {
    "standby_ASI": [(534, 424), 50, "c"],
    "standby_AH": [(528, 530), 50, "c"],
    "standby_ALT": [(523, 626), 45, "c"],
    "RPM_LEFT": [(633, 411), (672, 451), "r"],
    "G1000_ASI": [(674, 432), (705, 525), "r"],
    "G1000_AI": [(720, 413), (790, 495), "r"],
    "HSI": [(742, 556), 45, "c"],
    "G1000_ALT": [(797, 428), (852, 523), "r"],
    "NRST_Box": [(792, 534), (876, 554), "r"],
    "RPM_RIGHT": [(1054, 440), 30, "c"],
    "outside": [(0, 0), (1280, 350), "r"],
}


In [22]:
# For every labelled region, draw its filled shape on a blank h x w mask and
# store the np.where result (row indices, column indices) of the covered pixels.
pdict = defaultdict()
h, w = 720, 1280
for key in label:
    shape_kind = label[key][-1]
    if shape_kind not in ("c", "r"):
        continue  # entries with any other type tag are left out of pdict
    mask = np.zeros((h, w), np.uint8)
    draw_shape = cv2.circle if shape_kind == "c" else cv2.rectangle
    # thickness -1 asks OpenCV for a filled shape
    draw_shape(mask, label[key][0], label[key][1], 255, -1)
    points = np.where(mask == 255)
    pdict[key] = points

In [35]:
# Radii, in pixels, of the two concentric discs drawn around a point.
fovea_radius, para_radius = 20, 50

def get_fovea_parafovea(point):
    """Return the pixel indices of the fovea disc and the parafovea ring.

    A blank (h, w) mask is created from the module-level ``h`` and ``w``.
    A filled disc of radius ``para_radius`` is drawn at ``point`` with value
    255, then a filled disc of radius ``fovea_radius`` is drawn on top with
    value 100, so the pixels still equal to 255 form the surrounding ring.

    Args:
        point: centre handed to ``cv2.circle``.

    Returns:
        ``(fovea, parafovea)``: two ``np.where`` results, i.e. tuples of
        (row indices, column indices) for the pixels equal to 100 and to 255.
    """
    canvas = np.zeros((h, w), np.uint8)
    # Order matters: the smaller disc must overwrite the centre of the larger one.
    for radius, value in ((para_radius, 255), (fovea_radius, 100)):
        cv2.circle(canvas, point, radius, value, -1)

    return np.where(canvas == 100), np.where(canvas == 255)

def count_intersect(A,B):
    """Count the distinct rows that occur in both ``A`` and ``B``.

    Args:
        A: iterable of rows (e.g. an (n, k) array or a list of lists).
        B: iterable of rows, compared against ``A``.

    Returns:
        int: number of distinct rows present in both inputs; 0 when either
        input is empty.
    """
    # Rows become tuples so they are hashable; the size of the set
    # intersection is the answer, so no intermediate array is built from it.
    return len(set(map(tuple, A)) & set(map(tuple, B)))

def calculate_intersect(point, ROI, fovea_weight=3):
    """Weighted fraction of the visual field around ``point`` that lies in ``ROI``.

    Fovea pixels count ``fovea_weight`` times, parafovea pixels count once:

        (fovea_hits * fovea_weight + parafovea_hits)
        / (fovea_pixels * fovea_weight + parafovea_pixels)

    Args:
        point: centre passed on to ``get_fovea_parafovea``.
        ROI: index arrays in the same layout ``get_fovea_parafovea`` returns
            (one array per axis); transposed here into one row per pixel.
        fovea_weight: weight of a fovea pixel relative to a parafovea pixel.
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        float: the weighted overlap ratio, or 0.0 when neither disc
        contributes any pixel.
    """
    fovea, parafovea = get_fovea_parafovea(point)
    fovea = np.transpose(fovea)
    parafovea = np.transpose(parafovea)
    ROI = np.transpose(ROI)

    total = len(fovea) * fovea_weight + len(parafovea)
    if total == 0:
        # No fovea/parafovea pixels at all (presumably the point lies far
        # outside the frame); avoid a ZeroDivisionError and report no overlap.
        return 0.0

    ifovea = count_intersect(fovea, ROI)
    iparafovea = count_intersect(parafovea, ROI)

    return (ifovea * fovea_weight + iparafovea) / total

In [47]:
def matching_rows(A,B):
    """Count the distinct rows present in both 2-D arrays ``A`` and ``B``.

    Each row is reinterpreted as one structured record with one field per
    column, so ``np.intersect1d`` compares whole rows. (A single-field
    record dtype would make it compare individual scalars instead.)

    Args:
        A: 2-D array-like of shape (n, k).
        B: 2-D array-like of shape (m, k).

    Returns:
        int: number of distinct rows that appear in both inputs.

    Raises:
        ValueError: if either input is not 2-D or the column counts differ.
    """
    A = np.asarray(A)
    B = np.asarray(B)
    if A.ndim != 2 or B.ndim != 2 or A.shape[1] != B.shape[1]:
        raise ValueError("A and B must be 2-D arrays with the same number of columns")
    if A.size == 0 or B.size == 0:
        return 0

    # The record view needs C-contiguous rows and one shared element dtype.
    common = np.result_type(A.dtype, B.dtype)
    A = np.ascontiguousarray(A, dtype=common)
    B = np.ascontiguousarray(B, dtype=common)

    row_dtype = np.dtype([('f{}'.format(i), common) for i in range(A.shape[1])])
    A_rows = A.view(row_dtype).ravel()
    B_rows = B.view(row_dtype).ravel()
    return len(np.intersect1d(A_rows, B_rows))

In [52]:
import timeit

In [53]:
# Benchmark input: the fovea pixels around one point versus the "outside" ROI,
# both converted to one (row, col) pair per pixel.
point = (650, 450)
fovea, parafovea = get_fovea_parafovea(point)
fovea = np.transpose(fovea)
parafovea = np.transpose(parafovea)
ROI = np.transpose(pdict["outside"])

# Mean seconds per call over n_runs calls. Each timing is labelled with the
# function it measures so the two results can be told apart.
n_runs = 100
for fn in (matching_rows, count_intersect):
    elapsed = timeit.timeit(lambda: fn(fovea, ROI), number=n_runs) / n_runs
    print('\t {}:'.format(fn.__name__), elapsed)



	 Voted answer: 0.627371506
	 Voted answer: 0.846919995


In [54]:
[1,2,3,4]

[1, 2, 3, 4]

In [None]:
# NOTE: a rectangle is described by its min (x, y) corner and its max (x, y) corner.

In [59]:
# Small example: find the rows shared by two lists of rows.
a = [[1, 2], [3, 4], [5, 6]]
b = [[2, 3], [3, 4], [7, 1]]

# Tuples are hashable, so each row can be stored in a set.
aset = set(map(tuple, a))
bset = set(map(tuple, b))
np.array(list(aset & bset))

array([[3, 4]])

2742571194192

In [86]:
# Two independent (1000, 1) columns of random integers drawn from [600, 700).
column_shape = (1000, 1)
A = np.random.randint(600, 700, size=column_shape)
B = np.random.randint(600, 700, size=column_shape)

In [87]:
# Random binary labels. `high` is exclusive, so it must be 2 to produce both
# 0 and 1 (high=1 can only ever yield 0).
C = np.random.randint(low = 0, high = 2, size=(1000, 1))

In [95]:
# Flatten each (n, 1) array to 1-D so every cell holds a plain scalar.
# Zipping the 2-D arrays put a 1-element ndarray in each cell and left the
# columns with object dtype.
df_data = pd.DataFrame({"A": np.ravel(A), "B": np.ravel(B), "C": np.ravel(C)})

In [96]:
# Split the frame into feature columns (A, B) and the target column (C).
df_X = df_data.loc[:, ["A", "B"]]
df_Y = df_data.loc[:, ["C"]]


In [104]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [153]:
# Toy training set: five points near (1, 1) labelled 0, five at (8, 9) labelled 1.
x = np.array([
    [1, 1], [1, 2], [1, 1], [1, 2], [2, 1],
    [8, 9], [8, 9], [8, 9], [8, 9], [8, 9],
])
y = np.array([0] * 5 + [1] * 5)

In [154]:
x.shape, y.shape

((10, 2), (10,))

In [155]:
# Fit a logistic-regression classifier, constructed with default arguments, on x / y.
clf = LogisticRegression().fit(x, y)


In [159]:
# Evaluation points paired with their expected labels.
test_cases = [
    ([1, 3], 0),
    ([4, 5], 1),
    ([4, 6], 1),
    ([1, 7], 0),
    ([6, 6], 0),
    ([6, 8], 1),
    ([1, 1], 0),
]
x_test = [features for features, _ in test_cases]
y_test = [target for _, target in test_cases]

In [160]:
# Class labels predicted by the fitted classifier for the evaluation points.
y_pred = clf.predict(x_test)

In [162]:
# Print expected and predicted label side by side, one pair per line.
for pair in zip(y_test, y_pred):
    print(*pair)

0 0
1 0
1 1
0 0
0 1
1 1
0 0


In [165]:
# Per-class probabilities for the evaluation points (one row per point).
y_prob = clf.predict_proba(x_test)

In [167]:
# Predict 1 whenever the probability in column 1 of y_prob exceeds 0.3,
# otherwise predict 0.
pred2 = [1 if v[1] > 0.3 else 0 for v in y_prob]

In [None]:
# NOTE (translated from Vietnamese): recall (0 is the problem) = 1

In [168]:
# Per-class precision / recall / F1 for the thresholded predictions.
target_names = ['class 0', 'class 1']
report = classification_report(y_test, pred2, target_names=target_names)
print(report)

              precision    recall  f1-score   support

     class 0       1.00      0.75      0.86         4
     class 1       0.75      1.00      0.86         3

    accuracy                           0.86         7
   macro avg       0.88      0.88      0.86         7
weighted avg       0.89      0.86      0.86         7



In [79]:
id(a)

2742551640328

In [44]:
# Example sales records: (timestamp string, amount).
sales_records = [
    ('2020-02-01 00:13:00', 2000),
    ('2020-02-01 02:10:00', 26080),
    ('2020-02-01 03:03:00', 5060),
    ('2020-02-01 06:52:00', 800),
    ('2020-02-01 09:19:00', 3000),
    ('2020-02-01 09:50:00', 10934),
    ('2020-02-01 10:00:00', 2250),
    ('2020-02-01 10:06:00', 3600),
    ('2020-02-19 21:43:00', 11528),
]
sales = pd.DataFrame(sales_records, columns=['Date', 'Amount'])

In [45]:
# Add a time-of-day column derived from the timestamp strings in "Date".
parsed_dates = pd.to_datetime(sales["Date"])
sales["Time"] = parsed_dates.dt.time

In [32]:
sales

Unnamed: 0,Date,Amount,Time
0,2020-02-01 00:13:00,2000,00:13:00
1,2020-02-01 02:10:00,26080,02:10:00
2,2020-02-01 03:03:00,5060,03:03:00
3,2020-02-01 06:52:00,800,06:52:00
4,2020-02-01 09:19:00,3000,09:19:00
5,2020-02-01 09:50:00,10934,09:50:00
6,2020-02-01 10:00:00,2250,10:00:00
7,2020-02-01 10:06:00,3600,10:06:00
8,2020-02-19 21:43:00,11528,21:43:00


In [46]:
# Total amount of the sales whose time of day lies strictly between
# 00:00:00 and 04:59:00.
window_start = pd.to_datetime("00:00:00").time()
window_end = pd.to_datetime("04:59:00").time()
in_window = (sales["Time"] > window_start) & (sales["Time"] < window_end)
sales[in_window]["Amount"].sum()

33140

In [4]:
# Small categorical frame, written row by row (columns col1, col2, col3).
a = pd.DataFrame(
    [list("AAD"),
     list("BBA"),
     list("DDB"),
     list("CAD"),
     list("ABA")],
    columns=["col1", "col2", "col3"],
)

In [6]:
# Count how often each value occurs across all cells of the frame.
# The top-level pd.value_counts is deprecated, so wrap the flattened values
# in a Series and use its value_counts method instead.
pd.Series(a.values.flatten()).value_counts()

A    6
D    4
B    4
C    1
dtype: int64

In [1]:
import numpy as np

In [21]:
# The integers 0..7 laid out as 4 rows x 2 columns.
a = np.arange(8).reshape(4, 2)

In [None]:
# Take column 0 with a list index so the result stays 2-D (one column).
b = a[:,[0]]

In [34]:
# Append b to a as extra column(s) by joining along axis 1.
np.concatenate((a,b), axis = 1)

array([[0, 1, 0],
       [2, 3, 2],
       [4, 5, 4],
       [6, 7, 6]])