In [None]:
import os
from itertools import zip_longest

import cv2
import numpy as np
import pandas as pd
from matplotlib import pyplot
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
from matplotlib.colors import ListedColormap
from scipy import signal
from scipy.fft import fftshift
from scipy.signal import savgol_filter
#import seaborn as sns
from sklearn import preprocessing
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.utils import shuffle
from tqdm import tqdm

In [None]:
def frame_count(video_path, manual=False):
    """Return the number of frames in the video at ``video_path``.

    Parameters
    ----------
    video_path : str
        Path handed to ``cv2.VideoCapture``.
    manual : bool, default False
        If True, decode the whole video and count the frames that can
        actually be read (slow but exact). If False, use the frame count
        reported by ``CAP_PROP_FRAME_COUNT`` (fast but possibly inaccurate)
        and fall back to the manual count when that value is unusable.

    Returns
    -------
    int
        Number of frames; 0 when no frame can be read.
    """
    def manual_count(handler):
        """Read ``handler`` to exhaustion and return how many frames it gave."""
        frames = 0
        while True:
            status, _ = handler.read()
            if not status:
                break
            frames += 1
        return frames

    cap = cv2.VideoCapture(video_path)
    try:
        if manual:
            return manual_count(cap)

        try:
            frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        except Exception:
            frames = 0
        # An unsupported property is reported as 0 (not as an exception), so
        # a non-positive value is what must trigger the manual fallback.
        if frames <= 0:
            frames = manual_count(cap)
        return frames
    finally:
        # Release the capture even if decoding raised.
        cap.release()

In [None]:
# Make the meal-video folder the working directory: later cells open videos by
# bare file name (e.g. '1043_meal.mp4', the entries of `videos`) and write CSVs
# with relative paths, so both resolve against this directory.
os.chdir('/Users/andrei-macpro/Documents/Data/videos/meal_videos')

In [None]:
path = '/Users/andrei-macpro/Documents/Data/videos/meal_videos'

# Alphabetically sorted directory listing, minus hidden entries (names that
# start with a dot).
videos = [name for name in sorted(os.listdir(path)) if not name.startswith(".")]


In [None]:
# Subject IDs of the two groups. `dataframes` matches them (as strings) against
# the prefix of each video file name before the first '_' and labels the
# matching columns 'rad' or 'no-rad' respectively.
rad = [1047, 1059, 1069, 1079, 1089, 1093, 1099, 1107, 1108, 
      1117, 1122, 1124, 1125, 1129, 1131, 1132, 1134, 1148, 1186, 
      1190, 1195, 1206, 1210, 1217, 1230, 1246, 1250, 1264, 2009, 2027]
no_rad = [1043, 1049, 1053, 1062, 1073, 1080, 1082, 1091, 1092, 
         1096, 1097, 1098, 1104, 1105, 1112, 1118, 1130, 1146, 
         1161, 1170, 1180, 1181, 1184, 1188, 1234, 1241, 1245, 1263,
         1269, 1282, 2025]

In [None]:
# Exploratory run on one video: sample every 10th frame, compare each sampled
# frame with the one sampled 10 samples earlier, binarise the difference with
# Otsu's threshold and count the changed pixels.
cap = cv2.VideoCapture('1043_meal.mp4')
frame_no = frame_count('1043_meal.mp4', manual=True)
frames = []
counter = 0
differences = []

for fr in range(1, frame_no, 10):
    cap.set(cv2.CAP_PROP_POS_FRAMES, fr)
    ret, frame = cap.read()
    if not ret:
        # Seek/decode failed: there is no image to convert, skip this sample.
        continue
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # grayscale for easier analysis
    frames.append(gray)
    if len(frames) > 100:
        frames = frames[-100:]
    if counter > 10:
        # cv2.subtract saturates at 0 on uint8 images, which made the former
        # np.abs(...) a no-op; absdiff gives the absolute difference.
        difference = cv2.absdiff(frames[-1], frames[-1-10])
        ret2, th = cv2.threshold(difference, 0, 1, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        differences.append(np.sum(th))
    # Previously the counter was never advanced, so `counter > 10` was never
    # true and `differences` stayed empty.
    counter += 1
cap.release()

print(len(differences))
differences = np.array(differences)
# Savitzky-Golay smoothing, window 11, polynomial order 2.
differences = savgol_filter(differences, 11, 2)

In [None]:
def frame_differencing(video_input, threshold_method, granularity):
    """Return a smoothed per-sample count of changed pixels for one video.

    Every ``granularity``-th frame (starting at frame index 1) is read and
    converted to grayscale. Each sampled frame is compared with the previously
    sampled one, the absolute difference is binarised with ``cv2.threshold``
    (threshold 0, max value 1, mode ``threshold_method``), and the number of
    set pixels is recorded. The series is then smoothed with a Savitzky-Golay
    filter (window 11, polynomial order 2).

    Parameters
    ----------
    video_input : str
        Path of the video file.
    threshold_method : int
        OpenCV threshold type flag(s), e.g.
        ``cv2.THRESH_BINARY + cv2.THRESH_OTSU``.
    granularity : int
        Step, in frames, between two sampled frames.

    Returns
    -------
    numpy.ndarray
        One float per recorded comparison. When fewer samples than the
        smoothing window are available the unsmoothed counts are returned
        instead of raising.
    """
    smoothing_window = 11
    smoothing_order = 2

    frame_no = frame_count(video_input, manual=True)
    cap = cv2.VideoCapture(video_input)
    differences = []
    previous_gray = None
    sampled = 0
    try:
        for fr in range(1, frame_no, granularity):
            cap.set(cv2.CAP_PROP_POS_FRAMES, fr)
            ret, frame = cap.read()
            if not ret or frame is None:
                # A failed seek/decode yields no image; skip it rather than
                # crash in cvtColor.
                continue
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Warm-up kept from the original implementation: nothing is
            # recorded until more than `granularity` samples have been read.
            # NOTE(review): a single previous frame is all the comparison
            # needs - confirm the longer warm-up is intended.
            if sampled > granularity:
                # cv2.subtract saturates at 0 on uint8 images, so the former
                # np.abs(cv2.subtract(...)) lost every darkening pixel.
                difference = cv2.absdiff(gray, previous_gray)
                _, th = cv2.threshold(difference, 0, 1, threshold_method)
                differences.append(np.sum(th))
            # Only the previous sample is ever compared, so only it is kept.
            previous_gray = gray
            sampled += 1
    finally:
        # No window is ever opened here, so only the capture needs cleanup.
        cap.release()

    differences = np.array(differences, dtype=float)
    if len(differences) < smoothing_window:
        # savgol_filter rejects inputs shorter than its window.
        return differences
    return savgol_filter(differences, smoothing_window, smoothing_order)

def dataframes(videos, threshold_method, granularity, title):
    """Run frame differencing on every video and tabulate the results.

    Parameters
    ----------
    videos : list of str
        Video file names; the text before the first ``'_'`` of each name is
        used as the column name (subject ID).
    threshold_method : int
        Forwarded to ``frame_differencing``.
    granularity : int
        Forwarded to ``frame_differencing``.
    title : str
        Unused; kept so existing calls keep working (it was only consumed by
        plotting code that is no longer part of this function).

    Returns
    -------
    data : pandas.DataFrame
        One column per video, one row per sample; shorter series are padded
        with NaN.
    df : pandas.DataFrame
        ``data`` transposed (one row per video) and restricted to the IDs
        listed in the module-level ``rad`` and ``no_rad`` lists, with an added
        ``'label'`` column holding ``'rad'`` or ``'no-rad'``.
    """
    frame_diffs = [
        frame_differencing(video, threshold_method, granularity)
        for video in tqdm(videos)
    ]
    # zip_longest pads ragged series so that every column has the same length.
    data = pd.DataFrame(
        list(zip_longest(*frame_diffs)),
        columns=[video.split('_')[0] for video in videos],
    )

    # Explicit set intersection: `Index & list` stopped meaning "intersection"
    # in pandas 2.
    rad_columns = data.columns.intersection([str(x) for x in rad], sort=False)
    no_rad_columns = data.columns.intersection([str(x) for x in no_rad], sort=False)
    data_rad = data[rad_columns]
    data_no_rad = data[no_rad_columns]

    df = pd.concat([
        data_rad.T.assign(label='rad'),
        data_no_rad.T.assign(label='no-rad'),
    ])
    return data, df

In [None]:
# Frame-difference features for every meal video, Otsu thresholding, sampling
# every 20th frame.
# NOTE(review): the title string says "15 frames" while the granularity passed
# is 20; `dataframes` does not use `title`, so only the wording is affected.
data, df = dataframes(videos, cv2.THRESH_BINARY+cv2.THRESH_OTSU, 20, "Meal Otsu 15 frames Absolute")

In [None]:
# Average the rows that share an index value (several videos of one subject).
# numeric_only=True keeps the string 'label' column out of the mean; without
# it, recent pandas raises instead of silently dropping that column.
df = df.groupby(df.index).mean(numeric_only=True)
df

In [None]:
# Labelled corpus table; its first CSV column becomes the index.
labels = pd.read_csv('/Users/andrei-macpro/Documents/Data/Results/corpus_processed/corpus_labelled.csv', index_col=0)
labels

In [None]:
# Give both indexes the same integer dtype so they can be matched in a join.
# The previous `labels.index.astype = 'int64'` assigned over the method
# instead of calling it and therefore never converted anything.
labels.index = labels.index.astype('int64')
df.index = df.index.astype('int64')

In [None]:
# Display both indexes side by side to check that they line up.
labels.index, df.index

In [None]:
# Name the index so that it is written with a header when the frame is saved.
df.index.name = 'Subject_ID'

In [None]:
# Rebuild the labelled, one-row-per-video frame from `data`.
# Index.intersection replaces `Index & list`, which is no longer a set
# intersection in pandas 2.
data_rad = data[data.columns.intersection([str(x) for x in rad], sort=False)]
data_no_rad = data[data.columns.intersection([str(x) for x in no_rad], sort=False)]
df = pd.concat([data_rad.T.assign(label='rad'), data_no_rad.T.assign(label='no-rad')])
# The former `df['label'] = df['label'].sort_index()` was removed: column
# assignment re-aligns on the index, so it changed nothing, and it raises when
# the index holds duplicate subject IDs.

In [None]:
# Persist the raw per-video series (relative path, so it lands in the current
# working directory).
# NOTE(review): the file name says "15" while `dataframes` was called with
# granularity 20 - confirm which one is intended.
data.to_csv('meal_otsu_15_abs.csv')

In [None]:
# calculate mean and variance
# Row-wise mean and variance over every column except the last two.
# NOTE(review): the cells shown build `df` with a single trailing non-feature
# column ('label'), in which case `:-2` also drops the last frame-difference
# column - confirm the slice is intended.
mean, variance = df.iloc[:,:-2].mean(axis=1), df.iloc[:,:-2].var(axis=1)


In [None]:

# Two-column feature table ('mean', 'variance') with the 'label' column of
# `labels` attached through an index join.
new_df = pd.concat(
    [mean, variance],
    axis=1,
    keys=['mean', 'variance'],
).join(labels['label'])

In [None]:
# Inspect the feature table.
new_df

In [None]:
# Save the feature table (index included) to the current working directory.
new_df.to_csv('meal_fd_20.csv')

In [None]:
# Replace `new_df` with a previously saved feature table.
# NOTE(review): this reads 'meal_fd_10.csv' from a different folder, not the
# 'meal_fd_20.csv' written by the preceding cell - confirm this is the intended
# file. No `index_col` is given, so if the file was saved with its index, that
# index comes back as an ordinary column.
new_df = pd.read_csv('/Users/andrei-macpro/Documents/Data/frame_differences/meal_fd_10.csv')

In [None]:
# Histogram of the 'mean' feature on an explicit Axes, with labels so the
# figure can be read on its own.
fig, ax = plt.subplots()
ax.hist(new_df['mean'])
ax.set(xlabel='mean', ylabel='count', title="Distribution of 'mean'")
plt.show()

In [None]:
# Empirical CDF of the 'variance' feature, one curve per value of 'label'.
import seaborn as sns
sns.displot(new_df, x="variance", hue="label", kind="ecdf")

In [None]:
# Feature matrix and targets for the classifiers, both taken from `new_df` so
# that row i of X and entry i of y always describe the same sample.
# Before, X contained every column of `new_df` (including 'label') and y came
# from `df`, a different frame whose row order is unrelated to `new_df`.
feature_columns = ['mean', 'variance']
X = new_df[feature_columns].to_numpy()
y = new_df['label'].to_numpy()

In [None]:
## classification with a grid search

# Shuffle features, targets and group ids together so they stay aligned.
# NOTE(review): `big_df` is not defined in any cell shown here - confirm which
# frame's index should supply the groups.
X_shuffled, y_shuffled, groups_shuffled = shuffle(X,y,big_df.index ,random_state=42)
print(groups_shuffled)
scalar = preprocessing.StandardScaler()
# Grid keys carry the 'model__' prefix because they address the pipeline step
# named 'model'.
parameter_candidates = {'model__kernel':['linear', 'rbf'], 'model__C':[1, 10,100]}
clf = svm.SVC()
# Scaling sits inside the pipeline, so it is re-fitted on each training fold.
pipeline = Pipeline(steps=[('preprocess', scalar), ('model', clf)])

# GroupKFold keeps all samples of one group in the same fold.
cv = GroupKFold(n_splits=5)
search = GridSearchCV(pipeline, parameter_candidates, scoring = 'accuracy', cv =cv)
search.fit(X_shuffled, y_shuffled, groups = groups_shuffled)

In [None]:
# Tabulate the grid-search results: one row per parameter combination.
pd.DataFrame(search.cv_results_)

In [None]:
# Scatter of 'mean' against 'variance', coloured by 'label'.
# NOTE(review): `big_df` is not defined in any cell shown here - confirm which
# frame this should plot.
big_df.plot.scatter(x = 'mean', y ='variance', c = 'label')

In [None]:
## repeat the grid search on 10 different shuffles of the data

# Each repetition re-runs the whole grid search on a differently shuffled copy
# and records the score of the winning parameter combination.
means = []
stds = []
parameter_candidates = {'model__kernel':['linear', 'rbf'], 'model__C':[1, 10,100]}
for i in range(10):
    X_shuffled, y_shuffled = shuffle(X, y, random_state=i)
    scalar = preprocessing.StandardScaler()
    clf = svm.SVC()
    pipeline = Pipeline(steps=[('preprocess', scalar), ('model', clf)])
    cv = KFold(n_splits=5)
    search = GridSearchCV(pipeline, parameter_candidates, cv=cv)
    search.fit(X_shuffled, y_shuffled)
    means.append(search.best_score_)
    # Fold-to-fold spread of the *best* candidate, so it describes the same
    # model as `best_score_` (it used to be averaged over every candidate).
    stds.append(search.cv_results_['std_test_score'][search.best_index_])

np.mean(means), np.mean(stds)


In [None]:
## plot decision regions

# One figure per cross-validation fold: the decision regions of an RBF SVM
# fitted on the (standardised) training part, with the training points on top.
# Assumes X has exactly two feature columns, which form the two plot axes.
resolution = 0.2
markers = ('s', 'x', 'o', '^', 'v')
colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')

# Plain arrays, so that the fold indices below are always positional.
X, y = shuffle(np.asarray(X), np.asarray(y), random_state=0)
classes = np.unique(y)
cmap = ListedColormap(colors[:len(classes)])
cv = KFold(n_splits=5)
plt.rcParams["figure.figsize"] = [10, 7]

for train_index, test_index in cv.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
    model = SVC(kernel='rbf', C=1)
    model.fit(X_train, y_train)

    x1_min, x1_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
    x2_min, x2_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    predicted = model.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    # contourf needs numbers: map each predicted class to its position in the
    # sorted `classes` array (works for string labels as well).
    Z = np.searchsorted(classes, predicted).reshape(xx1.shape)

    fig, ax = plt.subplots()
    ax.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    ax.set_xlim(xx1.min(), xx1.max())
    ax.set_ylim(xx2.min(), xx2.max())

    # Draw the points before showing the figure so that they land on the same
    # axes as the regions.
    for idx, cl in enumerate(classes):
        in_class = y_train == cl
        ax.scatter(x=X_train[in_class, 0], y=X_train[in_class, 1],
                   alpha=0.8, color=cmap(idx),
                   marker=markers[idx], label=cl)
    ax.legend()
    plt.show()

In [None]:
parameter_candidates = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]

# `skf` was never defined in this notebook. A stratified 5-fold splitter is
# used here; 5 matches the number of folds in the other cross-validation cells.
# NOTE(review): confirm this is the splitter that was intended.
skf = StratifiedKFold(n_splits=5)

# Create a classifier object with the classifier and parameter candidates
clf = GridSearchCV(estimator=svm.SVC(), param_grid=parameter_candidates, n_jobs=-1, cv=skf)

# Train the classifier on the feature matrix X and targets y
clf.fit(X, y)