In [1]:
import cv2 as cv
import numpy as np
import pandas as pd
import os
import shutil
import pickle
from traffic_intensity import video_analyze, predict

print(cv.__version__)
print(np.__version__)


4.5.3
1.20.3


# Подготовка данных

In [2]:
df = pd.read_csv('c:/datasets/car_traffic/info.txt', sep='\t', header=0, names=['filename', 'date(yyyymmdd)', 'timestamp', 'direction', 'day/night', 'weather', 'start frame', 'number of frames', 'class', 'notes'])
df.drop(columns=['notes'], inplace=True)
df['points'] = 0.0
df['velocities'] = 0.0
df['labels'] = 0
df['labels'][df['class'] == 'heavy'] = 0
df['labels'][df['class'] == 'light'] = 1
df['labels'][df['class'] == 'medium'] = 2

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['labels'][df['class'] == 'heavy'] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['labels'][df['class'] == 'light'] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['labels'][df['class'] == 'medium'] = 2


In [3]:
# Устранение разбалансировки классов

print(df['labels'].value_counts())

max_cls = df['labels'].value_counts().idxmax()
max_cls_count = df['labels'].value_counts().max()
reduction_factors = df['labels'].value_counts() / max_cls_count
rebalance_count = int(max_cls_count * min(reduction_factors))
droped_idx = np.random.choice(df[df['labels']==max_cls].index, replace=False, size=max_cls_count - rebalance_count) 

df.drop(index=droped_idx, inplace=True)
df.reset_index(inplace=True)

print(df.shape)
print(df['labels'].value_counts())

1    165
2     45
0     44
Name: labels, dtype: int64
(133, 13)
2    45
1    44
0    44
Name: labels, dtype: int64


## Извлечение признаков

In [4]:
path = 'c:/datasets/car_traffic/video/'
backup_path = './images'
save_img = False

if save_img:
    if os.path.exists(backup_path):
        shutil.rmtree(backup_path)
        os.mkdir(backup_path)
    else:
        os.mkdir(backup_path)

for i, name in enumerate(df.filename):
    file = path + name + '.avi'
    df['points'][i], df['velocities'][i] = video_analyze(path=file, saved_path=backup_path, save_img=save_img)
    print(f"File {i + 1}, points: {df['points'][i]}, v: {df['velocities'][i]}, label true: {df['class'][i]}")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['points'][i], df['velocities'][i] = video_analyze(path=file, saved_path=backup_path, save_img=save_img)


File 1, points: 33.0, v: 0.01565540151438868, label true: medium
File 2, points: 38.0, v: 0.010687475323765, label true: medium
File 3, points: 20.0, v: 0.009598616081955688, label true: light
File 4, points: 88.0, v: 0.007524372174481055, label true: medium
File 5, points: 47.0, v: 0.01561422069298849, label true: medium
File 6, points: 82.0, v: 0.008005817318362902, label true: medium
File 7, points: 144.0, v: 0.005837679069034512, label true: medium
File 8, points: 76.0, v: 0.006956397152765837, label true: medium
File 9, points: 69.0, v: 0.008572990018242495, label true: heavy
File 10, points: 32.0, v: 0.023162733082581932, label true: medium
File 11, points: 20.0, v: 0.021439692984988268, label true: medium
File 12, points: 155.0, v: 0.001796508371438936, label true: heavy
File 13, points: 109.0, v: 0.0047904837742628265, label true: medium
File 14, points: 97.0, v: 0.006483434026851283, label true: heavy
File 15, points: 106.0, v: 0.004747679038330585, label true: heavy
File 16, 

In [5]:
print("Корреляция кол-ва точек и средней скорости потока:", df['points'].corr(df['velocities']))

Корреляция кол-ва точек и средней скорости потока: -0.5401292113897629


Корреляция обратная (больше машин, меньше средняя скорость потока), умеренная (средняя). 
Допущение: кол-во объектов учтено в средней скорости потока, поэтому не используется в обучении моделей  

## Формирование обучающей выборки

In [6]:
mask = np.random.rand(len(df)) < 0.8
train = df.loc[:, 'velocities':'labels'][mask]
test = df.loc[:, 'velocities':'labels'][~mask]
print(train.shape, test.shape)

x_train = train.loc[:, 'velocities'].to_numpy()
x_train = np.float32(x_train.reshape(-1, 1))
y_train = train.loc[:, 'labels'].to_numpy()
print(x_train.shape, y_train.shape)

x_test = test.loc[:, 'velocities'].to_numpy()
x_test = np.float32(x_test.reshape(-1, 1))
y_test = test.loc[:, 'labels'].to_numpy()
print(x_test.shape, y_test.shape)


(109, 2) (24, 2)
(109, 1) (109,)
(24, 1) (24,)


# Base model

In [7]:
heavy_25 = np.percentile(df['velocities'][df['class'] == 'heavy'], 25)
heavy_50 = np.percentile(df['velocities'][df['class'] == 'heavy'], 50)
heavy_75 = np.percentile(df['velocities'][df['class'] == 'heavy'], 75)
print("heavy", heavy_25, heavy_50, heavy_75)

medium_25 = np.percentile(df['velocities'][df['class'] == 'medium'], 25)
medium_50 = np.percentile(df['velocities'][df['class'] == 'medium'], 50)
medium_75 = np.percentile(df['velocities'][df['class'] == 'medium'], 75)
print("medium", medium_25, medium_50, medium_75)

light_25 = np.percentile(df['velocities'][df['class'] == 'light'], 25)
light_50 = np.percentile(df['velocities'][df['class'] == 'light'], 50)
light_75 = np.percentile(df['velocities'][df['class'] == 'light'], 75)
print("light", light_25, light_50, light_75)

base_model_edges = {'heavy': (0, max(heavy_50, medium_25)),
                    'medium': (max(heavy_50, medium_25), max(medium_50, light_25)), 
                    'light': (max(medium_50, light_25), ) 
                    }

with open('basemodel.dict', 'wb') as f:
    print("Edges (Low, High):", base_model_edges)
    pickle.dump(base_model_edges, f)

heavy 0.0037783384968033008 0.005978740018833811 0.01120161453311667
medium 0.008886645007061678 0.014360985764407821 0.020692047926079888
light 0.014443771985270958 0.01917832109684389 0.023836442093701116
Edges (Low, High): {'heavy': (0, 0.008886645007061678), 'medium': (0.008886645007061678, 0.014443771985270958), 'light': (0.014443771985270958,)}


## SVM

Проблема: predict не выдает лейбла 2, надо дальше разбираться как работает предикт при мультиклассовой классификации в OpenCV

In [None]:
df['labels'] = 0
df['labels'][df['class'] == 'heavy'] = 0
df['labels'][df['class'] == 'light'] = 1
df['labels'][df['class'] == 'medium'] = 2

In [None]:
svm = cv.ml.SVM_create()
svm.setKernel(cv.ml.SVM_LINEAR)
svm.setType(cv.ml.SVM_C_SVC)
svm.setC(2.67)
svm.setGamma(5.383)
svm.train(x_train, cv.ml.ROW_SAMPLE, y_train)
svm.save('svm_data.dat')

result = svm.predict(x_train)[1]
mask = result==y_test
correct = np.count_nonzero(mask)
print(correct * 100.0 / result.size)


In [None]:
print(result)

## KMeans

In [8]:
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 10, 1.0)
ret, label, center = cv.kmeans(x_train, 3 , None, criteria, 10, cv.KMEANS_RANDOM_CENTERS)
print("Центры кластеров:", center)

Центры кластеров: [[0.03583   ]
 [0.00744685]
 [0.02086256]]


In [9]:
np.save('centroids', center)

In [10]:
index2label = {0: 'light', 1: 'heavy', 2: 'medium'}
with open('index2label.dict', 'wb') as f:
    pickle.dump(index2label, f)

In [11]:
result = []
for i in range(x_test.shape[0]):
    result.append(predict(x_test[i], centroids=center)) 
print("Accuracy:", np.sum(np.array(result)==y_test) / y_test.size)

Accuracy: 0.16666666666666666
