In [None]:
import os
import pickle
import subprocess
import warnings
from itertools import chain

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly as py
import seaborn as sns

import methods as m

warnings.filterwarnings('ignore')

In [None]:
def get_list_videos(vid_dir):
    """
    Collect the names of all files located under a directory.

    The tree below ``vid_dir`` is traversed recursively with ``os.walk``. Only the
    bare file names are returned (without any directory component), in the order
    in which ``os.walk`` reports them.

    :param vid_dir: Root directory to search.

    :return: list of file names; empty when nothing is found.
    """
    return [name for _, _, names in os.walk(vid_dir) for name in names]


def run_openpose(vid_dir,coord_location, openpose_location):
    """
    Run the OpenPose demo executable on every video found under ``vid_dir``.

    For each file name returned by ``get_list_videos`` the program
    ``bin/OpenPoseDemo.exe`` inside ``openpose_location`` is started with
    ``--video`` pointing at the video inside ``vid_dir`` and ``--write_json``
    pointing at a location of the same name inside ``coord_location``.

    The command is handed to ``subprocess.run`` as an argument list, so paths
    containing spaces, quotes or shell metacharacters are passed through
    verbatim instead of being re-parsed by a shell. The child process is started
    with ``openpose_location`` as its working directory; the working directory
    of the notebook process itself is left unchanged, so relative paths used
    elsewhere in the notebook keep pointing at the same place.

    :param vid_dir: Directory containing the input videos.
    :param coord_location: Directory below which the JSON output is written.
    :param openpose_location: Root directory of the OpenPose binaries.

    :raises FileNotFoundError: if the OpenPose executable cannot be started.
    """
    # Absolute path to the executable, so that locating it does not depend on
    # how the platform resolves relative program names in combination with cwd.
    executable = os.path.join(openpose_location, 'bin', 'OpenPoseDemo.exe')

    for video in get_list_videos(vid_dir):
        completed = subprocess.run(
            [
                executable,
                '--video', os.path.join(vid_dir, video),
                '--write_json', os.path.join(coord_location, video),
            ],
            cwd=openpose_location,
        )
        # Stay best-effort like before (keep going with the next video), but
        # make a failing run visible instead of discarding the exit status.
        if completed.returncode != 0:
            print('OpenPose exited with code {} for {}'.format(completed.returncode, video))

        
def create_total_feature_df(coord_df, video_number, return_df, person_plottables, running_fragments, fragments, fps):
    """
    Compute the features of one video and add them to the running result.

    :param coord_df: DataFrame with the coordinates observed in the video.
    :param video_number: Number identifying the video.
    :param return_df: Features accumulated so far, or ``None`` for the first video.
    :param person_plottables: Forwarded unchanged to ``to_feature_df``.
    :param running_fragments: Forwarded unchanged to ``to_feature_df``.
    :param fragments: Forwarded unchanged to ``to_feature_df``.
    :param fps: Forwarded unchanged to ``to_feature_df``.

    :return: the features of this video when ``return_df`` is ``None``, otherwise
        ``return_df`` with the features of this video added below it.
    """
    feature_df = to_feature_df(coord_df, video_number, person_plottables, running_fragments, fragments, fps)
    if return_df is None:
        return feature_df
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    # and stacks the rows the same way.
    return pd.concat([return_df, feature_df])


def to_feature_df(coord_df, video_number, person_plottables, running_fragments, fragments, fps):
    """
    Gets a DataFrame of coordinates and turns this into features.
    In this case, the standard deviation of vertical movement. This can easily be
    extended to horizontal movement in case that helps for discovering speed.

    :param coord_df: A dataframe containing all relevant coordinates observed in the video.
        The columns 'Fragment', 'Point' and 'y' are read here. The frame is not modified.
    :param video_number: Number identifying the video; stored in a 'video' column.
    :param person_plottables: Forwarded unchanged to ``m.speed_via_distance``.
    :param running_fragments: Forwarded unchanged to ``m.speed_via_distance``.
    :param fragments: Forwarded unchanged to ``m.speed_via_distance``.
    :param fps: Forwarded unchanged to ``m.speed_via_distance``.

    :return features_df: a dataframe indexed by (video, Fragment) with one column per
        'Point' holding the standard deviation of 'y', plus the columns 'video',
        'Forward_leaning' and 'speed (km/h)'.
    """

    # Set video number. Work on a copy so the caller's frame is not mutated as a side effect.
    coord_df = coord_df.assign(video=video_number)

    # Extract basic standard deviation features of all joints.
    # The string 'std' selects pandas' own std. Passing np.std has so far been
    # translated to that same method, but pandas warns that a future version will
    # call the NumPy function directly; the string keeps the results stable.
    feature_df = coord_df.pivot_table(index=['video', 'Fragment'], columns='Point', values='y', aggfunc='std')

    # Mirror the row index into a regular column. The index has the two levels
    # (video, Fragment), so the stored label identifies both.
    feature_df['video'] = feature_df.index

    # Add a value representing how much (in absolute values) someone leaned forward
    feature_df['Forward_leaning'] = m.forward_leaning_angle(coord_df)

    feature_df['speed (km/h)'] = m.speed_via_distance(person_plottables, running_fragments, fragments, fps)

    return feature_df

In [None]:
def prepare_data_for_classification(vid_dir,coord_location, openpose_location):
    """
    Build one feature DataFrame covering every video found under ``vid_dir``.

    For each video the OpenPose output of the same name is read from
    ``coord_location``, the moving people are selected, running and turning
    fragments are determined, the coordinates are rotated, and the resulting
    features are added to the combined result via ``create_total_feature_df``.
    Videos are numbered 1, 2, 3, ... in the order ``get_list_videos`` returns them.

    :param vid_dir: Directory containing the videos.
    :param coord_location: Directory containing the OpenPose output per video.
    :param openpose_location: Root directory of the OpenPose binaries. Not used
        here; only needed when OpenPose has to be run first (see the comment below).

    :return: the combined feature DataFrame, or ``None`` when no video was found.
    """
    # The OpenPose output is expected to exist already. To (re)generate it, call
    # run_openpose(vid_dir, coord_location, openpose_location) before this function.
    return_df = None
    for video_number, video in enumerate(get_list_videos(vid_dir), start=1):
        print(video)
        # os.path.join instead of a hard-coded backslash keeps the paths valid on every platform.
        # Only the frame rate is needed from the metadata.
        _image_h, _image_w, fps = m.determine_video_meta_data(os.path.join(vid_dir, video))

        period_person_division = m.get_period_person_division(
            m.get_openpose_output(os.path.join(coord_location, video)), fps)
        person_period_division = m.get_person_period_division(period_person_division)

        mean_x_per_person = m.get_mean_x_per_person(person_period_division)

        normalized_moved_distance_per_person = m.normalize_moved_distance_per_person(mean_x_per_person)
        maximum_normalized_distance = max(normalized_moved_distance_per_person.values())

        # A person counts as moving when their normalized distance exceeds a quarter of the maximum.
        movement_threshold = maximum_normalized_distance / 4
        moving_people = [key for key, value in normalized_moved_distance_per_person.items()
                         if value > movement_threshold]

        person_plottables_df = m.get_person_plottables_df(mean_x_per_person, moving_people)

        # Of all DBSCAN subsets, keep the one whose members have the most periods in total.
        dbscan_subsets = m.get_dbscan_subsets(maximum_normalized_distance, person_plottables_df)
        periods_per_subset = [sum(len(person_period_division[person]) for person in subset)
                              for subset in dbscan_subsets]
        max_dbscan_subset = dbscan_subsets[np.argmax(periods_per_subset)]

        # NOTE(review): the meaning of the two distance-derived arguments is defined
        # by m.determine_plottable_people — confirm there.
        plottable_people = m.determine_plottable_people(person_plottables_df,
                                                        max_dbscan_subset,
                                                        maximum_normalized_distance*4,
                                                        maximum_normalized_distance**2)

        running_fragments, turning_fragments, fragments = m.get_running_and_turning_fragments(
            plottable_people,
            mean_x_per_person,
            person_plottables_df,
            moving_people,
            fps)

        coord_df = m.get_dataframe_from_coords(
            m.prepare_data_for_plotting(period_person_division, plottable_people, running_fragments))

        # The rotation angle is taken from the coordinates before they are processed further.
        rotation_angle = m.get_rotation_angle(coord_df)

        coord_df = m.process_coord_df(coord_df)

        # Rotate the first two components of every coordinate triple; the third is kept as is.
        period_person_division = {
            period: {person: np.array([m.rotate((x, y), rotation_angle) + (z,) for x, y, z in coords])
                     for person, coords in period_dictionary.items()}
            for period, period_dictionary in period_person_division.items()}

        # Only the per-person plottables are used for the features.
        person_plottables, _running_plottables, _turning_plottables = m.get_plottables(
            period_person_division, plottable_people, running_fragments, turning_fragments)

        return_df = create_total_feature_df(coord_df, video_number, return_df,
                                            person_plottables, running_fragments, fragments, fps)
    return return_df
    
    
        
# Machine-specific locations used for this run.
VIDEO_DIR = r'C:\Users\jaspe\tf-openpose\clips'
COORDINATES_DIR = r'C:\Users\jaspe\tf-openpose\demo\openpose-1.2.1-win64-binaries\coordinates'
OPENPOSE_DIR = r'C:\Users\jaspe\tf-openpose\demo\openpose-1.2.1-win64-binaries'

# Locations used on another machine (kept for reference):
#   videos:      C:\Users\herbe\Dropbox\TUe\DS-E\2017-2018\JM0130 Data Entrepreneurship in Action II\4. 2D_pose_estimation\videos
#   coordinates: C:\Users\herbe\Dropbox\TUe\DS-E\2017-2018\JM0130 Data Entrepreneurship in Action II\4. 2D_pose_estimation\coordinates
#   openpose:    C:\Users\Herbert van Leeuwen\Desktop\openpose-1.2.1-win64-binaries\openpose-1.2.1-win64-binaries

df = prepare_data_for_classification(VIDEO_DIR, COORDINATES_DIR, OPENPOSE_DIR)

In [None]:
# Show the combined feature table returned by prepare_data_for_classification.
df

In [None]:
% matplotlib inline

In [None]:
# One regression panel per feature against the measured speed.
TARGET = 'speed (km/h)'
N_COLS = 3

# Every column except the bookkeeping 'video' column and the target is a feature.
# Selecting by name (instead of "all but the last column" through a set) keeps
# the panel order stable between runs and does not depend on the column order.
features = [column for column in df.columns if column not in ('video', TARGET)]

# Ceiling division: exactly as many rows as needed, and at least one.
n_rows = max(1, -(-len(features) // N_COLS))

# squeeze=False always yields a 2-D array of axes, also for a single row.
f, ax = plt.subplots(nrows=n_rows, ncols=N_COLS, figsize=(18, 5 * n_rows), squeeze=False)
panels = ax.ravel()

for panel, feature in zip(panels, features):
    sns.regplot(data=df, x=feature, y=TARGET, ax=panel)

# Hide the panels of the last row that have no feature to show.
for panel in panels[len(features):]:
    panel.set_visible(False)

In [None]:
# Write the feature table as comma-separated text, relative to the current working directory.
df.to_csv(path_or_buf='df_for_colin.csv', sep=',')

In [None]:
# Pickle the feature table. The context manager makes sure the file handle is
# flushed and closed, instead of leaving that to garbage collection.
with open('df_for_colin.p', 'wb') as pickle_file:
    pickle.dump(df, pickle_file)