In [None]:
import numpy as np
import cv2
import glob
from matplotlib import pyplot as plt
import os
import pandas as pd
from osgeo import gdal
from osgeo import osr
import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString
def imshow(image, show_axes = False, quiet = False):
    """Display an OpenCV image with matplotlib.

    image: array with a 3-element shape (treated as BGR) or any other
        shape (treated as single-channel grayscale).
    show_axes: when False the axes and tick marks are hidden.
    quiet: when False, plt.show() is called so the figure is rendered.
    """
    # matplotlib expects RGB; a grayscale input is expanded to RGB so it is not
    # drawn with matplotlib's default colormap.
    conversion = cv2.COLOR_BGR2RGB if len(image.shape) == 3 else cv2.COLOR_GRAY2RGB
    rgb = cv2.cvtColor(image, conversion)

    plt.imshow(rgb)
    if not show_axes:
        plt.axis('off')
    if not quiet:
        plt.show()

# Goal:
This notebook processes the MET steps videos.
1. Separate the image into three scenes based on the surface region
2. Give each prediction point a region indicator
3. Merge the per-region transformations into one single dataframe

## 1. Create three polygons in the image (done in PS)

In [None]:
from shapely.geometry import Polygon
frame_folder = "../../_data/02_siteplan/sample_frames/current_sample"
ref_name = "20100612-082221b02_3446.jpg"

imgpath = os.path.join(frame_folder, ref_name)

# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than inside cvtColor.
image = cv2.imread(imgpath)
if image is None:
    raise FileNotFoundError(f"Could not read reference frame: {imgpath}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Region outlines in image (pixel) coordinates; np.int32 truncates the
# fractional part of each vertex.
pts1 = np.array([[303.6, 359.5], [720,316.5], [720, 480],[0, 480], [0, 343.5]],
               np.int32)
pts2 = np.array([[303.6, 359.5], [327.6, 214.5], [452.6, 214.5], [720, 300.5],[720, 316.5]], np.int32)
pts3 = np.array([[303.6, 359.5], [327.6, 214.5], [0, 203.5], [0, 343.5]], np.int32)
thickness = 2
color = (255, 0, 0)  # red, because `image` is RGB at this point
isClosed = True

ptls = [pts1, pts2, pts3]

# cv2.polylines wants each polygon as an (n, 1, 2) array.
image = cv2.polylines(image, [pts.reshape((-1, 1, 2)) for pts in ptls],
                      isClosed, color, thickness)

fig, ax = plt.subplots(figsize = (10,10))
ax.imshow(image)
# cv2.imwrite expects BGR channel order; convert back so the saved file keeps
# the colours shown above instead of swapping red and blue.
cv2.imwrite(os.path.join(frame_folder, ref_name.split(".")[0]+"split.jpg"),
            cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

In [None]:
# Image-space lookup table: one polygon per surface region, in the same order
# as the vertex arrays stored in `ptls`.
regiondf = gpd.GeoDataFrame(
    {
        "region": ["ground", "right_step", "left_step"],
        "geometry": [Polygon(list(map(tuple, pts))) for pts in ptls],
    },
    geometry="geometry",
)
regiondf.plot()

In [None]:
# load the image and plot the region of interest on the image
frame_folder = "../../_data/02_siteplan/sample_frames/current_sample"
ref_name = "20100612-120118b01_3446.jpg"

imgpath = os.path.join(frame_folder, ref_name)

# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than inside cvtColor.
image = cv2.imread(imgpath)
if image is None:
    raise FileNotFoundError(f"Could not read reference frame: {imgpath}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Region outlines in image (pixel) coordinates; np.int32 truncates the
# fractional part of each vertex.
pts1 = np.array([[360.6, 352.5], [720,316.5], [720, 480],[0, 480], [0, 335.5]],
               np.int32)
pts2 = np.array([[360.6, 352.5], [374.6, 215.5], [477.6, 211.5], [720, 288.5],[720, 316.5]], np.int32)
pts3 = np.array([[360.6, 352.5], [374.6, 215.5], [0, 220.5], [0, 335.5]], np.int32)
thickness = 2
color = (255, 0, 0)  # red, because `image` is RGB at this point
isClosed = True

ptls = [pts1, pts2, pts3]

# cv2.polylines wants each polygon as an (n, 1, 2) array.
image = cv2.polylines(image, [pts.reshape((-1, 1, 2)) for pts in ptls],
                      isClosed, color, thickness)

fig, ax = plt.subplots(figsize = (10,10))
ax.imshow(image)
# save this image as the reference file
# cv2.imwrite expects BGR channel order; convert back so the saved file keeps
# the colours shown above instead of swapping red and blue.
cv2.imwrite(os.path.join(frame_folder, ref_name.split(".")[0]+"split.jpg"),
            cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

In [None]:
# Rebuild the region lookup from the current `ptls`. This rebinds the global
# `regiondf` that the projection functions read when assigning regions.
regiondf = gpd.GeoDataFrame(
    {
        "region": ["ground", "right_step", "left_step"],
        "geometry": [Polygon(list(map(tuple, pts))) for pts in ptls],
    },
    geometry="geometry",
)
regiondf.plot()

In [None]:
# pts = pts1

# mask = np.zeros(image.shape[:2], np.uint8)
# cv2.drawContours(mask, [pts], -1, (255, 255, 255), -1, cv2.LINE_AA)
# dst = cv2.bitwise_and(image, image, mask=mask)
# # bg = np.ones_like(image, np.uint8)*255
# # cv2.bitwise_not(bg,bg, mask=mask)
# # dst2 = bg+ dst
# plt.imshow(dst)
# plt.imshow(dst2)

# 2. Assign every predicted point to one of the three regions

In [None]:
# load one sample prediction
# result_folder = "../_data/06_attr_result/"
# videoname = "20100612-120118b02"
# # videoname = "20100521-074701b03"
# predpath = os.path.join(result_folder, f'{videoname}.csv')
# trace = pd.read_csv(predpath)
# trace.rename(columns = {"x":"bbox0", "y":"bbox1", "w":"bbox2", "h":"bbox3"}, inplace = True)
# trace['loc_x'] = (trace['bbox0'] + trace['bbox0'] + trace['bbox2'])/2
# trace['loc_y'] = (trace['bbox1'] + trace['bbox3'])
# # generate point at image space
# tracept = gpd.GeoDataFrame(
#     trace,
#     geometry = gpd.points_from_xy(trace.loc_x, trace.loc_y)
# )
# # spatial join to get region name
# tracept_update = gpd.sjoin(tracept, regiondf, how = 'inner')
# print("region assigned")

# tracept_update.drop("geometry", axis = 1, inplace = True)
# del tracept
# import gc
# gc.collect()

# 3. Get projection by each region

In [None]:
def get_proj_matrix(ref):
    """Estimate the homography that maps image points to map coordinates.

    ref: table of control points with columns 'mapX' and 'mapY' plus either
        'sourceX'/'sourceY' or 'pixelX'/'pixelY'. The source Y column is
        multiplied by -1 before use (presumably the stored pixel Y is
        negative-down; confirm against the control-point files).

    Returns the matrix produced by cv2.findHomography, ready to be passed to
    cv2.perspectiveTransform.

    Raises ValueError when fewer than 4 point pairs are supplied, since a
    homography cannot be estimated from fewer correspondences.
    """
    try:
        src_x, src_y = ref['sourceX'], -1*ref['sourceY']
    except KeyError:
        # Alternative column naming for the image-space coordinates.
        src_x, src_y = ref['pixelX'], -1*ref['pixelY']

    pts_src = np.column_stack((src_x, src_y)).astype(np.float64)
    pts_dst = np.column_stack((ref['mapX'], ref['mapY'])).astype(np.float64)

    if len(pts_src) < 4:
        raise ValueError(
            f"need at least 4 corresponding points to fit a homography, got {len(pts_src)}"
        )

    # The second return value (inlier mask) is not used.
    h, _ = cv2.findHomography(pts_src, pts_dst)
    return h

def projectPlan(df, h, x, y):
    """Apply homography `h` to the points stored in columns `x` and `y` of `df`.

    Returns the output of cv2.perspectiveTransform for an (n, 1, 2) float32
    input, i.e. one transformed (x, y) pair per row of `df`.
    """
    coords = df[[x, y]].to_numpy()
    # cv2.perspectiveTransform requires float input shaped (n, 1, 2).
    src = coords.astype(np.float32).reshape(-1, 1, 2)
    return cv2.perspectiveTransform(src, h)


def pixel2coord(col, row, ds):
    """Return the coordinates of the centre of raster cell (col, row).

    col, row: base-0 raster indices.
    ds: object exposing GetGeoTransform(), which returns the six affine
        coefficients (x origin, x per column, x per row,
        y origin, y per column, y per row).

    Returns an (x, y) tuple in the coordinate system of the geotransform.
    """
    x_origin, x_per_col, x_per_row, y_origin, y_per_col, y_per_row = ds.GetGeoTransform()
    # The 0.5 terms shift from the cell corner to the cell centre.
    xp = x_per_col * col + x_per_row * row + x_per_col * 0.5 + x_per_row * 0.5 + x_origin
    yp = y_per_col * col + y_per_row * row + y_per_col * 0.5 + y_per_row * 0.5 + y_origin
    return (xp, yp)


# bbox0, bbox1, bbox2, bbox3 : x1, y1, w, h
# Replace this part for other data
# Trace

# check gender distribution within one track_id
# set attribute list
def get_attr(trace):
    """Return one row per track_id holding the most frequent value of each attribute.

    trace: DataFrame with a 'track_id' column and every attribute column
        listed in `attribute_ls` below. It is not modified.

    Returns a DataFrame with the attribute columns followed by 'track_id',
    one row per track (the row index of each track's first occurrence is kept).
    An attribute with no non-null value for a track is returned as NaN.
    """
    attribute_ls = ['gender', 'age', 'side', 'glasses', 'hat', 'hold_objects_in_front',
        'bag', 'upper', 'lower', 'boots']

    def _dominant(values):
        # Series.mode() is empty when every value is missing.
        modes = values.mode()
        return modes.iloc[0] if len(modes) else np.nan

    # Work on a copy so the caller's per-frame predictions are not overwritten.
    majority = trace.copy()
    for attr in attribute_ls:
        majority[attr] = majority.groupby("track_id")[attr].transform(_dominant)

    attr_df = majority.drop_duplicates("track_id")[attribute_ls+["track_id"]]
    return attr_df

def getclean(trace, h, epsg, videoname):
    """Project the foot points of `trace` with `h` and keep the tracking columns.

    Adds 'x_{epsg}', 'y_{epsg}' and 'video_id' columns to `trace` in place,
    then returns the subset of columns that appear in the whitelist below,
    in the order they occur in `trace`.
    """
    projected = projectPlan(trace, h, 'loc_x', 'loc_y')
    x_col, y_col = f'x_{epsg}', f'y_{epsg}'
    trace[x_col] = projected[:, :, 0]
    trace[y_col] = projected[:, :, 1]
    trace['video_id'] = videoname

    wanted = {
        'video_id', 'frame_id', 'track_id',
        'loc_x', 'loc_y',
        x_col, y_col,  # projected coordinates in the reference CRS
        'category_id', "score", "region",
    }
    # Columns missing from `trace` (e.g. no score) are simply skipped.
    present = [name for name in trace.columns if name in wanted]
    return trace[present]
            

def getgdf(traceDF, epsg, tail = True, length = 3):
    """Turn projected track points into a GeoDataFrame with lat/lon columns.

    traceDF: DataFrame with 'track_id', 'x_{epsg}' and 'y_{epsg}' columns;
        'moving_x' and 'moving_y' are added to it in place.
    epsg: EPSG code of the projected coordinates.
    tail, length: accepted but not used by the current implementation.

    Returns a GeoDataFrame in EPSG:4326 with 'lat', 'lon' and the per-track
    rolling means 'lat_moving' and 'lon_moving'. All rolling means use a
    window of 5 rows with a minimum of 1 observation, in row order.
    """
    def _rolling_mean(values):
        return values.rolling(5, 1).mean()

    x_col, y_col = f'x_{epsg}', f'y_{epsg}'
    traceDF['moving_x'] = traceDF.groupby('track_id')[x_col].transform(_rolling_mean)
    traceDF['moving_y'] = traceDF.groupby('track_id')[y_col].transform(_rolling_mean)

    # Geometry is built from the raw projected coordinates, not the smoothed ones.
    traceGDF = gpd.GeoDataFrame(
        traceDF,
        geometry = gpd.points_from_xy(traceDF[x_col], traceDF[y_col]),
        crs = f"EPSG:{epsg}",
    ).to_crs('EPSG:4326')
    traceGDF['lat'] = traceGDF['geometry'].y
    traceGDF['lon'] = traceGDF['geometry'].x

    traceGDF['lat_moving'] = traceGDF.groupby('track_id')['lat'].transform(_rolling_mean)
    traceGDF['lon_moving'] = traceGDF.groupby('track_id')['lon'].transform(_rolling_mean)
    return traceGDF

# drop the outlier automatically
def find_outliers_IQR(df, field, low = 0.25, high = 0.75):
    """Split `df` into outlier rows and kept rows using an inter-quantile fence.

    The fence is [q_low - 1.5 * spread, q_high + 1.5 * spread], where the
    quantiles of `df[field]` are taken at `low` and `high` and `spread` is
    their difference.

    Returns (outliers, keep): `outliers` keeps the original index, `keep` has
    a fresh 0..n-1 index. Rows whose value is NaN land in neither frame.
    """
    values = df[field]
    lower_q, upper_q = values.quantile(low), values.quantile(high)
    spread = upper_q - lower_q
    floor, ceiling = lower_q - 1.5 * spread, upper_q + 1.5 * spread

    outside = (values < floor) | (values > ceiling)
    inside = (values >= floor) & (values <= ceiling)
    return df[outside], df[inside].reset_index(drop = True)

In [None]:
def get_proj_video_archive(videoname, ref):
    """Project the attribute-annotated detections of one video (archived variant).

    Reads '<videoname>.csv' from the attribute-result folder, drops boxes whose
    width/height ratio is an outlier, assigns each foot point to a region of
    the global `regiondf`, projects the points with the homography fitted to
    `ref`, and attaches the per-track majority 'gender' and 'age'.

    Relies on the globals `regiondf` (must match the camera view of this
    video) and `videopath_sel` (source of the reference EPSG code).
    """
    result_folder = "../../_data/06_attr_result/"
    predpath = os.path.join(result_folder, f'{videoname}.csv')
    trace = pd.read_csv(predpath)
    trace['ratio'] = trace['w']/trace['h']
    _, trace = find_outliers_IQR(trace, 'ratio', 0.15, 0.85)
    trace = trace.rename(columns = {"x":"bbox0", "y":"bbox1", "w":"bbox2", "h":"bbox3"})
    # Foot point: horizontal centre and bottom edge of the bounding box.
    trace['loc_x'] = (trace['bbox0'] + trace['bbox0'] + trace['bbox2'])/2
    trace['loc_y'] = (trace['bbox1'] + trace['bbox3'])

    tracept = gpd.GeoDataFrame(
        trace,
        geometry = gpd.points_from_xy(trace.loc_x, trace.loc_y)
    )
    # spatial join to get region name; points outside every region are dropped
    trace = gpd.sjoin(tracept, regiondf, how = 'inner').drop("geometry", axis = 1)
    print("region assigned")

    # Aggregate the attributes now: getclean() below keeps only a fixed column
    # whitelist that contains no attribute columns, so calling get_attr() on
    # its output would raise KeyError.
    attr_df = get_attr(trace)

    h = get_proj_matrix(ref)
    # set up to match the projection of the reference data
    epsg = videopath_sel[videopath_sel['video_id']==videoname]['ref_epsg'].values[0]
    traceDF = getclean(trace, h, epsg, videoname)
    traceGDF = getgdf(traceDF, epsg)

    # drop outliers
    _, traceGDF_keep = find_outliers_IQR(traceGDF, "moving_x")

    # errors="ignore": the per-frame gender/age columns are normally already
    # gone after getclean(); drop them only if they are still present.
    traceGDF = traceGDF_keep.drop(columns = ["gender","age"], errors = "ignore")\
        .merge(attr_df[["track_id", "gender", "age"]], on = "track_id", how = "left")

    return traceGDF

def getbasics(file_path):
    """Open a video and report its frame rate and frame size.

    Returns (video, fps, size) where `video` is the cv2.VideoCapture, which is
    left open for the caller, and `size` is (width, height) in pixels.
    """
    video = cv2.VideoCapture(file_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    size = (width, height)
    print('frames per second =',fps)
    print('frames size =',size)
    return video, fps, size

# read in the points
def get_ref(ref_path, folder = None):
    """Load a comma-separated control-point file as a float DataFrame.

    ref_path: file name, relative to `folder`.
    folder: directory holding the file; defaults to the global `ref_folder`.

    The first data line supplies the column names. Blank lines and lines
    starting with '#' (such as a CRS header that some georeferencer exports
    prepend) are skipped so they are not mistaken for the header row.

    Raises ValueError if the file contains no header line.
    """
    base = ref_folder if folder is None else folder
    with open(os.path.join(base, ref_path), "r") as f:
        rows = [
            line.strip().split(",")
            for line in f
            if line.strip() and not line.lstrip().startswith("#")
        ]
    if not rows:
        raise ValueError(f"no header line found in control-point file: {ref_path}")
    # convert to dataframe, then to float
    ref = pd.DataFrame(rows[1:], columns = rows[0])
    return ref.astype(float)


# def get_all_info(videoname, useimage = True):
#     """One reference file version"""
#     ref_path = videopath_sel[videopath_sel['video_id']==videoname]['ref_path'].values[0]
#     ref = get_ref(ref_path)

#     traceGDF_keep = get_proj_video(videoname, ref)
#     if "category_id" in traceGDF_keep.columns:
#         traceGDF_people = traceGDF_keep[(traceGDF_keep["category_id"] == 0)&(traceGDF_keep["score"]>0.1)].reset_index(drop = True)
#         return traceGDF_people
#     else:
#         return traceGDF_keep
    
    
def get_all_info(videoname, useimage = True):
    """Project one video region by region and stack the results.

    For each of the three regions the detections are projected with that
    region's control points and only the rows labelled with that region are
    kept. When the video's ref_path contains "ground", the per-region file
    name is obtained by replacing "ground" with the region name; otherwise
    the single file at ref_path is used for every region.

    `useimage` is accepted but not used. Relies on the global `videopath_sel`
    and on `get_proj_video`, which must already be defined when this is
    called.

    NOTE(review): get_proj_video re-reads and re-projects the whole video once
    per region, and its moving_x outlier filter runs on all points (including
    those belonging to other regions) before the region filter is applied
    here; confirm this is intended.
    """
    ref_path = videopath_sel[videopath_sel['video_id']==videoname]['ref_path'].values[0]
    has_region_refs = "ground" in ref_path
    # Without per-region files there is a single reference; load it once.
    shared_ref = None if has_region_refs else get_ref(ref_path)

    region_frames = []
    for reg in ["ground", "right_step", "left_step"]:
        ref = get_ref(ref_path.replace("ground", reg)) if has_region_refs else shared_ref
        projected = get_proj_video(videoname, ref)
        region_frames.append(projected[projected['region']==reg].reset_index(drop = True))
    return pd.concat(region_frames).reset_index(drop = True)
    
    
def get_frame_num(time_str, fps = 29.97002997002997):
    """Convert a clock string such as "12:01:30 AM" into a frame number.

    Only the minute and second fields are used: the text before the first
    space is taken, its first three characters (the hour and colon) are
    skipped, and the remainder is read as "MM:SS".

    Returns int(minute*60*fps + second*fps), or np.nan when `time_str` is not
    a string in that layout (for example a missing value).
    """
    try:
        time = time_str.split(" ")[0][3:].split(":")
        minute = int(time[0])
        second = int(time[1])
    except (AttributeError, TypeError, IndexError, ValueError):
        # Non-string input (NaN/None) or a string without MM:SS fields.
        return np.nan
    frame = minute*60*fps + second*fps
    return int(frame)

In [None]:

# Input folders, relative to the notebook's working directory.
# frame_folder: sample frames used as reference images.
frame_folder = "../../_data/02_siteplan/sample_frames/current_sample"
tiff_folder = "../../_data/02_siteplan/geo_tiff"
# ref_folder is read as a global by get_ref() when it opens a control-point file.
ref_folder = "../../_data/02_siteplan/gcp_pt/"
# Directory listings of the two folders above.
frames = os.listdir(frame_folder)
refs = os.listdir(ref_folder)
# reference points
# create a dataframe to store all video names for each location
# videoname = "B16_G10_Env25_0001" # historical bryant park
# videoname = "20081008-141944b03" # current bryant park

# videoname = "20100521-074701b06" # downtown crossing example
# frame_start = "3446"

In [None]:
# integrate the start and end timestamp
videopath = pd.read_csv("../../_data/00_raw/_video_meta/video_path_0509.csv")
videopath['video_id'] = videopath['video_name'].apply(lambda x: x.split(".")[0])
# videopath_sel = videopath[videopath['scene'].isin([2,3])]
videopath_sel = videopath[~videopath['ref_path'].isna()].reset_index(drop = True)
videopath_sel['first_effective_time'].unique()
videopath_sel['first_effective_time'] = videopath_sel['first_effective_time'].fillna("12:00:00 AM")
videopath_sel['frame_start'] = videopath_sel['first_effective_time'].apply(lambda x: get_frame_num(x))

# videopath_sel['last_effective_time'] = videopath_sel['last_effective_time'].fillna("12:00:00 AM")
videopath_sel['frame_end'] = videopath_sel['last_effective_time'].apply(lambda x: get_frame_num(x))
videopath_sel['frame_end'] = videopath_sel['frame_end'].fillna(videopath_sel['length'])
videopath_sel['ref_epsg'] = videopath_sel['ref_epsg'].astype(int)



# scene_proj = {
#     0:None,
#     1:None,
#     2: "20100521-074701b06",
#     3: "20100521-115755b02",
    
# }
# videopath_sel['ref_frame'] = videopath_sel['scene'].apply(lambda x: scene_proj[x])

# load finished prediction names
exportfolder = "../../_data/06_attr_result"
finished = os.listdir(exportfolder)
videopath_sel['finished'] = videopath_sel['video_id'].apply(lambda x: x+".csv" in finished)

videopath_sel.head()


In [None]:
# Recording group to process: the selection cell below keeps the rows of
# videopath_sel whose ref_path contains this string.
group = "20100612-120118" # use no attribute detection results
# group = "20100612-082221"

In [None]:
from tqdm import tqdm

# Destination for the projected tracks; created on first use.
outputfolder = "../../_data/05_tracking_result_projected/step0_no_attr_prj"
if not os.path.exists(outputfolder):
    os.makedirs(outputfolder)

# Videos whose reference file belongs to the selected recording group.
now_processing = videopath_sel.loc[videopath_sel['ref_path'].str.contains(group)]\
    .reset_index(drop = True)
now_processingls = now_processing['video_id'].to_numpy()
now_processingls

In [None]:
# NOTE(review): this overrides the list computed in the previous cell, but the
# entry carries a ".txt" suffix (get_proj_video appends ".txt" itself) and the
# projection loop below iterates over its own hard-coded list - confirm whether
# this assignment is still needed.
now_processingls = ['20100612-120118b03.txt']
def get_proj_video(videoname, ref,
                   result_folder = "/Users/yuan/Dropbox (MIT)/whyte_CV/_data/03_tracking_result/_current_video_no_attr"):
    """Project the tracking result of one video (no attribute columns).

    videoname: video id; '<videoname>.txt' is read from `result_folder`.
    ref: control-point table passed to get_proj_matrix().
    result_folder: folder holding the tab-separated tracking files. The
        default is the original machine-specific location; pass another
        folder to run elsewhere.

    Steps: read boxes (x1, y1, x2, y2, track_id, frame_id), drop boxes whose
    width/height ratio is an outlier, assign each foot point to a region of
    the global `regiondf`, project with the homography fitted to `ref`, and
    drop rows whose smoothed x coordinate is an outlier.

    Relies on the globals `regiondf` (must match the camera view of this
    video) and `videopath_sel` (source of the reference EPSG code).
    Returns the kept rows as produced by getgdf().
    """
    predpath = os.path.join(result_folder, f'{videoname}.txt')
    trace = pd.read_csv(predpath, sep = '\t', header = None)
    trace.columns = [ "x1", "y1", "x2", "y2", "track_id", "frame_id"]
    trace['w'] = trace['x2'] - trace['x1']
    trace['h'] = trace['y2'] - trace['y1']
    trace['ratio'] = trace['w']/trace['h']
    _, trace = find_outliers_IQR(trace, 'ratio', 0.15, 0.85)
    trace = trace.rename(columns = {"x1":"bbox0", "y1":"bbox1", "w":"bbox2", "h":"bbox3"})

    # Foot point: horizontal centre and bottom edge of the bounding box.
    trace['loc_x'] = (trace['bbox0'] + trace['bbox0'] + trace['bbox2'])/2
    trace['loc_y'] = (trace['bbox1'] + trace['bbox3'])

    tracept = gpd.GeoDataFrame(
        trace,
        geometry = gpd.points_from_xy(trace.loc_x, trace.loc_y)
    )
    # spatial join to get region name; points outside every region are dropped
    trace = gpd.sjoin(tracept, regiondf, how = 'inner').drop("geometry", axis = 1)
    print("region assigned")

    h = get_proj_matrix(ref)
    # set up to match the projection of the reference data
    epsg = videopath_sel[videopath_sel['video_id']==videoname]['ref_epsg'].values[0]
    traceDF = getclean(trace, h, epsg, videoname)
    traceGDF = getgdf(traceDF, epsg)

    # drop outliers
    _, traceGDF_keep = find_outliers_IQR(traceGDF, "moving_x")
    return traceGDF_keep

In [None]:

    
for videoname in tqdm(['20100612-120118b04']):
    print(videoname)
    traceGDF_people = get_all_info(videoname, useimage = False)

    # Metadata row(s) of this video in the full table.
    video_meta = videopath[videopath['video_id'] == videoname]
    file_path = video_meta['videopath'].values[0]
    fps = video_meta['fps'].values[0]

    # only keep the frame_id after the first desired frame
    first_frame_sel = videopath_sel[videopath_sel['video_id']== videoname]['frame_start'].values[0]
    after_start = traceGDF_people['frame_id'] >= first_frame_sel
    traceGDF_people = traceGDF_people[after_start].reset_index(drop = True)

    traceGDF_people["second"] = traceGDF_people["frame_id"]//fps

    # One output sub-folder per video location.
    loc_name = video_meta['video_location'].values[0]
    destfolder = os.path.join(outputfolder, loc_name)
    if not os.path.exists(destfolder):
        os.makedirs(destfolder)
    out_csv = os.path.join(destfolder, f"{videoname}_projected.csv")
    traceGDF_people.drop("geometry", axis = 1).to_csv(out_csv, index = False)

In [None]:
# 20100612-082221b02_3446_ground_modified.tif

## Test: draw a frame to inspect the detection results

In [None]:
# NOTE(review): `temp` is only assigned further down (frame preview cell), so
# this cell fails on a fresh kernel unless that cell has been run first.
temp = gpd.GeoDataFrame(
    temp,
    geometry = gpd.points_from_xy(temp["lon"], temp["lat"]),
    crs = "EPSG:4326",
)

In [None]:
# Display the source video path last assigned by the projection loop above.
file_path

In [None]:
# Open one video for visual inspection of the detections.
# historical videofolder:
# file_path = f"../_data/00_raw/_mp4/videos_old_highres/{videoname}.mp4"
# current videofolder:
# file_path = f"../_data/00_raw/videos_current_highres/bryant_park/{videoname}.avi"
videoname = "20100612-120118b01"
# Re-reads the metadata table and rebinds the global `videopath`.
videopath = pd.read_csv("../../_data/00_raw/_video_meta/video_path_0509.csv")
videopath['video_id'] = videopath['video_name'].apply(lambda x: x.split(".")[0])
# NOTE(review): the next two assignments are overwritten by the hard-coded
# absolute path on the third one, so only that last value reaches getbasics().
# Confirm which path is intended; the absolute path only exists on one machine.
file_path = videopath[videopath['video_id'] == videoname]['videopath'].values[0]
file_path = "../"+file_path
file_path = "/Users/yuan/Dropbox (MIT)/whyte_CV/_data/00_raw/videos_current_highres/Met Steps videos (NEW)/20100521-115754-01/20100612-120118b01.avi"
# `video` stays open; it is read from in the next cell.
video, fps, size = getbasics(file_path)


In [None]:


frame_id = 3000
video.set(cv2.CAP_PROP_POS_FRAMES, frame_id)
ret, frame = video.read()
# video.read() signals failure through `ret` (frame is then None), e.g. when
# the file could not be opened or frame_id is past the end of the video.
if not ret:
    raise RuntimeError(f"Could not read frame {frame_id} from {file_path}")

# plot the frame
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
fig, ax = plt.subplots(figsize = (10,10))
ax.imshow(frame)

# load traceGDF_people for this video
loc_name = "Met Steps videos (NEW)"
destfolder = os.path.join(outputfolder, loc_name)
filepath = os.path.join(destfolder, f"{videoname}_projected.csv")
traceGDF_people = pd.read_csv(filepath)

# Overlay the foot points detected in this frame (image coordinates).
temp = traceGDF_people[traceGDF_people["frame_id"] == frame_id]
ax.scatter(
    temp["loc_x"],
    temp["loc_y"], color = 'red')

# export the x,y coordinates to csv of this one frame
# temp[["geometry","track_id"]].to_file(f"../_data/05_demo/2023-04-30/{videoname}_frame_{frame_id}.geojson",
# driver = "GeoJSON")

In [None]:
# temp[["geometry","track_id"]].to_file(f"../_data/05_demo/2023-04-30/{videoname}_frame_{frame_id}.geojson", 
# driver = "GeoJSON")

# 4. Visualize example tracks

In [None]:
# NOTE(review): this rebinds `videopath` from a different csv than the cells
# above (video_path.csv, not video_path_0509.csv) and without the derived
# 'video_id' column - confirm that is intended.
videopath = pd.read_csv("../../_data/00_raw/_video_meta/video_path.csv")
outputfolder = "../../_data/05_tracking_result_projected/step0_no_attr_prj"

# select one video for trace visualization (uses the current `loc_name` and `videoname`)
destfolder = os.path.join(outputfolder, loc_name)
trace = pd.read_csv(os.path.join(destfolder, f"{videoname}_projected.csv"))
traceGDF = gpd.GeoDataFrame(
    trace,
    geometry = gpd.points_from_xy(trace["lon"], trace["lat"]),
)

In [None]:
# Keep only the rows whose moving_x lies inside the IQR fence.
# NOTE: this rebinds traceGDF, so running the cell again filters the already
# filtered data a second time.
_, traceGDF = find_outliers_IQR(traceGDF, "moving_x")

In [None]:
epsg = 3857

# plot a trace sample
temp = traceGDF[traceGDF["frame_id"]<4000]
_, temp = find_outliers_IQR(temp, "moving_x")

# construct lines: keep tracks with more than 10 points, ordered by frame
tracesum = temp.groupby("track_id").size().reset_index().rename(columns = {0:"count"})
trackidls = tracesum[tracesum["count"]>10]["track_id"].unique()
# NOTE(review): grouping needs 'gender' and 'age' columns; the no-attribute
# projection output does not contain them - confirm the input file.
geo_df2 = temp[temp["track_id"].isin(trackidls)].sort_values("frame_id").reset_index(drop = True)\
.groupby(['track_id',"gender", "age"])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()

# The groupby/apply/reset_index result is not guaranteed to be a GeoDataFrame,
# so build one explicitly before reprojecting. The points were created from
# lon/lat, hence EPSG:4326 as the source CRS.
geo_df2 = gpd.GeoDataFrame(geo_df2, geometry = "geometry")\
    .set_crs("EPSG:4326", allow_override = True)\
    .to_crs(f"EPSG:{epsg}")

In [None]:
# assert and print pass
# Sanity check: the track lines must be in the target CRS before plotting.
expected_crs = f"EPSG:{epsg}"
assert geo_df2.crs == expected_crs, "crs is not correct"

geo_df2.plot(column = "gender", figsize = (10,10))

In [None]:
# Write the track lines as GeoJSON; the relative file name places it in the
# current working directory.
geo_df2.to_file("MET steps.geojson", driver = "GeoJSON")

# Export at H3 level for occupancy-rate estimation

In [None]:
# NOTE(review): fps is hard-coded here and replaces the value read from the
# video metadata; the base date below is also a literal. Confirm both match
# the video being processed.
fps = 29
traceGDF["second_from_start"] = traceGDF["frame_id"]/fps

# Split the elapsed time into clock fields. Minutes and seconds wrap at 60 so
# the three fields stay consistent for recordings longer than one hour.
traceGDF["hour"] = (traceGDF["second_from_start"]//3600).astype(int)
traceGDF["minute"] = ((traceGDF["second_from_start"]%3600)//60).astype(int)
traceGDF["second"] = traceGDF["second_from_start"]%60

# Add the elapsed time to the base date directly instead of formatting and
# re-parsing a string (float fields such as "0.0:5.0:12.3" do not parse).
traceGDF["timestamp"] = pd.Timestamp("2008-10-08") + pd.to_timedelta(traceGDF["second_from_start"], unit = "s")


# Convert all detections to H3 resolution-15 hexagons for aggregation

In [None]:
# traceGDF.to_file(os.path.join(clipfolder, f"{videoname[:-4]}_prediction_with_attr.geojson"), driver = "GeoJSON")

In [None]:
# Replace traceGDF with a previously exported projection that includes attributes.
# NOTE(review): absolute, machine-specific Windows path; the result is a plain
# DataFrame (pd.read_csv), not a GeoDataFrame.
traceGDF = pd.read_csv(r"D:\Dropbox (MIT)\whyte_CV\_data\05_tracking_result_projected\step0_attr_prj\20081008-141944b03_projected_with_attr.csv")
traceGDF.head()

In [None]:
from h3 import h3
res = 15
# One H3 cell id per detection, computed from its lat/lon at resolution `res`.
traceGDF[f"h3_{res}"] = [
    h3.geo_to_h3(lat, lon, res)
    for lat, lon in zip(traceGDF["lat"], traceGDF["lon"])
]


In [None]:
# count distinct tracks per h3 cell and gender (one column per gender value)
# The cell column name is derived from `res` everywhere, so changing the
# resolution does not leave a stale "h3_15" key behind.
countpeople_gender = traceGDF.groupby([f"h3_{res}","gender"])["track_id"].nunique().reset_index()\
    .pivot(columns = "gender", index = [f"h3_{res}"], values = "track_id").reset_index().fillna(0)
countpeople_gender
# countpeople_gender.to_csv(os.path.join(outfolder, f"{videoname}_prediction_aggregation.csv"), index = False)

In [None]:
# count distinct tracks per h3 cell and age group (one column per age value)
# The cell column name is derived from `res` everywhere, so changing the
# resolution does not leave a stale "h3_15" key behind.
countpeople_age = traceGDF.groupby([f"h3_{res}","age"])["track_id"].nunique().reset_index()\
    .pivot(columns = "age", index = [f"h3_{res}"], values = "track_id").reset_index().fillna(0)
countpeople_age

In [None]:
# Combine the gender and age counts per h3 cell. The outer merge can leave
# NaN where a cell appears in only one table; treat those as 0 so "total"
# is always a number.
summary = countpeople_gender.merge(countpeople_age, on = [f"h3_{res}"], how = "outer").fillna(0)
summary["total"] = summary["Female"]+summary["Male"]
# NOTE(review): this folder is one directory level shallower ("../_data") than
# the "../../_data" paths used elsewhere in the notebook - confirm.
outfolder = "../_data/05_tracking_result_projected"
summary.to_csv(os.path.join(outfolder, f"{videoname}_prediction_aggregation_overall.csv"), index = False)

In [None]:
# Number of rows per track, then the shape of the subset of tracks that have
# more than `fps` rows.
# NOTE(review): `traceGDF_keep` is never assigned at notebook level in the
# cells above (it only exists as a local inside functions), so this cell fails
# on a fresh kernel - confirm which frame is meant. Also rebinds `summary`.
summary = traceGDF_keep.groupby("track_id").size().reset_index()
summary[summary[0]>fps].shape


In [None]:
# Distinct tracks per h3 cell, written to csv.
# NOTE(review): neither `traceGDF_keep` nor `clipfolder` is assigned at
# notebook level in the cells above, so this cell fails on a fresh kernel -
# confirm the intended frame and output folder.
countpeople = traceGDF_keep.groupby([f"h3_{res}"])["track_id"].nunique().reset_index()
countpeople.to_csv(os.path.join(clipfolder, f"{videoname}_prediction_aggregation_all.csv"), index = False)