In [None]:
from h3 import h3
import numpy as np
import cv2
import glob
from matplotlib import pyplot as plt
import os
import pandas as pd
from osgeo import gdal
from osgeo import osr
import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString
# seaborn stacked bar plot per frame the stationary and moving people
import seaborn as sns
import matplotlib.ticker as mtick
def imshow(image, show_axes = False, quiet = False):
    """Display an OpenCV image with matplotlib.

    Parameters
    ----------
    image : array
        Image to draw. An array with three dimensions is converted with
        ``COLOR_BGR2RGB``; any other array is converted with ``COLOR_GRAY2RGB``.
    show_axes : bool
        When False (default) the axes and tick marks are hidden.
    quiet : bool
        When False (default) ``plt.show()`` is called after drawing.
    """
    # OpenCV colour images are assumed to be BGR while matplotlib expects RGB.
    # Grayscale input is expanded to RGB so matplotlib does not apply its own
    # colormap (black = 0, white = 1 is what we want).
    if len(image.shape) == 3:
        conversion = cv2.COLOR_BGR2RGB
    else:
        conversion = cv2.COLOR_GRAY2RGB
    plt.imshow(cv2.cvtColor(image, conversion))

    # It is an image, not a chart: drop the axes unless asked to keep them.
    if not show_axes:
        plt.axis('off')
    if not quiet:
        plt.show()

In [None]:
from tqdm import tqdm
outputfolder = "../data/05_tracking_result_projected/step0_attr_prj"


def get_frame_num(time_str, fps = 29.97002997002997):
    """Convert a clock-style time string into a frame index.

    Only the first space-separated token of ``time_str`` is read. Its first
    three characters are dropped (the ``HH:`` part of ``HH:MM:SS``), so only
    the minutes and seconds contribute: ``"12:01:30 AM"`` means 1 min 30 s.

    Parameters
    ----------
    time_str : str
        Time such as ``"12:01:30 AM"``.
    fps : float
        Frames per second used for the conversion.

    Returns
    -------
    int
        ``int(minute*60*fps + second*fps)``, or 0 when ``time_str`` cannot be
        parsed (for example a NaN read from a CSV).
    """
    try:
        clock = time_str.split(" ")[0][3:].split(":")
        minute = int(clock[0])
        second = int(clock[1])
        return int(minute*60*fps + second*fps)
    except (AttributeError, IndexError, TypeError, ValueError, OverflowError):
        # Only parsing/conversion problems fall back to frame 0; interrupts
        # and other unrelated errors are allowed to propagate.
        return 0
    
# Aggregate the number of unique people that appear in each frame as a time series.
# Load a sample video first.
def load_video(videoname):
    """Load the projected trace CSV of one video and attach its metadata.

    Reads ``<outputfolder>/<video_location>/<videoname>_projected.csv``, keeps
    the rows whose ``frame_id`` is greater than the video's ``frame_start``,
    and adds ``timestamp``, ``video_group`` and ``videoname`` columns.
    Relies on the notebook-level ``videopath_sel`` and ``outputfolder``.

    Parameters
    ----------
    videoname : str
        Value of ``video_id`` in ``videopath_sel``.

    Returns
    -------
    pandas.DataFrame
        The filtered trace table.
    """
    # Look the video up once; every field below comes from its first match.
    meta = videopath_sel[videopath_sel['video_id']==videoname]
    loc_name = meta['video_location'].values[0]
    first_frame = meta['frame_start'].values[0]
    section_start = meta['video_section_started_at'].values[0]
    group = meta['video_group'].values[0]

    csv_path = os.path.join(outputfolder, loc_name, f"{videoname}_projected.csv")
    traceGDF = pd.read_csv(csv_path)
    after_start = traceGDF['frame_id']>first_frame
    traceGDF = traceGDF[after_start].reset_index(drop = True)

    # Frame index -> elapsed time at 29.97 fps, offset by the section start.
    elapsed = traceGDF['frame_id'].apply(lambda frame: pd.Timedelta(seconds = frame/29.97))
    traceGDF['timestamp'] = section_start + elapsed
    traceGDF['video_group'] = group
    traceGDF['videoname'] = videoname
    return traceGDF

# drop the outlier speed
def find_outliers_IQR(df, field):
    """Split ``df`` into outlier and non-outlier rows using the 1.5*IQR rule.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to split.
    field : str
        Name of the numeric column to test.

    Returns
    -------
    (outliers, keep) : tuple of pandas.DataFrame
        ``outliers`` holds rows strictly outside ``[q1 - 1.5*IQR, q3 + 1.5*IQR]``;
        ``keep`` holds rows inside the interval (bounds included). Rows whose
        value is NaN end up in neither frame.
    """
    values = df[field]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    IQR = q3-q1
    lower = q1-1.5*IQR
    upper = q3+1.5*IQR

    outliers = df[(values<lower) | (values>upper)]
    keep = df[values.between(lower, upper)]
    return outliers, keep

In [None]:
# Load the video metadata table and derive the helper columns used below.
videopath = pd.read_csv("../../_data/00_raw/_video_meta/video_path_0509.csv")
videopath['video_id'] = videopath['video_name'].apply(lambda x: x.split(".")[0])

# videopath_sel = videopath[videopath['scene'].isin([2,3])]
# Keep only the videos that have a reference path.
videopath_sel = videopath[~videopath['ref_path'].isna()].reset_index(drop = True)

# A missing first effective time means the video is usable from its start.
videopath_sel['first_effective_time'] = videopath_sel['first_effective_time'].fillna("12:00:00 AM")
videopath_sel['frame_start'] = videopath_sel['first_effective_time'].apply(get_frame_num)

# get_frame_num returns 0 (never NaN) for a missing time, so a fillna on its
# result can never fire. Pick the fallback from the raw column instead: rows
# without a last effective time use the video length.
has_last_time = videopath_sel['last_effective_time'].notna()
videopath_sel['frame_end'] = (
    videopath_sel['last_effective_time']
    .apply(get_frame_num)
    .where(has_last_time, videopath_sel['length'])
)

videopath_sel['ref_epsg'] = videopath_sel['ref_epsg'].astype(int)
# The section start only carries a time of day; prepend the date of the group start.
videopath_sel['video_date'] = videopath_sel['video_group_started_at'].apply(lambda x: x.split(" ")[0])
videopath_sel['video_section_started_at'] = pd.to_datetime(videopath_sel['video_date']+ " " +videopath_sel['video_section_started_at'])
# Key that identifies one recording session: "<location>-<group>".
videopath_sel['video_group_update'] = videopath_sel['video_location']+"-"+videopath_sel['video_group']
videols = videopath_sel['video_id'].unique().tolist()

In [None]:

def get_alltrace(selvideogroup, n = 1, res = 15):
    """Load and concatenate the speed-vector traces of every video in a group.

    For each ``video_name`` of the group, reads ``<stagingfolder>/<name>.csv``
    (the last four characters of ``video_name`` are stripped as the file
    extension), builds point geometries from ``lon``/``lat`` in EPSG:4326,
    reprojects them to EPSG:3857 and adds:

    * ``h3_{res}``: H3 cell of the point at resolution ``res``;
    * ``stationary_1`` .. ``stationary_5``: 1 when ``speed_{n}s`` is below
      0.1, 0.2, 0.3, 0.4 and 0.5 respectively, else 0.

    Relies on the notebook-level ``videopath_sel`` and ``stagingfolder``.

    Parameters
    ----------
    selvideogroup : str
        Value of ``video_group_update`` to load.
    n : int
        Selects the speed column ``speed_{n}s``.
    res : int
        H3 resolution.

    Returns
    -------
    geopandas.GeoDataFrame
        All loaded traces stacked with a fresh index.

    Raises
    ------
    ValueError
        If no video of the group could be loaded.
    """
    selpath = videopath_sel[videopath_sel['video_group_update']==selvideogroup].reset_index(drop = True)
    threshold_ls = [0.1, 0.2, 0.3, 0.4, 0.5]
    speed_col = f'speed_{n}s'
    fulldf = []
    for videoname in tqdm(selpath['video_name'].unique()):
        try:
            traceDF = pd.read_csv(os.path.join(stagingfolder, videoname[:-4]+".csv"))
            traceGDF = gpd.GeoDataFrame(
                traceDF,
                geometry=gpd.points_from_xy(traceDF.lon, traceDF.lat),
                crs="EPSG:4326",
            )
            traceGDF = traceGDF.to_crs(epsg=3857)
            traceGDF[f'h3_{res}'] = [
                h3.geo_to_h3(lat, lon, res)
                for lat, lon in zip(traceGDF['lat'], traceGDF['lon'])
            ]
            for i, thred in enumerate(threshold_ls):
                # NaN speeds compare as False and are therefore flagged 0.
                traceGDF['stationary_{}'.format(i+1)] = (traceGDF[speed_col] < thred).astype(int)
        except Exception as err:
            # Best effort: a video that cannot be loaded is skipped, but say so
            # instead of hiding it. Interrupts are no longer swallowed.
            tqdm.write(f"skipping {videoname}: {err!r}")
            continue
        fulldf.append(traceGDF)
    if not fulldf:
        raise ValueError(f"no trace could be loaded for video group {selvideogroup!r}")
    fulldf = pd.concat(fulldf).reset_index(drop = True)
    return fulldf

def get_geo(selvideogroup):
    """Return the H3 cell geometries that belong to one video group.

    Reads ``countdfsum_{res}.geojson`` from the step-5 folder and keeps the
    rows whose ``video_group_update`` equals ``selvideogroup``. ``res`` is the
    notebook-level variable, not a parameter, so it must be defined before
    this function is called.

    Parameters
    ----------
    selvideogroup : str
        Value of ``video_group_update`` to select.

    Returns
    -------
    geopandas.GeoDataFrame
        Matching rows with a fresh index (possibly empty).
    """
    h3folderagg = '../../_data/05_tracking_result_projected/step5_h3_video_level'
    geo_path = os.path.join(h3folderagg, f"countdfsum_{res}.geojson")
    h3geo = gpd.read_file(geo_path)
    in_group = h3geo['video_group_update']==selvideogroup
    return h3geo[in_group].reset_index(drop = True)

In [None]:
# Notebook-level constants.
n = 1 # second threshold to compute speed, matching the historical videos
res = 15 # used for the h3_{res} column and the countdfsum_{res} file name

# Folder the per-video speed-vector CSVs are read from; create it when missing.
stagingfolder = '../../_data/05_tracking_result_projected/step1_speed_vector'
if os.path.exists(stagingfolder) is False:
    os.makedirs(stagingfolder)


In [None]:
# only select the stationary moment, and count the number of unique stationary people per frame per h3
# t = 3
# selvideogroup = 'bryant_park-20081008-141944'
# for selvideogroup in ['Met Steps videos (NEW)-20100612-120118']:
#     traceGDF = get_alltrace(selvideogroup)
#     h3geosel = get_geo(selvideogroup)
#     if h3geosel.shape[0] == 0:
#         print("load geometry unsuccessfully")
#         continue
#     robusttest = []
#     for t in [1,2,3,4,5]:
#         st_gdf = traceGDF[traceGDF[f'stationary_{t}']==1].reset_index(drop = True)
#         st_gdf_h3 = st_gdf.groupby(['frame_id', f'h3_{res}'])['track_id'].nunique().reset_index()\
#             .groupby(f'h3_{res}')['track_id'].mean().reset_index()
#         st_gdf_h3 = st_gdf_h3.rename(columns = {'track_id': 'stationary_count_mean'})
#         st_gdf_h3 = h3geosel.merge(st_gdf_h3, on = f'h3_{res}',  how = 'left').fillna(0)
#         st_gdf_h3['log_stationary_count_mean'] = np.log(st_gdf_h3['stationary_count_mean']+1)
#         st_gdf_h3['stationary_thred'] = t
#         robusttest.append(st_gdf_h3)
#     robusttest = pd.concat(robusttest).reset_index(drop = True)
#     # st_gdf_h3.plot(figsize = (10,10), column = 'log_stationary_count_mean', legend = True)
#     h3folderagg = '../../_data/05_tracking_result_projected/step5_h3_video_level'
#     robusttest.to_file(os.path.join(h3folderagg, f"{selvideogroup}_stationary_loc_robusttest_{res}.geojson"), driver='GeoJSON')

# Plot speed distribution across location and time

In [None]:
# load all speed vector file
# CURRENTLY USING lat lon directly, may need to change lat_moving
# Collect one trace table per video group, tagging each row with its group key.
fullDF = []
for selvideogroup in videopath_sel['video_group_update'].unique():
    group_trace = get_alltrace(selvideogroup)
    group_trace['video_group_update'] = selvideogroup
    fullDF.append(group_trace)

In [None]:
# Stack the per-group tables into one frame. The name fullDF is rebound from a
# list to a DataFrame here, so the guard keeps this cell safe to re-run.
if isinstance(fullDF, list):
    fullDF = pd.concat(fullDF).reset_index(drop = True)

In [None]:
# Sanity check: number of rows with a non-null x_2263 value.
# NOTE(review): presumably the x coordinate projected to EPSG:2263 — confirm.
fullDF['x_2263'].notna().sum()

In [None]:
# Columns of interest in the concatenated trace table, grouped by theme.
selcols = [
    # detection
    'track_id', 'frame_id', 'score',
    # appearance attributes
    'side', 'glasses', 'hat', 'hold_objects_in_front', 'bag', 'upper',
    'lower', 'boots',
    # position
    'loc_x', 'loc_y', 'x_3857', 'y_3857', 'video_id', 'moving_x', 'moving_y',
    'lat', 'lon', 'lat_moving', 'lon_moving',
    # person and time
    'gender', 'age', 'second', 'timestamp', 'video_group', 'videoname',
    # motion
    'move_m_1s', 'speed_1s', 'dist_x_1s', 'dist_y_1s', 'speed_x_1s',
    'speed_y_1s',
    # derived in this notebook
    'video_location', 'geometry', 'h3_15',
    'stationary_1', 'stationary_2', 'stationary_3', 'stationary_4',
    'stationary_5', 'video_group_update',
]

In [None]:
# Inspect the columns available in the concatenated trace table.
fullDF.columns

In [None]:

# Video groups (video_group_update keys) plotted together as the "morning" set.
morningls = [
    'bryant_park-20081008-072238',
    'Chestnut Street videos (NEW)-20100519-083343',
    'Downtown Crossing videos (NEW)-20100521-074701',
    'Met Steps videos (NEW)-20100612-082221',
]
# Video groups plotted together as the "noon" set.
noon = [
    'bryant_park-20081008-141944',
    'Downtown Crossing videos (NEW)-20100521-115755',
    'Met Steps videos (NEW)-20100612-120118',
]

# Legend label of every video group.
labelset = {
    'bryant_park-20081008-072238': "Bryant Park",
    'Chestnut Street videos (NEW)-20100519-083343': "Chestnut Street",
    'Downtown Crossing videos (NEW)-20100521-074701': "Downtown Crossing",
    'Met Steps videos (NEW)-20100612-082221': "MET",
    'bryant_park-20081008-141944': "Bryant Park",
    'Downtown Crossing videos (NEW)-20100521-115755': "Downtown Crossing",
    'Met Steps videos (NEW)-20100612-120118': "MET",
}

# A location keeps the same colour in both sets, so derive the per-group
# colours from the label instead of repeating the hex codes.
_location_color = {
    "Bryant Park": "#ef5c43",
    "Chestnut Street": "#ffdda6",
    "Downtown Crossing": "#3bc0cf",
    "MET": "#0c7cba",
}
colorset = {group: _location_color[label] for group, label in labelset.items()}

# filter out outlier speed
speed_below_cap = fullDF[f'speed_{n}s']<2
data = fullDF[speed_below_cap].reset_index(drop = True)

In [None]:
# Output folder for the figures and tables exported by this notebook.
graphicfolder = "../../_graphics/viz"

In [None]:

def get_hist_group(morningls, filename):
    """Plot overlaid speed density curves for several video groups and save them.

    One filled KDE of ``speed_{n}s`` is drawn per video group, coloured and
    labelled through ``colorset`` / ``labelset``, and the figure is written to
    ``<graphicfolder>/hist_<filename>.svg``. Relies on the notebook-level
    ``data``, ``n``, ``colorset``, ``labelset`` and ``graphicfolder``.

    Parameters
    ----------
    morningls : list of str
        ``video_group_update`` keys to draw (any set of groups, despite the name).
    filename : str
        Suffix of the output file name, without extension.
    """
    fig, ax = plt.subplots(figsize = (5,5))
    speed_col = f'speed_{n}s'
    for selvideogroup in morningls:
        temp = data[data['video_group_update']==selvideogroup].reset_index(drop = True)
        sns.kdeplot(data = temp,
                    x = speed_col,
                    ax = ax,
                    clip = (0, 2.5),
                    fill = True,
                    color = colorset[selvideogroup],
                    linewidth = 0,
                    alpha = 0.5,
                    bw_adjust = 5,
                    label = labelset[selvideogroup]
                    )

    # move legend outside the plot
    ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    sns.despine()
    ax.set_xlabel("Moving Speed of All Objects (m/s)")
    ax.set_ylabel("Density")

    # savefig does not create missing folders, so make sure the target exists.
    os.makedirs(graphicfolder, exist_ok = True)
    fig.savefig(os.path.join(graphicfolder,
                             f"hist_{filename}.svg"),
                format = "svg",
                dpi = 300,
                bbox_inches = "tight")

In [None]:
# Speed densities of the morning groups -> hist_morning_moving_speed.svg
get_hist_group(morningls = morningls, filename = "morning_moving_speed")

In [None]:
# Speed densities of the noon groups -> hist_noon_moving_speed.svg
get_hist_group(morningls = noon, filename = "noon_moving_speed")

In [None]:



    
# NOTE(review): same call, with the same arguments, as an earlier cell; it
# re-renders the figure and overwrites hist_morning_moving_speed.svg.
get_hist_group(morningls, "morning_moving_speed")

In [None]:
# NOTE(review): same call, with the same arguments, as an earlier cell; it
# re-renders the figure and overwrites hist_noon_moving_speed.svg.
get_hist_group(noon, "noon_moving_speed")

In [None]:
# conduct t-test between the two groups
import scipy.stats as stats
# Compare morning vs. noon walking speed for one location (e.g. Downtown Crossing); only moving people (speed >= 0.3) are considered.
def get_ttest(location, speed_col = 'speed_2s', min_speed = 0.3):
    """Compare the speed of moving people between morning and noon at one location.

    Rows of the notebook-level ``data`` are kept when ``video_group_update``
    contains ``location`` (case-insensitive; ``location`` is interpreted as a
    regular expression by ``str.contains``) and ``speed_col`` is at least
    ``min_speed``. The kept rows are split with ``morningls`` / ``noon`` and
    compared with ``scipy.stats.ttest_ind``.

    NOTE(review): the default column is ``speed_2s`` while the rest of the
    notebook filters and plots ``speed_{n}s`` with ``n = 1``. Confirm that
    ``speed_2s`` exists in ``data`` and is the intended column.

    Parameters
    ----------
    location : str
        Pattern identifying the location, e.g. ``"Downtown"``.
    speed_col : str
        Name of the speed column to test.
    min_speed : float
        Rows slower than this are treated as stationary and dropped.

    Returns
    -------
    (result, md) : tuple
        ``result`` is the object returned by ``ttest_ind``; ``md`` is the
        morning mean minus the noon mean of ``speed_col``.
    """
    in_location = data['video_group_update'].str.contains(location, case = False)
    is_moving = data[speed_col] >= min_speed
    dt_moving = data[in_location & is_moving].reset_index(drop = True)

    morning_speed = dt_moving.loc[dt_moving['video_group_update'].isin(morningls), speed_col]
    afternoon_speed = dt_moving.loc[dt_moving['video_group_update'].isin(noon), speed_col]

    md = morning_speed.mean() - afternoon_speed.mean()
    # compare the mean speed of the two groups
    result = stats.ttest_ind(morning_speed, afternoon_speed)
    return result, md


In [None]:
# Run the morning-vs-noon t-test for every location that has both sets.
locationls = ["Downtown",
              'Bryant',
              'MET']
ttest_outputs = [get_ttest(location) for location in locationls]

t_ls = [result[0] for result, _ in ttest_outputs]      # test statistic
rho_ls = [result[1] for result, _ in ttest_outputs]    # second field of the test result (p-value)
mean_ls = [meandiff for _, meandiff in ttest_outputs]  # morning mean - noon mean

df = pd.DataFrame({'location':locationls, 't':t_ls, 'rho':rho_ls, 'mean_diff':mean_ls})
df.head()

In [None]:
# Export the t-test summary table next to the figures.
df.to_csv(os.path.join(graphicfolder, 'walking_speed_ttest.csv'), index = False)

## Compare the stationary rank between morning and noon

In [None]:
# reload the data
res = 15
# Folder holding the per-group robustness-test GeoJSON files. It has to be
# defined at notebook level: the loop below reads it, and a missing name would
# otherwise make every file look unreadable.
h3folderagg = '../../_data/05_tracking_result_projected/step5_h3_video_level'
allstationary = []
for selvideogroup in videopath_sel['video_group_update'].unique():
    robust_path = os.path.join(h3folderagg, f"{selvideogroup}_stationary_loc_robusttest_{res}.geojson")
    if not os.path.exists(robust_path):
        # No robustness-test output was written for this group.
        continue
    try:
        temp = gpd.read_file(robust_path)
    except Exception as err:
        # Stay best-effort on unreadable files, but report them.
        print(f"skipping {robust_path}: {err!r}")
        continue
    temp['video_group_update'] = selvideogroup
    allstationary.append(temp)

In [None]:
# Stack the per-group tables. allstationary is rebound from a list to a
# DataFrame here, so the guard keeps this cell safe to re-run.
if isinstance(allstationary, list):
    allstationary = pd.concat(allstationary).reset_index(drop = True)
allstationary.head()

In [None]:
# Morning video groups (same list as the one defined for the speed plots).
morningls = [
    'bryant_park-20081008-072238',
    'Chestnut Street videos (NEW)-20100519-083343',
    'Downtown Crossing videos (NEW)-20100521-074701',
    'Met Steps videos (NEW)-20100612-082221',
]

In [None]:
# Every group that is not in morningls is labelled "noon".
is_morning = allstationary['video_group_update'].isin(morningls)
allstationary['timegroup'] = np.where(is_morning, "morning", "noon")
# Which locations are present in each time group.
allstationary.groupby('timegroup')['location'].unique()

In [None]:
# One row per (threshold, location, H3 cell) with the morning and noon means
# side by side; cells missing in either time group are dropped.
pivoted = allstationary.pivot(index = ['stationary_thred', 'location','h3_15'],
                              columns = ['timegroup'],
                              values = 'stationary_count_mean')
wide = pivoted.reset_index().dropna()
wide[wide['stationary_thred']==5].head()

In [None]:
# Morning vs. noon mean stationary count per H3 cell, for threshold index 3.
fig, ax = plt.subplots(figsize = (5,5))
wide_thred3 = wide[wide['stationary_thred']==3]
sns.scatterplot(data = wide_thred3,
                x = 'morning',
                y = 'noon',
                hue = 'location',
                style = 'stationary_thred',
                ax = ax)
sns.despine()

In [None]:
# Rank the H3 cells of every video group by mean stationary count (rank 1 =
# highest), for threshold index 3.
# NOTE(review): this rebinds the global `data` that get_hist_group and
# get_ttest read; re-running those after this cell uses the wrong table.
data = (
    allstationary[allstationary['stationary_thred']==3]
    .reset_index(drop = True)
    .assign(
        stationary_rank = lambda d: d.groupby('video_group_update')['stationary_count_mean'].rank(ascending = False),
        log_stationary_rank = lambda d: np.log(d['stationary_rank']),
    )
)
data.head()

In [None]:
# Log-log rank plot of the stationary counts; cells with a zero count are left out.
fig, ax = plt.subplots(figsize = (5,5))
nonzero_cells = data[data['stationary_count_mean']>0]
sns.scatterplot(data = nonzero_cells,
                x = 'log_stationary_count_mean',
                y = 'log_stationary_rank',
                hue = 'video_group_update',
                ax = ax)
sns.despine()
# put legend outside the plot
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
ax.set_xlabel("Log of Stationary Count Per Frame")
ax.set_ylabel("Log of Rank of Stationary Count Per Frame")

In [None]:
# Distribution of the mean stationary count per video group, threshold index 3.
stationary_thred3 = allstationary[allstationary['stationary_thred']==3]
sns.kdeplot(data = stationary_thred3,
            x = 'stationary_count_mean',
            hue = 'video_group_update')

# Archived other

In [None]:
# import plotly.express as px

# data =st_gdf_h3[st_gdf_h3['stationary_count_mean']>0].reset_index(drop = True)
# fig = px.choropleth_mapbox(data, 
#                         # lat="centroid_y", 
#                         # lon="centroid_x", 
#                         geojson = data['geometry'],
#                         locations = data.index,
#                         hover_name="h3_15",
#                         color='log_stationary_count_mean',
#                         # set stroke width to zero
                        
#                         # hover_data=["stationary_count_mean"],
#                         mapbox_style="carto-positron",
#                         color_continuous_scale='OrRd',
#                         center = {"lat":data['centroid_y'].mean(),
#                                   "lon":data['centroid_x'].mean()
#                                       },
#                         # color_discrete_sequence=["fuchsia"], 
#                         zoom=18, 
#                         opacity=0.5,
#                         labels={'log_stationary_count_mean':'Log(Stationary Count Mean)'}
#                         # height=300
#                         )
# # fig.update_layout(mapbox_style="open-street-map")
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
# fig.show()

In [None]:
# Map of the H3 cells with a non-zero stationary count, coloured by log count.
# NOTE(review): st_gdf_h3 is only built in a commented-out cell above, so this
# cell fails on a fresh kernel unless that code is restored.
fig, ax = plt.subplots(figsize = (10,10))
st_gdf_h3[st_gdf_h3['stationary_count_mean']>0].plot(ax = ax,
                                                     column = 'log_stationary_count_mean',
                                                     legend = True,
                                                     cmap = 'OrRd')  # `colormap` is a deprecated alias of `cmap`
# turn off axis
ax.axis('off')

In [None]:
import seaborn as sns
# Distribution of the log stationary count per H3 cell.
# sns.distplot is deprecated; histplot with a density-scaled histogram and a
# KDE overlay is the documented replacement.
fig, ax = plt.subplots(figsize = (5,5))
sns.histplot(st_gdf_h3['log_stationary_count_mean'],
             kde = True,
             stat = "density",
             kde_kws = dict(cut = 3),
             ax = ax)
sns.despine()

In [None]:
# Speed density of a single trace table, ignoring speeds of 3 and above.
# NOTE(review): traceGDF is not defined at notebook level in the visible cells,
# and this rebinds the global `data` used by the plotting helpers.
fig, ax = plt.subplots(figsize = (5,5))
data = traceGDF[traceGDF['speed_2s']<3]
kde_style = dict(clip = (0, 2.5),
                 fill = True,
                 linewidth = 0,
                 alpha = 0.5,
                 bw_adjust = 5)
sns.kdeplot(data = data, x = 'speed_2s', ax = ax, **kde_style)
sns.despine()

In [None]:
# 50-bin histogram of speed_2s for speeds below 2.5.
# NOTE(review): traceGDF is not defined at notebook level in the visible cells.
plt.hist(traceGDF[traceGDF['speed_2s']<2.5]['speed_2s'], bins = 50)