In [98]:
import pandas as pd

from main import gps_data_utils

import geopandas as gpd
from geopy.distance import geodesic

import folium
from folium.plugins import MarkerCluster

In [99]:
df = pd.read_csv('trip_data_10Units_1Year_V2.csv')
df = gps_data_utils.validate_mandatory_cols(df,'RowReferenceTime','lat','lon','Speed','UnityLicensePlate')
df_gpd = gpd.GeoDataFrame(df,geometry=gpd.points_from_xy(df['lat'],df['lon']),crs='EPSG:4326')

In [100]:
def ret_geo(p1y,p1x,p2y,p2x):
    return geodesic((p1y,p1x),(p2y,p2x)).meters

In [101]:
grouped = df_gpd.groupby('geometry').size().sort_values(ascending=False).reset_index(name='Count')
if len(grouped) < 1500000: 
    factor = 0.01
    end= int(len(grouped)*factor)
else:
    factor = 0.005
    end= int(len(grouped)*factor)

In [102]:
counts = []
other_counts = 0

for i in range(0, len(grouped)):
    if len(counts) < end:
        if i > 0:
            distance = ret_geo(grouped['geometry'][i].x,grouped['geometry'][i].y,grouped['geometry'][i-1].x,grouped['geometry'][i-1].y)
            if distance > 500.0:
                counts.append([[grouped['geometry'][i].x,grouped['geometry'][i].y],grouped['Count'][i]])
            else:
                other_counts += grouped['Count'][i]
        else:
            counts.append([[grouped['geometry'][i].x,grouped['geometry'][i].y],grouped['Count'][i]])
    else:
        other_counts += grouped['Count'][i]
        
counts = pd.DataFrame(counts,columns=['coords','count']).sort_values(by='count',ascending=False).reset_index()

In [103]:
counts[['lat', 'lon']] = pd.DataFrame(counts['coords'].tolist(), columns=['lat', 'lon'])

counts['lat'] = counts['lat'].astype(float)
counts['lon'] = counts['lon'].astype(float)

counts_gpd = gpd.GeoDataFrame(counts,geometry=gpd.points_from_xy(counts['lon'],counts['lat']),crs='EPSG:4326')

In [105]:
maps = folium.Map(max_zoom=24)
fg = MarkerCluster().add_to(maps)

for i in range(len(counts_gpd)):
    if i in counts_gpd.index:
        point1 = counts_gpd.iloc[i]
        for j in range(len(counts_gpd)):
            if j in counts_gpd.index:
                point2 = counts_gpd.iloc[j]
                distance = ret_geo(point1['geometry'].y,point1['geometry'].x,point2['geometry'].y,point2['geometry'].x)
                if distance <= 500.0:
                    counts_gpd.at[i, 'count'] += counts_gpd.at[j, 'count']
                    counts_gpd = counts_gpd.drop(j)
                    counts_gpd = counts_gpd.reset_index(drop=True)
                elif 0.0>=distance:
                    print(distance)
counts_gpd = counts_gpd.sort_values(by='count', ascending=False).reset_index(drop=True)

for i in range(20):
    row = counts_gpd.iloc[i]
    folium.Marker(location=[row['lat'], row['lon']],popup=f"Index:{row.name}<br>Count:{row['count']}").add_to(fg)

In [None]:
print(counts_gpd['count'].sum() + other_counts - len(df))
print(counts_gpd['count'].sum() + other_counts)
print(counts['count'].sum() + other_counts)
print(len(df))
print(counts_gpd['count'].sum())
print(counts['count'].sum())


-2317
1753734
1756051
1756051
149817
152134


In [None]:
maps.fit_bounds(fg.get_bounds())
maps