In [9]:
import matplotlib.pyplot as plt
import numpy as np
import pickle
import pandas as pd
import seaborn as sns
import math

def haversine_distance(lat1, lon1, lat2, lon2, radius=6371):
    """
    Great-circle distance between two points given in decimal degrees,
    computed with the Haversine formula.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.
    radius : float, optional
        Sphere radius; the result is expressed in the same unit.
        Defaults to 6371 (mean Earth radius in kilometers). Use 3956 for miles.

    Returns
    -------
    float
        Distance along the sphere's surface, in the unit of ``radius``.
    """
    # The trigonometric functions below operate on radians.
    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine of the central angle between the two points.
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2

    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points; clamp so sqrt/asin never raise a domain error.
    a = min(1.0, max(0.0, a))

    # Central angle in radians, scaled to a surface distance.
    c = 2 * math.asin(math.sqrt(a))
    return c * radius

def calculate_distance(positions):
    """
    Total path length of a sequence of positions.

    The length is the sum of the great-circle distances (as returned by
    ``haversine_distance``, i.e. kilometers with its default radius) between
    each pair of consecutive positions.

    Parameters
    ----------
    positions : list of (latitude, longitude) tuples
        Points in decimal degrees, in the order they were visited.

    Returns
    -------
    float
        Sum of the consecutive point-to-point distances. Returns 0 when
        fewer than two positions are given (there is no segment to measure).
    """
    total_distance = 0

    # Pair every position with its successor and accumulate segment lengths.
    for (lat1, lon1), (lat2, lon2) in zip(positions, positions[1:]):
        total_distance += haversine_distance(lat1, lon1, lat2, lon2)

    return total_distance


# Load the pickled DataFrame into `df`.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load pickles that come from a trusted source.
with open('datasets/teconer_helsinki_jan2018_df.pkl', 'rb') as pkl_file:
    df = pickle.load(pkl_file)

# Show which columns are available.
print(df.columns)


Index(['UnixTime', 'Timestamp', 'Latitude', 'Longitude', 'Height', 'Speed',
       'Direction', 'Ta', 'Tsurf', 'Distance', 'S1', 'S2', 'S3', 'S9', 'S10',
       'S11', 'Hour', 'Month', 'Day', 'VehicleID', 'TripID', 'Friction'],
      dtype='object')


ZeroDivisionError: division by zero

In [11]:
# Summary statistics over all records: number of unique TripIDs, record
# count, and friction / speed aggregates.
info = {
    'Number of Trips': len(df['TripID'].unique()),
    'Number of Records': len(df),
    'Friction Mean': df['Friction'].mean(),
    'Friction Std': df['Friction'].std(),
    'Average Speed': df['Speed'].mean(),
}

# Path length of every trip, computed from its (Latitude, Longitude) points
# in the order the rows appear within the trip.
# NOTE(review): assumes rows of a trip are stored in chronological order — confirm.
grouped = df.groupby('TripID')
trip_lengths = [
    calculate_distance(list(zip(group['Latitude'], group['Longitude'])))
    for _, group in grouped
]

# Mean of the per-trip path lengths (in the unit returned by calculate_distance).
info['Average Trip Length'] = np.mean(trip_lengths)

print(info)

{'Number of Trips': 876, 'Number of Records': 1845547, 'Friction Mean': 0.6343880378012585, 'Friction Std': 0.1673198325555451, 'Average Speed': 20.843659684635504, 'Average Trip Length': 14.67026014138912}


In [30]:
%matplotlib qt
plt.close('all')

# randomly sample 10K records
# sample = df.sample(1000000)
sample = df
# scatter plot the lat long or the records
# markers = [[60.235312, 24.899538],[60.237943, 25.040537], [60.209376, 24.815294]]
plt.figure(figsize=(10, 10))


plt.scatter(sample['Longitude'], sample['Latitude'], s=0.1, alpha=0.02)
# plt.scatter([m[1] for m in markers], [m[0] for m in markers], c='red', s=100, marker='x')
# mark the city Espoo with 60.205620, 24.656452
# plt.scatter(24.947722,60.167789, c='green', s=100, marker='x')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.grid(True)
# make the plot square
plt.gca().set_aspect('equal', adjustable='box')

In [49]:
%matplotlib qt
plt.close('all')

# for i in range(20):
# select samples that are within the first week of January
sample = df[(df['Timestamp'] >= '2018-01-03') & (df['Timestamp'] < '2018-01-04')]


# scatter plot the lat long or the records
# markers = [[60.235312, 24.899538],[60.237943, 25.040537], [60.209376, 24.815294]]
plt.figure(figsize=(12, 5))
plt.scatter(sample['Longitude'], sample['Latitude'], c=sample['Friction'], cmap='viridis', s=0.05, alpha=0.9, vmin=0.2, vmax=0.8)
plt.title('helsinki_day_4')
# place colorbar
plt.colorbar(label='Friction')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.grid(True)
# make the plot square
plt.gca().set_aspect('equal', adjustable='box')
# save the plot
plt.savefig('helsinki_day_4_bar.png')