In [1]:
import pandas as pd
import matplotlib.pyplot as plt


# Load the truck reports from disk using pandas' default CSV parsing,
# then show the frame as the cell output.
TRUCKS_CSV = 'trucks.csv'
df = pd.read_csv(TRUCKS_CSV)
df

Unnamed: 0,seq,type,timestamp,truckId,positionLatitude,positionLongitude,equipType,nextTripLengthPreference
0,15,Truck,2023-11-19T08:12:28,101,34.294167,-85.879997,Reefer,Long
1,16,Truck,2023-11-19T08:19:59,134,39.913216,-86.210175,Flatbed,Short
2,17,Truck,2023-11-19T08:20:41,121,36.077549,-87.120148,Van,Short
3,19,Truck,2023-11-19T08:28:02,106,32.505447,-85.605026,Van,Long
4,20,Truck,2023-11-19T08:29:42,147,35.794121,-86.377892,Flatbed,Long
...,...,...,...,...,...,...,...,...
1995,2496,Truck,2023-11-19T22:58:03,238,42.132271,-72.051010,Reefer,Short
1996,2497,Truck,2023-11-19T22:59:24,353,42.473766,-79.276031,Flatbed,Short
1997,2498,Truck,2023-11-19T22:59:27,290,39.569466,-76.139107,Van,Short
1998,2499,Truck,2023-11-19T22:59:35,241,38.293682,-85.542076,Flatbed,Long


In [2]:
# Count how many rows each truckId contributes; value_counts orders the
# result from the most to the least frequent id.
truck_ids = df['truckId']
serie = truck_ids.value_counts()
serie


366    19
106    14
292    14
110    14
331    14
       ..
123     1
400     1
197     1
219     1
222     1
Name: truckId, Length: 322, dtype: int64

In [3]:
# Inspect every row reported by one truck (id 163).
is_truck_163 = df['truckId'].eq(163)
df.loc[is_truck_163]

Unnamed: 0,seq,type,timestamp,truckId,positionLatitude,positionLongitude,equipType,nextTripLengthPreference
144,230,Truck,2023-11-19T10:13:14,163,39.345943,-84.611092,Van,Long
221,348,Truck,2023-11-19T10:53:18,163,39.058472,-84.46965,Van,Long
988,1450,Truck,2023-11-19T16:09:41,163,39.803234,-83.208191,Van,Long
1092,1575,Truck,2023-11-19T16:49:44,163,40.224995,-82.92907,Van,Long
1215,1709,Truck,2023-11-19T17:29:50,163,40.777355,-82.416084,Van,Long
1418,1912,Truck,2023-11-19T18:38:20,163,41.035187,-81.755264,Van,Long
1466,1961,Truck,2023-11-19T18:53:19,163,41.036072,-81.50489,Van,Long
1816,2316,Truck,2023-11-19T21:20:21,163,41.204025,-79.9216,Van,Long
1893,2393,Truck,2023-11-19T22:00:23,163,41.170616,-79.101173,Van,Long
1952,2452,Truck,2023-11-19T22:30:25,163,41.083035,-78.506371,Van,Long


In [None]:
import matplotlib.dates as mdates

# Count the number of reports per timestamp.
# The 'timestamp' column is read from the CSV as plain strings, so parse it
# first: with a string index the line is drawn against row positions, and the
# date formatter/locator below would misinterpret those positions as dates.
# Grouping by the parsed Series leaves `df` itself untouched.
report_times = pd.to_datetime(df['timestamp'])
time_series = df.groupby(report_times).size()

# Plot on an explicit Axes with real datetimes on the x axis.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(time_series.index, time_series.to_numpy())

# One fully formatted tick label every 30 minutes.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
ax.xaxis.set_major_locator(mdates.MinuteLocator(interval=30))
ax.tick_params(axis='x', labelrotation=45)

ax.set_xlabel('Timestamp')
ax.set_ylabel('Number of Trucks')
ax.set_title('Truck Reporting Times')
ax.grid(True)
plt.show()

In [None]:
# NOTE(review): this rebinds `df`, which until this cell held the rows of
# 'trucks.csv'; the cells below that use `df` read this file instead.
NOTIFICATIONS_CSV = 'notifications.csv'
df = pd.read_csv(NOTIFICATIONS_CSV)
df

In [None]:
# NOTE(review): `df` was last loaded from 'notifications.csv'; this cell assumes
# that file has 'equipType' and 'nextTripLengthPreference' columns — confirm.
# Row counts per equipment type, one column per trip-length preference.
pair_counts = df.groupby(['equipType', 'nextTripLengthPreference']).size()
bar_data = pair_counts.unstack()

# Stacked bars: one bar per equipment type, segmented by preference.
fig, ax = plt.subplots()
bar_data.plot.bar(stacked=True, ax=ax)

ax.set_xlabel('Equipment Type')
ax.set_ylabel('Count')
ax.set_title('Bar Chart of Equipment Type and Trip Length Preference')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Rows per truck_id in the current `df`, most frequent first.
# This overwrites the `serie` computed earlier from the 'truckId' column.
notification_truck_ids = df['truck_id']
serie = notification_truck_ids.value_counts()
serie

In [None]:
import seaborn as sns
import numpy as np

# Histogram of the per-truck counts held in `serie`, with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(serie, kde=True, ax=ax)

ax.set_title('Distribution of Notification Counts')
ax.set_xlabel('Notification Count')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
import pandas as pd

# Parse the timestamps in place so max/min and their difference work on datetimes.
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Per truck: latest timestamp, earliest timestamp and number of rows.
grouped = (
    df.groupby('truck_id')['timestamp']
    .agg(['max', 'min', 'size'])
    .reset_index()
)

# Start from the unique ids (first-appearance order), attach the aggregates,
# give the columns descriptive names and add the first-to-last span.
df_truck = (
    pd.DataFrame({'truck_id': df['truck_id'].unique()})
    .merge(grouped, how='left', on='truck_id')
    .rename(columns={'max': 'last_ping_time', 'min': 'first_ping_time', 'size': 'occurrences'})
    .assign(day_length=lambda trucks: trucks['last_ping_time'] - trucks['first_ping_time'])
)

df_truck

In [None]:
# Express each truck's first-to-last span in hours for the x axis.
day_length_hours = df_truck['day_length'].dt.total_seconds() / 60 / 60

# One point per truck: span in hours against number of notifications.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(day_length_hours, df_truck['occurrences'])
ax.set_xlabel('Day Length (Hours)')
ax.set_ylabel('Notification Count')
ax.set_title('Notification Count vs. Day Length')
ax.grid(True)
plt.show()

In [None]:
# Load the discarded notifications with pandas' default CSV parsing and
# show the frame as the cell output.
DISCARDED_CSV = 'discarded_notifications.csv'
discarded_df = pd.read_csv(DISCARDED_CSV)
discarded_df

In [None]:
# Number of rows in the discarded data per truck.
discarded_count = discarded_df.groupby('truck_id').size().reset_index(name='discarded_amount')

# Make the cell safe to re-run: drop any 'discarded_amount' left over from a
# previous execution before merging. Without this, a second run would yield
# 'discarded_amount_x' / 'discarded_amount_y' columns and code that reads
# 'discarded_amount' afterwards would fail with a KeyError.
# The left join keeps every truck; trucks without discarded rows get NaN.
df_truck = pd.merge(
    df_truck.drop(columns='discarded_amount', errors='ignore'),
    discarded_count,
    how='left',
    on='truck_id',
)

In [None]:
# Display the per-truck summary, which now carries the merged 'discarded_amount' column.
df_truck

In [None]:
# Share of each truck's notifications that were kept:
# occurrences / (occurrences + discarded) * 100.
# 'discarded_amount' comes from a left merge, so trucks with no discarded rows
# hold NaN there. Treat that as zero discards (i.e. 100%) instead of letting
# the percentage become NaN, which would keep those trucks out of the histogram.
discarded = df_truck['discarded_amount'].fillna(0)
total_notifications = df_truck['occurrences'] + discarded
df_truck['percentage'] = df_truck['occurrences'] / total_notifications * 100

# Plotting the distribution of these percentages
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df_truck['percentage'], bins=10, alpha=0.7, color='blue', edgecolor='black')
ax.set_xlabel('Percentage of Occurrences')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of the Percentage of Occurrences')
ax.grid(True)
plt.show()