In [1]:
import os.path
import pickle
import re
import json
import pandas as pd
from tqdm.notebook import tqdm
tqdm.pandas()

In [2]:
def split_line_desc(df):
    """
    Split the LINE_DESC column into RouteId, Direction and Alternative columns.

    LINE_DESC is split on '-'; RouteId, Direction and Alternative receive the
    0th, 1st and 2nd piece respectively (e.g. '13016-1-a' -> '13016', '1', 'a').
    A piece that does not exist in a value yields a missing value.

    The three columns are added to ``df`` in place and the same frame is returned.
    """
    # split once and reuse the pieces for all three columns
    parts = df['LINE_DESC'].str.split('-')

    # 0th piece -> route id
    df['RouteId'] = parts.str[0]

    # 1st piece -> direction
    df['Direction'] = parts.str[1]

    # 2nd piece -> alternative
    df['Alternative'] = parts.str[2]
    return df

In [3]:
def add_columns_from_json(row):
    """
    Expand the JSON column of one trip row into per-stop records.

    For every report in which the vehicle is at a stop:
      * if the report has ``previousCalls``, a record is built from its first
        entry (``Trip_End`` False);
      * if the report has a ``tripEndReasonCode``, a closing record for the last
        stop is built from the report itself (``Trip_End`` True, no departure time).
    Reports that lack the expected keys are skipped.

    Results are delivered through two module-level globals that must exist
    before the call:
    rows_list - list of dictionaries that will become the new data set.
    bad_jsons - a counter for how many trip records we could not read
                (no usable 'tripMessage' entry).

    row - a mapping/Series with the trip-level columns and a 'jsonString' column.

    Parsing ``jsonString`` and looking up 'tripMessages' are not guarded, so a
    row with an unparsable document raises. Returns None.
    """
    global bad_jsons
    global rows_list

    # Only failures caused by the shape of the JSON are tolerated. Anything else
    # (for example a missing or wrong rows_list global) must surface instead of
    # silently dropping data.
    malformed = (KeyError, IndexError, TypeError)

    def stop_record(stop, arrival_time, departure_time, trip_end):
        # one output row: trip-level columns from `row`, stop-level values from `stop`
        return {
            "DatedVehicleJourneyRef": row['DatedVehicleJourneyRef'],
            "LINE_DESC": row["LINE_DESC"],
            "OPERATOR_ID": row["OPERATOR_ID"],
            "CLUSTER_ID": row["CLUSTER_ID"],
            "LINE_SHORT_NAME": row["LINE_SHORT_NAME"],
            "OriginAimedDepartureTime": row["OriginAimedDepartureTime"],
            "RouteId": row["RouteId"],
            "Direction": row["Direction"],
            "Alternative": row["Alternative"],
            "stopOrder": stop['stopOrder'],
            "stopId": stop['stopId'],
            "actualArrivalTime": arrival_time,
            "actualDepartureTime": departure_time,
            "License_Plate": row['VehicleRef'],
            "Trip_End": trip_end
        }

    json_data = json.loads(row['jsonString'])

    for record in json_data['tripMessages']:
        try:
            reports = iter(record['tripMessage'])
        except malformed:
            # print(f'bad json {bad_jsons}')
            bad_jsons += 1
            continue

        for report in reports:
            try:
                # taking only data when bus is in stop, or else we will have duplicates
                if not report['vehicleAtStop']:
                    continue

                if report['previousCalls']:
                    call = report['previousCalls'][0]
                    try:
                        arrival_time = call['actualArrivalTime']
                    except malformed:
                        # No recorded arrival: fall back to the aimed departure time of
                        # the record's first message.
                        # NOTE(review): presumably this only happens at the origin stop
                        # (stopOrder == 1) - confirm.
                        arrival_time = record['tripMessage'][0]['aimedDepartureTime']
                    rows_list.append(
                        stop_record(call, arrival_time, call['actualDepartureTime'], False)
                    )

                if report['tripEndReasonCode']:  # in case of last stop for the trip
                    rows_list.append(
                        stop_record(report, report['actualArrivalTime'], None, True)
                    )
            except malformed:
                # not an error: not all entries have the section previousCalls
                continue

In [4]:
def create_links_data(df):
    """
    Build the link (stop-to-stop) data set from per-stop arrival/departure rows.

    Each row is compared with the row directly before it. The pair becomes a
    link when both rows belong to the same line (LINE_DESC) and the same trip
    start (OriginAimedDepartureTime) and the stop order advances by exactly one.
    The frame is therefore expected to be ordered by line, trip start and stop
    order before the call.

    A pair whose timestamps needed for the calculation are missing is skipped,
    because no duration can be computed for it.

    Side effect: the columns actualDepartureTime, actualArrivalTime and
    OriginAimedDepartureTime of ``df`` are converted to datetime in place.

    Returns a new DataFrame with one row per link. Its duration columns are:
      'time_first_stop(s)'  - seconds spent at the previous stop,
      'Link_travel_time(s)' - seconds from leaving the previous stop to reaching this one,
      'linkTime'            - the sum of the two.
    An input with no valid pair gives an empty DataFrame.
    """
    # Convert necessary columns to datetime objects if needed
    for column in ('actualDepartureTime', 'actualArrivalTime', 'OriginAimedDepartureTime'):
        df[column] = pd.to_datetime(df[column])

    output_rows = []

    # The previous row is carried over between iterations instead of being
    # fetched again, which halves the number of positional lookups.
    prev_row = df.iloc[0] if len(df) else None

    for i in tqdm(range(1, len(df)), desc='calculating links'):
        curr_row = df.iloc[i]

        # Comparing the trip start alone would link the last stop of one line to
        # a stop of the next line when both trips start at the same time.
        same_trip = (
            curr_row['LINE_DESC'] == prev_row['LINE_DESC']
            and curr_row['OriginAimedDepartureTime'] == prev_row['OriginAimedDepartureTime']
        )
        next_stop = prev_row['stopOrder'] + 1 == curr_row['stopOrder']

        if same_trip and next_stop:
            arrived = curr_row['actualArrivalTime']
            left_prev = prev_row['actualDepartureTime']
            arrived_prev = prev_row['actualArrivalTime']

            # int() of a missing duration would raise, so such pairs are skipped
            if not (pd.isna(arrived) or pd.isna(left_prev) or pd.isna(arrived_prev)):
                # Calculate the link travel time in seconds
                link_travel_time = int((arrived - left_prev).total_seconds())

                # Calculate the time spent at the previous stop in seconds
                time_in_stop = int((left_prev - arrived_prev).total_seconds())

                output_rows.append({
                    "TripId": curr_row['DatedVehicleJourneyRef'],
                    "OPERATOR_ID": curr_row["OPERATOR_ID"],
                    "CLUSTER_ID": curr_row["CLUSTER_ID"],
                    "License_Plate": curr_row['License_Plate'],
                    "LINE_SHORT_NAME": curr_row["LINE_SHORT_NAME"],
                    "OriginAimedDepartureTime": curr_row["OriginAimedDepartureTime"],
                    "LINE_DESC": curr_row["LINE_DESC"],
                    "RouteId": curr_row['RouteId'],
                    'Direction': curr_row['Direction'],
                    'Alternative': curr_row['Alternative'],
                    "stopOrder": curr_row['stopOrder'],
                    "actualArrivalTime": curr_row['actualArrivalTime'],
                    "actualDepartureTime": curr_row['actualDepartureTime'],
                    'Linkref': f"{prev_row['stopId']}:{curr_row['stopId']}",
                    'linkTime': time_in_stop + link_travel_time,
                    'time_first_stop(s)': time_in_stop,
                    'Link_travel_time(s)': link_travel_time,
                    'Trip_End': curr_row['Trip_End']
                })

        prev_row = curr_row

    return pd.DataFrame(output_rows)


# testing on a sample of the data

### opening a single vm file

In [5]:
# Cache the first VM file as a pickle so later runs can skip parsing the CSV.
if not os.path.isfile('examplePickle'):
    # Read the CSV file
    df = pd.read_csv(r"C:\Users\shalt\Documents\final project\data\VM_feb2023_1\VM_feb2023_1.csv", encoding="utf-8")
    # 'wb' rather than 'ab': the cache must hold exactly one pickled object.
    # The with-block closes the handle even if dumping fails.
    with open('examplePickle', 'wb') as dbfile:
        # source, destination
        pickle.dump(df, dbfile)
else:
    # NOTE: pickle.load can execute arbitrary code - only load a cache file
    # that this notebook created.
    with open('examplePickle', 'rb') as dbfile:
        df = pickle.load(dbfile)

df.shape

(21057, 11)

In [6]:
# Display the loaded frame.
df

Unnamed: 0,OPERATOR_ID,CLUSTER_ID,LINE_ID,LINE_DESC,LINE_SHORT_NAME,DatedVehicleJourneyRef,DataFrameRef,OriginAimedDepartureTime,VehicleRef,RECORD_ID,jsonString
0,3,91,5442,13016-1-ר,16,5489309,2023-02-01,2023-02-01 06:50:00.000,7584269,10313823097,"{""tripMessages"":[{""tripMessage"":[{""messageCrea..."
1,3,91,5442,13016-1-ר,16,5489311,2023-02-01,2023-02-01 07:10:00.000,7796169,10313877960,"{""tripMessages"":[{""tripMessage"":[{""messageCrea..."
2,3,91,5442,13016-1-ר,16,5489295,2023-02-01,2023-02-01 07:30:00.000,7721069,10313936357,"{""tripMessages"":[{""tripMessage"":[{""messageCrea..."
3,3,91,5442,13016-1-ר,16,5489323,2023-02-01,2023-02-01 07:50:00.000,7783069,10313987628,"{""tripMessages"":[{""tripMessage"":[{""messageCrea..."
4,3,91,5442,13016-1-ר,16,5489291,2023-02-01,2023-02-01 08:10:00.000,7790569,10314005593,"{""tripMessages"":[{""tripMessage"":[{""messageCrea..."
...,...,...,...,...,...,...,...,...,...,...,...
21052,3,91,34629,46006-3-א,6א,585079446,2023-02-16,2023-02-16 23:25:00.000,7552969,10376717717,"{""tripMessages"":[{""tripMessage"":[{""messageCrea..."
21053,3,91,34629,46006-3-א,6א,585079447,2023-02-16,2023-02-16 23:30:00.000,7559969,10376664120,"{""tripMessages"":[{""tripMessage"":[{""messageCrea..."
21054,3,91,34629,46006-3-א,6א,585079448,2023-02-16,2023-02-16 23:35:00.000,7804169,10376677896,"{""tripMessages"":[{""tripMessage"":[{""messageCrea..."
21055,3,91,34629,46006-3-א,6א,585079449,2023-02-16,2023-02-16 23:40:00.000,7561869,10376697604,"{""tripMessages"":[{""tripMessage"":[{""messageCrea..."


## create a data set that shows details for each stop in each trip

### dropping unnecessary features

In [7]:
df = split_line_desc(df) # creating the RouteId, Direction and Alternative features from LINE_DESC.

In [8]:
# columns that are not used by the rest of the notebook.
columns_to_drop = ['LINE_ID','DataFrameRef', 'RECORD_ID']
df = df.drop(columns=columns_to_drop)
df.columns

Index(['OPERATOR_ID', 'CLUSTER_ID', 'LINE_DESC', 'LINE_SHORT_NAME',
       'DatedVehicleJourneyRef', 'OriginAimedDepartureTime', 'VehicleRef',
       'jsonString', 'RouteId', 'Direction', 'Alternative'],
      dtype='object')

### testing opening the trip message feature and creating a new data set

In [9]:
# initializing the global variables that add_columns_from_json writes to.
bad_jsons = 0
rows_list = []

In [10]:
# creating data from the last 25 rows of the original data set;
# add_columns_from_json collects the resulting records in rows_list.
df.tail(25).apply(add_columns_from_json, axis=1)
print('apply finished')

apply finished


In [11]:
# building a data frame from the collected records and checking the result.
df_check = pd.DataFrame(rows_list)
df_check.head(10)

Unnamed: 0,DatedVehicleJourneyRef,LINE_DESC,OPERATOR_ID,CLUSTER_ID,LINE_SHORT_NAME,OriginAimedDepartureTime,RouteId,Direction,Alternative,stopOrder,stopId,actualArrivalTime,actualDepartureTime,License_Plate,Trip_End
0,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,3,2982,2023-02-15T08:45:49,2023-02-15T08:46:27,7716269,False
1,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,4,2983,2023-02-15T08:46:37,2023-02-15T08:47:14,7716269,False
2,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,4,2983,2023-02-15T08:46:37,2023-02-15T08:47:14,7716269,False
3,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,5,3087,2023-02-15T08:47:22,2023-02-15T08:47:42,7716269,False
4,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,5,3087,2023-02-15T08:47:22,2023-02-15T08:47:42,7716269,False
5,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,6,3654,2023-02-15T08:47:58,2023-02-15T08:48:33,7716269,False
6,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,6,3654,2023-02-15T08:47:58,2023-02-15T08:48:33,7716269,False
7,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,6,3654,2023-02-15T08:47:58,2023-02-15T08:48:33,7716269,False
8,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,6,3654,2023-02-15T08:47:58,2023-02-15T08:48:33,7716269,False
9,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,6,3654,2023-02-15T08:47:58,2023-02-15T08:48:33,7716269,False


In [12]:
# the same stop is reported several times; keep one copy of each identical record.
df_check = df_check.drop_duplicates()
df_check.head(10)

Unnamed: 0,DatedVehicleJourneyRef,LINE_DESC,OPERATOR_ID,CLUSTER_ID,LINE_SHORT_NAME,OriginAimedDepartureTime,RouteId,Direction,Alternative,stopOrder,stopId,actualArrivalTime,actualDepartureTime,License_Plate,Trip_End
0,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,3,2982,2023-02-15T08:45:49,2023-02-15T08:46:27,7716269,False
1,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,4,2983,2023-02-15T08:46:37,2023-02-15T08:47:14,7716269,False
3,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,5,3087,2023-02-15T08:47:22,2023-02-15T08:47:42,7716269,False
5,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,6,3654,2023-02-15T08:47:58,2023-02-15T08:48:33,7716269,False
15,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,7,5825,2023-02-15T08:48:37,2023-02-15T08:51:00,7716269,False
17,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,8,2003,2023-02-15T08:51:31,2023-02-15T08:52:01,7716269,False
18,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,9,6233,2023-02-15T08:52:02,2023-02-15T08:52:19,7716269,False
20,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,10,2004,2023-02-15T08:52:27,2023-02-15T08:53:00,7716269,False
22,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,11,1485,2023-02-15T08:53:01,2023-02-15T08:53:42,7716269,False
24,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,12,1486,2023-02-15T08:53:47,2023-02-15T08:54:12,7716269,False


In [13]:
# order the records by line, trip start time and stop order, so that
# consecutive rows are consecutive stops (create_links_data compares each row with the one before it).
df_check = df_check.sort_values(by=['LINE_DESC', 'OriginAimedDepartureTime', 'stopOrder'])
df_check.head(10)

Unnamed: 0,DatedVehicleJourneyRef,LINE_DESC,OPERATOR_ID,CLUSTER_ID,LINE_SHORT_NAME,OriginAimedDepartureTime,RouteId,Direction,Alternative,stopOrder,stopId,actualArrivalTime,actualDepartureTime,License_Plate,Trip_End
0,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,3,2982,2023-02-15T08:45:49,2023-02-15T08:46:27,7716269,False
1,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,4,2983,2023-02-15T08:46:37,2023-02-15T08:47:14,7716269,False
3,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,5,3087,2023-02-15T08:47:22,2023-02-15T08:47:42,7716269,False
5,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,6,3654,2023-02-15T08:47:58,2023-02-15T08:48:33,7716269,False
15,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,7,5825,2023-02-15T08:48:37,2023-02-15T08:51:00,7716269,False
17,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,8,2003,2023-02-15T08:51:31,2023-02-15T08:52:01,7716269,False
18,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,9,6233,2023-02-15T08:52:02,2023-02-15T08:52:19,7716269,False
20,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,10,2004,2023-02-15T08:52:27,2023-02-15T08:53:00,7716269,False
22,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,11,1485,2023-02-15T08:53:01,2023-02-15T08:53:42,7716269,False
24,57543633,12745-1-ב,3,91,745,2023-02-15 08:45:00.000,12745,1,ב,12,1486,2023-02-15T08:53:47,2023-02-15T08:54:12,7716269,False


## for the sample - create a data set for links between the stops

In [14]:
# renumber the rows after dropping duplicates and sorting, then list the available columns.
df_check = df_check.reset_index(drop=True)
list(df_check.columns)

['DatedVehicleJourneyRef',
 'LINE_DESC',
 'OPERATOR_ID',
 'CLUSTER_ID',
 'LINE_SHORT_NAME',
 'OriginAimedDepartureTime',
 'RouteId',
 'Direction',
 'Alternative',
 'stopOrder',
 'stopId',
 'actualArrivalTime',
 'actualDepartureTime',
 'License_Plate',
 'Trip_End']

In [15]:
# compute the links for the sample and renumber the resulting rows.
links_check = create_links_data(df_check).reset_index(drop=True)
links_check.head(10)

calculating links:   0%|          | 0/414 [00:00<?, ?it/s]

Unnamed: 0,TripId,OPERATOR_ID,CLUSTER_ID,License_Plate,LINE_SHORT_NAME,OriginAimedDepartureTime,LINE_DESC,RouteId,Direction,Alternative,stopOrder,actualArrivalTime,actualDepartureTime,Linkref,linkTime,time_first_stop(s),Link_travel_time(s),Trip_End
0,57543633,3,91,7716269,745,2023-02-15 08:45:00,12745-1-ב,12745,1,ב,4,2023-02-15 08:46:37,2023-02-15 08:47:14,2982:2983,48,38,10,False
1,57543633,3,91,7716269,745,2023-02-15 08:45:00,12745-1-ב,12745,1,ב,5,2023-02-15 08:47:22,2023-02-15 08:47:42,2983:3087,45,37,8,False
2,57543633,3,91,7716269,745,2023-02-15 08:45:00,12745-1-ב,12745,1,ב,6,2023-02-15 08:47:58,2023-02-15 08:48:33,3087:3654,36,20,16,False
3,57543633,3,91,7716269,745,2023-02-15 08:45:00,12745-1-ב,12745,1,ב,7,2023-02-15 08:48:37,2023-02-15 08:51:00,3654:5825,39,35,4,False
4,57543633,3,91,7716269,745,2023-02-15 08:45:00,12745-1-ב,12745,1,ב,8,2023-02-15 08:51:31,2023-02-15 08:52:01,5825:2003,174,143,31,False
5,57543633,3,91,7716269,745,2023-02-15 08:45:00,12745-1-ב,12745,1,ב,9,2023-02-15 08:52:02,2023-02-15 08:52:19,2003:6233,31,30,1,False
6,57543633,3,91,7716269,745,2023-02-15 08:45:00,12745-1-ב,12745,1,ב,10,2023-02-15 08:52:27,2023-02-15 08:53:00,6233:2004,25,17,8,False
7,57543633,3,91,7716269,745,2023-02-15 08:45:00,12745-1-ב,12745,1,ב,11,2023-02-15 08:53:01,2023-02-15 08:53:42,2004:1485,34,33,1,False
8,57543633,3,91,7716269,745,2023-02-15 08:45:00,12745-1-ב,12745,1,ב,12,2023-02-15 08:53:47,2023-02-15 08:54:12,1485:1486,46,41,5,False
9,57543633,3,91,7716269,745,2023-02-15 08:45:00,12745-1-ב,12745,1,ב,13,2023-02-15 08:55:21,2023-02-15 08:55:54,1486:3662,94,25,69,False


# running the program on all the vm files:

## creating stops data

In [None]:
# Build the per-stop data set from every VM file. The per-file frames are
# collected in a list and concatenated once after the loop.
stops_per_file = []

for i in tqdm(range(1,7), desc = 'files', colour='blue'):
    # opening file
    print(f"opening file number {i}")
    path = rf"C:\Users\shalt\Documents\final project\data\VM_feb2023_{i}\VM_feb2023_{i}.csv"
    original = pd.read_csv(path, encoding='utf-8')

    # dropping unnecessary features.
    original = split_line_desc(original) # creating the RouteId, Direction and Alternative features.
    original = original.drop(['LINE_ID','DataFrameRef', 'RECORD_ID'], axis=1) # unnecessary columns.

    # resetting the global variables that add_columns_from_json writes to.
    # The counter must be named bad_jsons: that is the global the function
    # increments, so any other name always reports 0.
    rows_list = []
    bad_jsons = 0

    # creating data set
    print(f'creating data set {i}')
    tqdm.pandas(desc = f'opening jsons {i}')
    original.progress_apply(add_columns_from_json, axis=1)
    print(f'there were {bad_jsons} bad jsons rows')
    stops_per_file.append(pd.DataFrame(rows_list).drop_duplicates())

stops = pd.concat(stops_per_file, ignore_index=True)
del stops_per_file # to reduce memory usage

# order by line, trip start time and stop order so that consecutive rows are consecutive stops
stops = stops.sort_values(by=['LINE_DESC', 'OriginAimedDepartureTime', 'stopOrder'])

In [24]:
# renumber the sorted rows 0..n-1
stops = stops.reset_index(drop=True)
# reset_index(drop=True) does not create an 'index' column, so an unconditional
# drop raises KeyError on a fresh run; errors='ignore' still removes a column
# left over from an earlier reset that kept the old index.
stops = stops.drop(columns='index', errors='ignore')
stops.head()

Unnamed: 0,DatedVehicleJourneyRef,LINE_DESC,OPERATOR_ID,CLUSTER_ID,LINE_SHORT_NAME,OriginAimedDepartureTime,RouteId,Direction,Alternative,stopOrder,stopId,actualArrivalTime,actualDepartureTime,License_Plate,Trip_End
0,1382429,10102-1-#,3,91,102,2023-02-03 00:30:00.000,10102,1,#,1,2721,2023-02-03T00:30:00,2023-02-03T00:31:46,7560469,False
1,1382429,10102-1-#,3,91,102,2023-02-03 00:30:00.000,10102,1,#,2,2528,2023-02-03T00:31:53,2023-02-03T00:32:11,7560469,False
2,1382429,10102-1-#,3,91,102,2023-02-03 00:30:00.000,10102,1,#,3,2339,2023-02-03T00:32:35,2023-02-03T00:32:49,7560469,False
3,1382429,10102-1-#,3,91,102,2023-02-03 00:30:00.000,10102,1,#,4,2345,2023-02-03T00:33:14,2023-02-03T00:33:34,7560469,False
4,1382429,10102-1-#,3,91,102,2023-02-03 00:30:00.000,10102,1,#,6,2150,2023-02-03T00:34:10,2023-02-03T00:34:26,7560469,False


### saving stops data

In [25]:
# persist the per-stop data set so the JSON expansion does not have to be repeated.
stops.to_pickle(r'C:\Users\shalt\Documents\final project\data\stops.pkl')

## creating vm_links

In [26]:
# compute the links for all trips and renumber the resulting rows.
vm_links = create_links_data(stops).reset_index(drop=True)

calculating links:   0%|          | 0/4610429 [00:00<?, ?it/s]

In [27]:
# Display the links data set.
vm_links

Unnamed: 0,TripId,OPERATOR_ID,CLUSTER_ID,License_Plate,LINE_SHORT_NAME,OriginAimedDepartureTime,LINE_DESC,RouteId,Direction,Alternative,stopOrder,actualArrivalTime,actualDepartureTime,Linkref,linkTime,time_first_stop(s),Link_travel_time(s),Trip_End
0,1382429,3,91,7560469,102,2023-02-03 00:30:00,10102-1-#,10102,1,#,2,2023-02-03 00:31:53,2023-02-03 00:32:11,2721:2528,113,106,7,False
1,1382429,3,91,7560469,102,2023-02-03 00:30:00,10102-1-#,10102,1,#,3,2023-02-03 00:32:35,2023-02-03 00:32:49,2528:2339,42,18,24,False
2,1382429,3,91,7560469,102,2023-02-03 00:30:00,10102-1-#,10102,1,#,4,2023-02-03 00:33:14,2023-02-03 00:33:34,2339:2345,39,14,25,False
3,1382429,3,91,7560469,102,2023-02-03 00:30:00,10102-1-#,10102,1,#,7,2023-02-03 00:34:34,2023-02-03 00:34:53,2150:2152,24,16,8,False
4,1382429,3,91,7560469,102,2023-02-03 00:30:00,10102-1-#,10102,1,#,8,2023-02-03 00:35:09,2023-02-03 00:35:30,2152:2163,35,19,16,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4244247,11863404,3,91,7547869,4,2023-02-28 08:00:00,70004-2-#,70004,2,#,24,2023-02-28 08:33:22,2023-02-28 08:33:37,1849:1850,24,18,6,False
4244248,11863404,3,91,7547869,4,2023-02-28 08:00:00,70004-2-#,70004,2,#,25,2023-02-28 08:34:05,2023-02-28 08:34:25,1850:1852,43,15,28,False
4244249,11863404,3,91,7547869,4,2023-02-28 08:00:00,70004-2-#,70004,2,#,26,2023-02-28 08:35:02,2023-02-28 08:35:31,1852:956,57,20,37,False
4244250,11863404,3,91,7547869,4,2023-02-28 08:00:00,70004-2-#,70004,2,#,27,2023-02-28 08:36:05,2023-02-28 08:36:16,956:2167,63,29,34,False


### saving vm links to a pickle

In [28]:
# persist the links data set.
vm_links.to_pickle(r'C:\Users\shalt\Documents\final project\data\vm_links.pkl')