# Note:
* First of all, **run the libraries**
* Then **download the file (*XML*)** by following the instructions given below the libraries cell
* Set the paths for **downloaded *XML-file***
* Set the path for **saving the *CSV-file*** (created from XML-file)

# Libraries

In [None]:
import pandas as pd 
import matplotlib.pyplot as plt 

from urllib.request import urlopen
import xml.etree.ElementTree as et 
import math
from datetime import datetime

## Download the XML file from below link

##### https://jejuunivackr-my.sharepoint.com/:u:/g/personal/asif_office_jejunu_ac_kr/EVjo0oc4peJPvy6SOZuoqkEBHhtyyWru1yc3SFPFAJNjqg
If the above link does not work, contact me via malikasifmahmoodawan@gmail.com

## Setting the path from which we'll read the downloaded XML-file

In [None]:
# Absolute location of the downloaded OpenStreetMap trace (XML): the dataset
# directory followed by the file name.
path_to_downloaded_xml_file = (
    "/media/ncl-akraino/Volume-HDD-1/akraino/ws-akraino-datasets/OpenStreetMap 2 Traces/"
    + "SelfOpenStreetMapTraceForASparseTraffic.xml"
)

## Setting the path to where we'll save the created CSV-file

In [None]:
# Absolute location where the CSV generated from the XML trace is written:
# the dataset directory followed by the file name.
path_to_save_csv_file = (
    "/media/ncl-akraino/Volume-HDD-1/akraino/ws-akraino-datasets/OpenStreetMap 2 Traces/"
    + "SelfOpenStreetMapTraceForASparseTraffic.csv"
)

## Initializing variables

In [None]:
# Preview limits: rows used when slicing/printing frames, and the column
# count handed to pandas' "display.max_columns" option.
display_max_rows, display_max_cols = 60, 10

In [None]:
# Column order of the dataset built from the XML trace.
df_cols = [
    "time",
    "id",
    "x",
    "y",
    "angle",
    "speed",
    "pos",
    "lane",
]
# Collects one dict per parsed record.
rows = list()

In [None]:
# Cap the number of columns pandas prints per frame.
pd.options.display.max_columns = display_max_cols

<br>

## Displaying (wink) copyright information

In [None]:
# Copyright notice by Asif Mehmood, stamped with the current year.
print(f"Asif©{datetime.now():%Y}")

<br>

## Provide the full path to above downloaded file

In [None]:
# Load the downloaded XML trace and keep a handle on its root element.
xtree = et.ElementTree(file=path_to_downloaded_xml_file)
xroot = xtree.getroot()

## Iterating the OpenStreetMap file and building an array from it for the dataset creation

In [None]:
# Flatten the two-level XML tree into one record per inner element: the
# "time" attribute comes from the outer element, everything else from the
# inner one (missing attributes become None).
#
# ``rows`` is rebuilt from scratch rather than appended to, so re-running
# this cell does not add a second copy of every record.
rows = [
    {
        "time": timestep.attrib.get("time"),
        "id": vehicle.attrib.get("id"),
        "x": vehicle.attrib.get("x"),
        "y": vehicle.attrib.get("y"),
        "angle": vehicle.attrib.get("angle"),
        "speed": vehicle.attrib.get("speed"),
        "pos": vehicle.attrib.get("pos"),
        "lane": vehicle.attrib.get("lane"),
    }
    for timestep in xroot
    for vehicle in timestep
]

## Displaying the rows formatted in arrays

In [None]:
# Echo the collected records as the cell's output (one dict per record).
rows

## Creating a pandas dataframe out of the array

In [None]:
# Build the dataset frame from the parsed records, fixing the column order.
out_df = pd.DataFrame(data=rows, columns=df_cols)

## Converting strings to float-values in the dataset

In [None]:
# XML attributes arrive as strings; cast the numeric columns to float in one
# pass ("id" and "lane" stay as they are).
out_df = out_df.astype(
    dict.fromkeys(["time", "x", "y", "angle", "speed", "pos"], float)
)

## Displaying the pandas dataframe

In [None]:
# Preview the first display_max_rows + 1 rows of the dataset.
out_df.iloc[: display_max_rows + 1]

## Exporting/saving the dataframe into CSV file

In [None]:
# Persist the dataset as CSV without the positional index column.
out_df.to_csv(path_or_buf=path_to_save_csv_file, index=False)

## Number of records in the dataset

In [None]:
# Total number of rows in the dataset.
number_of_records_total = len(out_df)
print("number of dataset-records: ", number_of_records_total)

## Fetching minimum x and minimum y coordinates

In [None]:
# Smallest x and y coordinates in the dataset.
# Series.min() is used instead of the builtin min(): it runs vectorised and
# skips NaN, whereas builtin min() gives an order-dependent answer as soon
# as a NaN (e.g. a record without an x/y attribute) is present.
min_x = out_df["x"].min()
min_y = out_df["y"].min()
print("minimum longititude (X): ", min_x)
print("minimum latitude (Y): ", min_y)

## Fetching maximum x and maximum y coordinates

In [None]:
# Largest x and y coordinates in the dataset.
# Series.max() is used instead of the builtin max(): it runs vectorised and
# skips NaN, whereas builtin max() gives an order-dependent answer as soon
# as a NaN (e.g. a record without an x/y attribute) is present.
max_x = out_df["x"].max()
max_y = out_df["y"].max()
print("maximum longititude (X): ", max_x)
print("maximum latitude (Y): ", max_y)

## Graph should start from and end at these coordinates

In [None]:
# Bounding box of the trace: minimum corner, then maximum corner.
print("From(X,Y): ", min_x, ",", min_y)
print("To(X,Y): ", max_x, ", ", max_y)

## Setting maximum and minimum limits for graphs in pyplot

In [None]:
# Create the figure first: plt.xlim/plt.ylim act on the *current* axes, so
# calling them before plt.figure() would configure an implicitly created
# figure and leave this 12x8 figure with default limits.
plt.figure(figsize=(12, 8))
# Round the limits outwards so the whole bounding box is inside the axes.
plt.xlim([math.floor(min_x), math.ceil(max_x)])
plt.ylim([math.floor(min_y), math.ceil(max_y)])

## Group data by time

In [None]:
# Number of records per timestep, largest groups first.
# Columns of the result: "time", "records".
df_grouped_by_time = (
    out_df.groupby(["time"])["id"]
    .count()
    .reset_index(name="records")
    .sort_values(["records"], ascending=False)
)

In [None]:
# Show the most populated timesteps (first display_max_rows + 1 rows).
print(df_grouped_by_time.head(display_max_rows + 1))

## Number of timesteps

In [None]:
# One row per distinct timestep, so the row count is the timestep count.
number_of_records_grouped_by_time = len(df_grouped_by_time)
print("number of timesteps: ", number_of_records_grouped_by_time)

# From now on, notebook can be run remotely (only from jejunu network)

## Group data by vehicle id

In [None]:
# Number of records per vehicle id, largest groups first.
# Columns of the result: "id", "records".
df_grouped_by_id = (
    out_df.groupby(["id"])["time"]
    .count()
    .reset_index(name="records")
    .sort_values(["records"], ascending=False)
)

In [None]:
# Show the vehicles with the most records (first display_max_rows + 1 rows).
print(df_grouped_by_id.head(display_max_rows + 1))

## Number of vehicles

In [None]:
# One row per distinct vehicle id, so the row count is the vehicle count.
number_of_records_grouped_by_id = len(df_grouped_by_id)
print("number of vehicles: ", number_of_records_grouped_by_id)

## Displaying the route for the vehicle with the most records

In [None]:
# All records whose id is "veh1501".
veh1501_records = out_df.loc[out_df["id"].eq("veh1501")]

In [None]:
# Dump the selected vehicle's records.
print(veh1501_records)

## Plotting route (i.e. x, y coordinates) for above vehicle

In [None]:
# Draw vehicle 1501's (x, y) trace on axes spanning the whole map extent.
veh1501_plot = veh1501_records.plot(
    x="x", y="y", label="Vehicle 1501", figsize=(12, 8)
)
# legend() expects a *sequence* of labels; a bare string is iterated
# character by character, which would label the line just "V".
veh1501_plot.legend(["Vehicle 1501's mobility on map"])
veh1501_plot.set_xlabel("longitude")
veh1501_plot.set_ylabel("latitude")
# Limits are rounded outwards so every plot shares the same full-map frame.
veh1501_plot.set_xlim([math.floor(min_x), math.ceil(max_x)])
veh1501_plot.set_ylim([math.floor(min_y), math.ceil(max_y)])

## Displaying the route for another vehicle

In [None]:
# All records whose id is "veh1551".
veh1551_records = out_df.loc[out_df["id"].eq("veh1551")]

In [None]:
# Dump the selected vehicle's records.
print(veh1551_records)

## Plotting route (i.e. x, y coordinates) for above vehicle

In [None]:
# Draw vehicle 1551's (x, y) trace on axes spanning the whole map extent.
veh1551_plot = veh1551_records.plot(
    x="x",
    y="y",
    label="Vehicle 1551",
    figsize=(12, 8),
)
veh1551_plot.set(xlabel="longitude", ylabel="latitude")
# Limits are rounded outwards to whole units around the data's bounding box.
veh1551_plot.set_xlim(math.floor(min_x), math.ceil(max_x))
veh1551_plot.set_ylim(math.floor(min_y), math.ceil(max_y))

## Displaying the route for another vehicle

In [None]:
# All records whose id is "veh1587".
veh1587_records = out_df.loc[out_df["id"].eq("veh1587")]

In [None]:
# Dump the selected vehicle's records.
print(veh1587_records)

## Plotting route (i.e. x, y coordinates) for above vehicle

In [None]:
# Draw vehicle 1587's (x, y) trace on axes spanning the whole map extent.
veh1587_plot = veh1587_records.plot(
    x="x",
    y="y",
    label="Vehicle 1587",
    figsize=(12, 8),
)
veh1587_plot.set(xlabel="longitude", ylabel="latitude")
# Limits are rounded outwards to whole units around the data's bounding box.
veh1587_plot.set_xlim(math.floor(min_x), math.ceil(max_x))
veh1587_plot.set_ylim(math.floor(min_y), math.ceil(max_y))

## Fetching vehicle coordinates for Y<=1600

In [None]:
# Keep only the records whose y coordinate is at most 1600.
vehbelowxy1600_records = out_df.loc[out_df["y"].le(1600)]

In [None]:
# Echo the y <= 1600 subset as the cell's output.
vehbelowxy1600_records

## Group data by vehicle id (for vehicle-below-X-Y)

In [None]:
# Number of records per vehicle id within the y <= 1600 subset, largest
# groups first. Columns of the result: "id", "records".
vehbelowxy1600_records_grouped_by_id = (
    vehbelowxy1600_records.groupby(["id"])["time"]
    .count()
    .reset_index(name="records")
    .sort_values(["records"], ascending=False)
)

In [None]:
# Show the vehicles with the most records in the y <= 1600 subset.
print(vehbelowxy1600_records_grouped_by_id.head(display_max_rows + 1))

## Displaying the route for another vehicle

In [None]:
# Records of "veh2509" restricted to the y <= 1600 subset.
veh2509_records = vehbelowxy1600_records.loc[
    vehbelowxy1600_records["id"].eq("veh2509")
]

In [None]:
# Dump the selected vehicle's records.
print(veh2509_records)

## Plotting route (i.e. x, y coordinates for above vehicle)

In [None]:
# Draw vehicle 2509's (x, y) trace on axes spanning the whole map extent.
veh2509_plot = veh2509_records.plot(
    x="x",
    y="y",
    label="Vehicle 2509",
    figsize=(12, 8),
)
veh2509_plot.set(xlabel="longitude", ylabel="latitude")
# Limits are rounded outwards to whole units around the data's bounding box.
veh2509_plot.set_xlim(math.floor(min_x), math.ceil(max_x))
veh2509_plot.set_ylim(math.floor(min_y), math.ceil(max_y))

## Fetching vehicle coordinates for Y<=1200

In [None]:
# Keep only the records whose y coordinate is at most 1200.
vehbelowxy1200_records = out_df.loc[out_df["y"].le(1200)]

In [None]:
# Echo the y <= 1200 subset as the cell's output.
vehbelowxy1200_records

## Group data by vehicle id (for vehicle-below-X-Y, Y<=1200)

In [None]:
# Number of records per vehicle id within the y <= 1200 subset, largest
# groups first. Columns of the result: "id", "records".
vehbelowxy1200_records_grouped_by_id = (
    vehbelowxy1200_records.groupby(["id"])["time"]
    .count()
    .reset_index(name="records")
    .sort_values(["records"], ascending=False)
)

In [None]:
# Show the vehicles with the most records in the y <= 1200 subset.
print(vehbelowxy1200_records_grouped_by_id.head(display_max_rows + 1))

## Displaying the route for another vehicle

In [None]:
# Records of "veh3869" restricted to the y <= 1200 subset.
veh3869_records = vehbelowxy1200_records.loc[
    vehbelowxy1200_records["id"].eq("veh3869")
]

In [None]:
# Dump the selected vehicle's records.
print(veh3869_records)

## Plotting route (i.e. x, y coordinates for above vehicle)

In [None]:
# Draw vehicle 3869's (x, y) trace on axes spanning the whole map extent.
veh3869_plot = veh3869_records.plot(
    x="x",
    y="y",
    label="Vehicle 3869",
    figsize=(12, 8),
)
veh3869_plot.set(xlabel="longitude", ylabel="latitude")
# Limits are rounded outwards to whole units around the data's bounding box.
veh3869_plot.set_xlim(math.floor(min_x), math.ceil(max_x))
veh3869_plot.set_ylim(math.floor(min_y), math.ceil(max_y))

## Displaying the route for another vehicle

In [None]:
# Records of "veh4039" restricted to the y <= 1200 subset.
veh4039_records = vehbelowxy1200_records.loc[
    vehbelowxy1200_records["id"].eq("veh4039")
]

In [None]:
# Dump the selected vehicle's records.
print(veh4039_records)

## Plotting route (i.e. x, y coordinates for above vehicle)

In [None]:
# Draw vehicle 4039's (x, y) trace on axes spanning the whole map extent.
veh4039_plot = veh4039_records.plot(
    x="x",
    y="y",
    label="Vehicle 4039",
    figsize=(12, 8),
)
veh4039_plot.set(xlabel="longitude", ylabel="latitude")
# Limits are rounded outwards to whole units around the data's bounding box.
veh4039_plot.set_xlim(math.floor(min_x), math.ceil(max_x))
veh4039_plot.set_ylim(math.floor(min_y), math.ceil(max_y))

## Displaying the route for another vehicle

In [None]:
# Records of "veh3618" restricted to the y <= 1200 subset.
veh3618_records = vehbelowxy1200_records.loc[
    vehbelowxy1200_records["id"].eq("veh3618")
]

In [None]:
# Dump the selected vehicle's records.
print(veh3618_records)

## Plotting route (i.e. x, y coordinates for above vehicle)

In [None]:
# Draw vehicle 3618's (x, y) trace on axes spanning the whole map extent.
veh3618_plot = veh3618_records.plot(
    x="x",
    y="y",
    label="Vehicle 3618",
    figsize=(12, 8),
)
veh3618_plot.set(xlabel="longitude", ylabel="latitude")
# Limits are rounded outwards to whole units around the data's bounding box.
veh3618_plot.set_xlim(math.floor(min_x), math.ceil(max_x))
veh3618_plot.set_ylim(math.floor(min_y), math.ceil(max_y))

## Sort data by time and lane number

In [None]:
# Order the dataset by timestep, then lane, both descending.
df_sorted_by_time_lane = out_df.sort_values(
    by=["time", "lane"],
    ascending=False,
)

## Displaying data sorted by time and lanes

In [None]:
# Temporarily lift pandas' row limit so the first 10000 sorted rows are
# printed untruncated. The ``finally`` puts the limit back even when the
# (large) print is interrupted or fails, so later cells do not render
# entire frames by accident.
pd.set_option("display.max_rows", out_df.shape[0])
try:
    print(df_sorted_by_time_lane[:10000])
finally:
    pd.set_option("display.max_rows", display_max_rows)

## Group data by time and lane number

In [None]:
# Number of records per (timestep, lane) pair, largest groups first.
# Columns of the result: "time", "lane", "records".
df_sorted_by_time_lane = (
    df_sorted_by_time_lane.groupby(["time", "lane"])["id"]
    .count()
    .reset_index(name="records")
    .sort_values(["records"], ascending=False)
)

In [None]:
# Temporarily lift pandas' row limit so up to 50000 grouped rows are printed
# untruncated. The ``finally`` puts the limit back even when the (large)
# print is interrupted or fails, so later cells do not render entire frames
# by accident.
pd.set_option("display.max_rows", df_sorted_by_time_lane.shape[0])
try:
    print(df_sorted_by_time_lane[:50000])
finally:
    pd.set_option("display.max_rows", display_max_rows)

## Displaying lane_and_time grouped/sorted maximum records

In [None]:
# Records on lane "516417601#0_6" at timestep 2592.0.
lanetime_516417601_0_6_2592_0_records = out_df.loc[
    out_df["lane"].eq("516417601#0_6") & out_df["time"].eq(2592.0)
]

In [None]:
# Widen pandas' console output so printed rows are not wrapped at the
# default width.
pd.options.display.width = 1000

In [None]:
# Dump the records selected for this lane and timestep.
print(lanetime_516417601_0_6_2592_0_records)

## Displaying lane_and_time grouped/sorted maximum records (2)

In [None]:
# Records on lane "516417601#0_6" at timestep 2601.0.
lanetime_516417601_0_6_2601_0_records = out_df.loc[
    out_df["lane"].eq("516417601#0_6") & out_df["time"].eq(2601.0)
]

In [None]:
# Dump the records selected for this lane and timestep.
print(lanetime_516417601_0_6_2601_0_records)

## Displaying vehicle traffic for lane_516417601#0_6

In [None]:
# Every record observed on lane "516417601#0_6".
lane_516417601_0_6_records = out_df.loc[out_df["lane"].eq("516417601#0_6")]

In [None]:
# Dump the records selected for this lane.
print(lane_516417601_0_6_records)

In [None]:
# Draw the (x, y) positions recorded on this lane on axes spanning the whole
# map extent.
lane_516417601_0_6_plot = lane_516417601_0_6_records.plot(
    x="x",
    y="y",
    label="lane_516417601_0_6",
    figsize=(12, 8),
)
lane_516417601_0_6_plot.set(xlabel="longitude", ylabel="latitude")
# Limits are rounded outwards to whole units around the data's bounding box.
lane_516417601_0_6_plot.set_xlim(math.floor(min_x), math.ceil(max_x))
lane_516417601_0_6_plot.set_ylim(math.floor(min_y), math.ceil(max_y))

## Group data by time for lane_516417601#0_6

In [None]:
# Number of records on this lane per timestep, in chronological order.
# Columns of the result: "time", "records".
lane_516417601_0_6_grouped_by_time_records = (
    lane_516417601_0_6_records.groupby(["time"])["id"]
    .count()
    .reset_index(name="records")
    .sort_values(["time"], ascending=True)
)

In [None]:
# Temporarily lift pandas' row limit so up to 6800 per-timestep counts are
# printed untruncated. The ``finally`` puts the limit back even when the
# print is interrupted or fails, so later cells do not render entire frames
# by accident.
pd.set_option("display.max_rows", lane_516417601_0_6_grouped_by_time_records.shape[0])
try:
    print(lane_516417601_0_6_grouped_by_time_records[:6800])
finally:
    pd.set_option("display.max_rows", display_max_rows)

In [None]:
# Print the per-timestep counts again under the normal row limit.
print(lane_516417601_0_6_grouped_by_time_records)

## Displaying graph for number of vehicles on lane_516417601#0_6 at time "t"

In [None]:
# List the column names of the per-timestep counts frame.
lane_516417601_0_6_grouped_by_time_records.columns.tolist()

In [None]:
# Line chart of the number of records on this lane at each timestep.
lane_516417601_0_6_grouped_by_time_records_plot = (
    lane_516417601_0_6_grouped_by_time_records.plot(
        x="time",
        y="records",
        label="lane_516417601_0_6_grouped_by_time_records",
        figsize=(12, 8),
    )
)
# The map-coordinate axis limits used by the route plots are not applied
# here: these axes show time and record counts, not x/y positions.
lane_516417601_0_6_grouped_by_time_records_plot.set_xlabel(xlabel="time")
lane_516417601_0_6_grouped_by_time_records_plot.set_ylabel(ylabel="records")

## Displaying vehicle traffic for lane_620634303#5_5

In [None]:
# Every record observed on lane "620634303#5_5".
lane_620634303_5_5_records = out_df.loc[out_df["lane"].eq("620634303#5_5")]

In [None]:
# Dump the records selected for this lane.
print(lane_620634303_5_5_records)

In [None]:
# Draw the (x, y) positions recorded on this lane on axes spanning the whole
# map extent.
lane_620634303_5_5_plot = lane_620634303_5_5_records.plot(
    x="x",
    y="y",
    label="lane_620634303_5_5",
    figsize=(12, 8),
)
lane_620634303_5_5_plot.set(xlabel="longitude", ylabel="latitude")
# Limits are rounded outwards to whole units around the data's bounding box.
lane_620634303_5_5_plot.set_xlim(math.floor(min_x), math.ceil(max_x))
lane_620634303_5_5_plot.set_ylim(math.floor(min_y), math.ceil(max_y))

## Group data by time for lane_620634303#5_5

In [None]:
# Number of records on this lane per timestep, in chronological order.
# Columns of the result: "time", "records".
lane_620634303_5_5_grouped_by_time_records = (
    lane_620634303_5_5_records.groupby(["time"])["id"]
    .count()
    .reset_index(name="records")
    .sort_values(["time"], ascending=True)
)

In [None]:
# Temporarily lift pandas' row limit so up to 6800 per-timestep counts are
# printed untruncated. The ``finally`` puts the limit back even when the
# print is interrupted or fails, so later cells do not render entire frames
# by accident.
pd.set_option("display.max_rows", lane_620634303_5_5_grouped_by_time_records.shape[0])
try:
    print(lane_620634303_5_5_grouped_by_time_records[:6800])
finally:
    pd.set_option("display.max_rows", display_max_rows)

In [None]:
# Print the per-timestep counts again under the normal row limit.
print(lane_620634303_5_5_grouped_by_time_records)

## Displaying graph for number of vehicles on lane_620634303#5_5 at time "t"

In [None]:
# List the column names of the per-timestep counts frame.
lane_620634303_5_5_grouped_by_time_records.columns.tolist()

In [None]:
# Line chart of the number of records on this lane at each timestep.
lane_620634303_5_5_grouped_by_time_records_plot = (
    lane_620634303_5_5_grouped_by_time_records.plot(
        x="time",
        y="records",
        label="lane_620634303_5_5_grouped_by_time_records",
        figsize=(12, 8),
    )
)
# The map-coordinate axis limits used by the route plots are not applied
# here: these axes show time and record counts, not x/y positions.
lane_620634303_5_5_grouped_by_time_records_plot.set_xlabel(xlabel="time")
lane_620634303_5_5_grouped_by_time_records_plot.set_ylabel(ylabel="records")