In [121]:
import pyspark.sql.functions as F
from pyspark.sql.window import Window
from pyspark.sql.functions import col, row_number
from pyspark.sql import SparkSession

#### **Functions**

- `select()` - seleciona colunas
- `filter()` - filtra linhas

In [122]:
# Build (or reuse, via getOrCreate) the Spark session used by this notebook.
#
# The master URL "local" runs Spark with a single worker thread, so the four
# cores requested through spark.executor.cores were never actually used.
# "local[4]" asks for four worker threads, which matches the intent of that
# setting; the setting itself is kept so the configuration stays explicit.
spark = (
    SparkSession.builder
    .master("local[4]")
    .appName("RoadTracker")
    .config("spark.driver.memory", "8g")
    .config("spark.executor.cores", "4")
    .getOrCreate()
)

In [123]:
# Read the mock road CSV: the first line holds the column names and the
# column types are inferred from the data.
df = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("../mock/all_roads.csv")
)

In [124]:
# ANALYSIS 1: NUMBER OF MONITORED ROADS
# Count the distinct values of the "road" column.
distinct_roads = df.select("road").distinct()
n_roads = distinct_roads.count()
print("Number of roads: {}".format(n_roads))

Number of roads: 5


In [125]:
# ANALYSIS 2: TOTAL NUMBER OF MONITORED VEHICLES
# Count the distinct values of the "plate" column.
distinct_plates = df.select("plate").distinct()
n_cars = distinct_plates.count()
print("Number of cars: {}".format(n_cars))

Number of cars: 248


In [126]:
# SPEED AND ACCELERATION FROM THE LATEST SAMPLES OF EACH PLATE
#
# NOTE(review): this cell rebinds `df`, so it is not idempotent -- running it
# a second time operates on the already-reduced frame. Restart and run all
# cells in order.
# NOTE(review): "speed" is a plain difference of x between two samples; it is
# not divided by the time difference -- confirm this is the intended unit.

# Within each plate, newest sample first.
windowDept = Window.partitionBy("plate").orderBy(F.col("time").desc())

# Keep only the three most recent samples of every plate.
df = (
    df.withColumn("row", F.row_number().over(windowDept))
    .where(F.col("row") <= 3)
)

# Speed: absolute change in x relative to the next row of the window, i.e.
# the sample recorded immediately before this one.
df = df.withColumn(
    "speed",
    F.abs(F.col("x") - F.lead("x", 1).over(windowDept)),
)

# Acceleration: change in speed relative to that same earlier sample.
df = df.withColumn(
    "acc",
    F.col("speed") - F.lead("speed", 1).over(windowDept),
)

# Rows lacking an earlier sample end up with null speed/acc. na.drop() removes
# every row that has a null in any column; the helper "row" column is then
# discarded.
df = df.na.drop().drop("row")

df.show()

+-----+----------+---------+---+---+-----+--------------------+---------+-----+---+
| road|road_speed|road_size|  x|  y|plate|                time|direction|speed|acc|
+-----+----------+---------+---+---+-----+--------------------+---------+-----+---+
|road3|       120|     1000|220|  2|AC272| 1.686422274283066E9|        1|    0|  0|
|road3|       120|     1000|331|  9|AU218| 1.686422286230809E9|       -1|   72|  2|
|road1|       120|     1000|802|  8|AU372| 1.686422266813969E9|       -1|    0|  0|
|road4|       120|     1000|862|  6|AV516|1.6864222740496042E9|       -1|    0|-42|
|road0|       120|     1000|235|  9|AW795|1.6864222719671092E9|       -1|  111| -2|
|road3|       120|     1000|593|  9|BE498| 1.686422286057657E9|       -1|   81| -4|
|road1|       120|     1000|681|  7|BF544|1.6864221999083478E9|       -1|    0|  0|
|road1|       120|     1000|252|  1|BK572| 1.686422235138796E9|        1|    0|  0|
|road0|       120|     1000|843|  8|BP113| 1.686422269291418E9|       -1|   

In [127]:
# ANALYSIS 3: NUMBER OF VEHICLES ABOVE THE SPEED LIMIT

# 1 when the computed speed exceeds the road's limit, 0 otherwise.
speeding_flag = F.when(F.col("speed") > F.col("road_speed"), 1).otherwise(0)
df = df.withColumn("over_speed_limit", speeding_flag)

# Distinct plates that carry the flag.
speeding_plates = (
    df.where(F.col("over_speed_limit") == 1)
    .select("plate")
    .distinct()
)
cars_over_speed_limit = speeding_plates.count()

print("Number of cars over the speed limit: {}".format(cars_over_speed_limit))

Number of cars over the speed limit: 1


In [128]:
# COLLISION-RISK FLAG
#
# Rows sharing the same road and y value are ordered by increasing x, and each
# row is compared with its neighbour in that ordering.
windowDept = Window.partitionBy("road", "y").orderBy("x")

# An earlier variant compared x + speed * direction against the next row only.
# It worked, but it flagged a different vehicle of the pair depending on the
# travel direction, so the direction-specific comparison below is used instead.

# Projected value of x: speed and acc are added when direction == 1 and
# subtracted for any other direction value.
reach_up = F.col("x") + F.col("speed") + F.col("acc")
reach_down = F.col("x") - F.col("speed") - F.col("acc")

# The same projection for the neighbouring row: next-higher x in the first
# case, next-lower x in the second. A missing neighbour yields null, which
# makes the comparison fall through to 0.
neighbour_reach_up = F.lead(reach_up, 1).over(windowDept)
neighbour_reach_down = F.lag(reach_down, 1).over(windowDept)

flag_up = F.when(reach_up > neighbour_reach_up, 1).otherwise(0)
flag_down = F.when(reach_down < neighbour_reach_down, 1).otherwise(0)

df = df.withColumn(
    "collision_risk",
    F.when(F.col("direction") == 1, flag_up).otherwise(flag_down),
)

df.show(200)

+-----+----------+---------+---+---+-----+--------------------+---------+-----+---+----------------+--------------+
| road|road_speed|road_size|  x|  y|plate|                time|direction|speed|acc|over_speed_limit|collision_risk|
+-----+----------+---------+---+---+-----+--------------------+---------+-----+---+----------------+--------------+
|road0|       120|     1000|770|  0|CN934| 1.686422283014108E9|        1|   91| -1|               0|             0|
|road0|       120|     1000|223|  1|EV877|1.6864222815837052E9|        1|    0|  0|               0|             0|
|road0|       120|     1000|223|  1|GF380| 1.686422282002362E9|        1|    0|  0|               0|             0|
|road0|       120|     1000|223|  1|MD954| 1.686422235568651E9|        1|    0|  0|               0|             0|
|road0|       120|     1000|223|  1|RV000| 1.686422281515041E9|        1|    0|  0|               0|             0|
|road0|       120|     1000|223|  1|UJ592|1.6864222814049962E9|        1

In [129]:
# ANALYSIS 4: NUMBER OF VEHICLES WITH COLLISION RISK

# Distinct plates whose collision_risk flag is set.
risky_plates = (
    df.where(F.col("collision_risk") == 1)
    .select("plate")
    .distinct()
)
cars_collision_risk = risky_plates.count()

print("Number of cars with collision risk: {}".format(cars_collision_risk))

Number of cars with collision risk: 14


In [130]:
# ANALYSIS 5: LIST OF VEHICLES ABOVE THE SPEED LIMIT
# Plate, speed, and whether the vehicle is also flagged with collision risk.
over_limit_rows = df.where(F.col("over_speed_limit") == 1)
CollectionOverSpeedLimit = (
    over_limit_rows
    .select("plate", "speed", "collision_risk")
    .collect()
)

CollectionOverSpeedLimit

[Row(plate='VQ482', speed=125, collision_risk=0)]

In [131]:
# ANALYSIS 6: LIST OF VEHICLES WITH COLLISION RISK
# Plate and speed of every row whose collision_risk flag is set.
collision_rows = df.where(F.col("collision_risk") == 1)
CollectionCollisionRisk = collision_rows.select("plate", "speed").collect()

CollectionCollisionRisk

[Row(plate='XU944', speed=0),
 Row(plate='SK487', speed=0),
 Row(plate='SM452', speed=0),
 Row(plate='GD632', speed=71),
 Row(plate='TU466', speed=74),
 Row(plate='UV613', speed=0),
 Row(plate='FL536', speed=0),
 Row(plate='IQ682', speed=112),
 Row(plate='RJ739', speed=89),
 Row(plate='CT370', speed=102),
 Row(plate='WW903', speed=109),
 Row(plate='HL932', speed=60),
 Row(plate='LQ402', speed=0),
 Row(plate='JF661', speed=0)]