In [1]:
!apt-get install openjdk-8-jdk-headless -qq > /dev/null

In [2]:
#!wget -q https://www-us.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop2.7.tgz
!wget -q https://dlcdn.apache.org/spark/spark-3.5.0/spark-3.5.0-bin-hadoop3.tgz

In [3]:
!tar xf spark-3.5.0-bin-hadoop3.tgz

In [4]:
!pip install -q findspark

In [5]:
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/content/spark-3.5.0-bin-hadoop3"

In [6]:
import findspark
findspark.init()

In [7]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("AR Mining").getOrCreate()

In [8]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

file_path = "data.csv"
data = spark.read.csv(file_path, header=True, inferSchema=True)
#data.show(5)

In [9]:
data = data.dropna()
columns_to_drop = ['lat', 'lng', 'desc', 'timeStamp', 'addr', 'title', 'zip']
data = data.drop(*columns_to_drop)
data.show(5)

  and should_run_async(code)


+-----------------+-------+------------------+------+-----------+------------+
|              twp|service|              type|season|time_of_day|time_of_week|
+-----------------+-------+------------------+------+-----------+------------+
|      NEW HANOVER|    EMS| BACK PAINS/INJURY|winter|    evening|     weekday|
|HATFIELD TOWNSHIP|    EMS|DIABETIC EMERGENCY|winter|    evening|     weekday|
|       NORRISTOWN|   Fire|     GAS-ODOR/LEAK|winter|  afternoon|     weekday|
|       NORRISTOWN|    EMS| CARDIAC EMERGENCY|winter|  afternoon|     weekday|
| LOWER POTTSGROVE|    EMS|         DIZZINESS|winter|  afternoon|     weekday|
+-----------------+-------+------------------+------+-----------+------------+
only showing top 5 rows



In [10]:
import numpy as np
from pyspark.ml.fpm import FPGrowth
from pyspark.sql.functions import split
from pyspark.sql.functions import array, col

# Assuming 'col1', 'col2', 'col3' are columns you want to use as antecedents
antecedent_cols = ['twp', 'service', 'type', 'season', 'time_of_day', 'time_of_week']

# Merging multiple columns into a single array column
data = data.withColumn("items", array(*[col(col_name) for col_name in antecedent_cols]))

# Selecting the merged column as transactions
transactions = data.select("items")

# Create an FPGrowth instance
fp_growth = FPGrowth(itemsCol="items", minSupport=0.01, minConfidence=0.2)

# Fit the model on your data
model = fp_growth.fit(transactions)

  and should_run_async(code)


In [11]:
model.freqItemsets.show(truncate=False, n=200)

  and should_run_async(code)


+-------------------------------------------------------+------+
|items                                                  |freq  |
+-------------------------------------------------------+------+
|[HEAD INJURY]                                          |17204 |
|[HEAD INJURY, EMS]                                     |17201 |
|[HEAD INJURY, EMS, weekday]                            |12367 |
|[HEAD INJURY, weekday]                                 |12370 |
|[UNKNOWN MEDICAL EMERGENCY]                            |9925  |
|[UNKNOWN MEDICAL EMERGENCY, EMS]                       |9923  |
|[UNKNOWN MEDICAL EMERGENCY, EMS, weekday]              |7161  |
|[UNKNOWN MEDICAL EMERGENCY, weekday]                   |7163  |
|[NAUSEA/VOMITING]                                      |7214  |
|[NAUSEA/VOMITING, EMS]                                 |7212  |
|[WEST NORRITON]                                        |10582 |
|[WEST NORRITON, weekday]                               |7922  |
|[spring]                

In [12]:
model.associationRules.select("antecedent", "consequent", "confidence").show(truncate=False, n=200)

  and should_run_async(code)


+----------------------------------------------+------------------+-------------------+
|antecedent                                    |consequent        |confidence         |
+----------------------------------------------+------------------+-------------------+
|[PLYMOUTH]                                    |[Traffic]         |0.4676349198056528 |
|[PLYMOUTH]                                    |[VEHICLE ACCIDENT]|0.3618410741340578 |
|[PLYMOUTH]                                    |[EMS]             |0.4074499764902565 |
|[PLYMOUTH]                                    |[afternoon]       |0.33974191526043573|
|[PLYMOUTH]                                    |[weekday]         |0.7628128101980043 |
|[FALL VICTIM]                                 |[spring]          |0.26537306112439457|
|[FALL VICTIM]                                 |[fall]            |0.22288639568389432|
|[FALL VICTIM]                                 |[morning]         |0.31515541659003127|
|[FALL VICTIM]                  