In [2]:
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.classification import LogisticRegression
from pyspark.sql.functions import col

# Master URL requested for this notebook's Spark session.
SPARK_MASTER = "spark://spark-master:7077"

# getOrCreate() hands back an already-running session when one exists; in that
# case the master requested through the builder is not applied to it.
spark = (
    SparkSession.builder.appName("PySparkML_Test")
    .master(SPARK_MASTER)
    .getOrCreate()
)
# Report the master the session is actually bound to, not the requested
# constant, so the banner stays truthful when an existing session is reused.
print(f"--- Spark Session démarrée et connectée au Master : {spark.sparkContext.master} ---")
print("-" * 50)

# Toy dataset: each row is (feat_1, feat_2, label), with a 0.0/1.0 label.
columns = ["feat_1", "feat_2", "label"]
data = [
    (1.0, 5.0, 1.0), (2.0, 6.0, 0.0),
    (3.0, 7.0, 1.0), (4.0, 8.0, 0.0),
    (5.0, 9.0, 1.0), (6.0, 4.0, 0.0),
    (7.0, 3.0, 1.0), (8.0, 2.0, 0.0),
]

# Column names come from `columns`; column types are inferred from the values.
df = spark.createDataFrame(data, schema=columns)

print("Jeu de données initial:")
df.show()

# Combine the two feature columns into the single vector column "features".
assembler = VectorAssembler(
    inputCols=["feat_1", "feat_2"],
    outputCol="features",
)
output = assembler.transform(df)

# Keep only the columns used for training; the label is cast to an integer.
training_data = output.select(col("label").cast("int"), col("features"))

print("Données prêtes pour le ML:")
training_data.show(truncate=False)

# Logistic regression estimator, limited to 10 optimisation iterations.
lr = LogisticRegression(
    featuresCol="features",
    labelCol="label",
    maxIter=10,
)
print("Démarrage de l'entraînement du modèle...")

lr_model = lr.fit(training_data)

# Show the fitted parameters.
print(f"Coefficient pour les features : {lr_model.coefficients}")
print(f"Intercept : {lr_model.intercept}")

# Score the same rows that were used for fitting (no held-out split here).
predictions = lr_model.transform(training_data)
print("\nPrédictions:")
displayed_columns = ["label", "prediction", "probability", "features"]
predictions.select(*displayed_columns).show(truncate=False)

# Shut the session down, then print the closing separator and confirmation.
spark.stop()
print("-" * 50, "Spark Session arrêtée.", sep="\n")

25/12/08 22:07:00 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


--- Spark Session démarrée et connectée au Master : spark://spark-master:7077 ---
--------------------------------------------------
Jeu de données initial:


                                                                                

+------+------+-----+
|feat_1|feat_2|label|
+------+------+-----+
|   1.0|   5.0|  1.0|
|   2.0|   6.0|  0.0|
|   3.0|   7.0|  1.0|
|   4.0|   8.0|  0.0|
|   5.0|   9.0|  1.0|
|   6.0|   4.0|  0.0|
|   7.0|   3.0|  1.0|
|   8.0|   2.0|  0.0|
+------+------+-----+

Données prêtes pour le ML:


                                                                                

+-----+---------+
|label|features |
+-----+---------+
|1    |[1.0,5.0]|
|0    |[2.0,6.0]|
|1    |[3.0,7.0]|
|0    |[4.0,8.0]|
|1    |[5.0,9.0]|
|0    |[6.0,4.0]|
|1    |[7.0,3.0]|
|0    |[8.0,2.0]|
+-----+---------+

Démarrage de l'entraînement du modèle...


25/12/08 22:07:09 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS
                                                                                

--- ✅ Modèle entraîné avec succès ! ---
Coefficient pour les features : [-0.12977479290489574,0.12977479290489558]
Intercept : -0.13780008530295532

Prédictions:
+-----+----------+----------------------------------------+---------+
|label|prediction|probability                             |features |
+-----+----------+----------------------------------------+---------+
|1    |1.0       |[0.405813611491472,0.594186388508528]   |[1.0,5.0]|
|0    |1.0       |[0.405813611491472,0.594186388508528]   |[2.0,6.0]|
|1    |1.0       |[0.40581361149147205,0.5941863885085279]|[3.0,7.0]|
|0    |1.0       |[0.4058136114914721,0.5941863885085279] |[4.0,8.0]|
|1    |1.0       |[0.4058136114914721,0.5941863885085279] |[5.0,9.0]|
|0    |0.0       |[0.5980507238966765,0.40194927610332354]|[6.0,4.0]|
|1    |0.0       |[0.6585635059504352,0.3414364940495648] |[7.0,3.0]|
|0    |0.0       |[0.714317980714508,0.285682019285492]   |[8.0,2.0]|
+-----+----------+----------------------------------------+---------