## 1. Colab Setup

In [None]:
# Install PySpark and Spark NLP
! pip install -q pyspark==3.3.0 spark-nlp==4.2.8

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m281.3/281.3 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m453.8/453.8 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.7/199.7 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pyspark (setup.py) ... [?25l[?25hdone


In [None]:
import json
import pandas as pd
import numpy as np

import sparknlp
import pyspark.sql.functions as F

from pyspark.ml import Pipeline
from pyspark.sql import SparkSession
from sparknlp.annotator import *
from sparknlp.base import *
from sparknlp.pretrained import PretrainedPipeline
from pyspark.sql.types import StringType, IntegerType

In [None]:
# Load the French articles (with country/city mentions) into pandas.
df_french = pd.read_csv("/content/df_french_with_mentions.csv")

# This cell runs before the "Start Spark Session" section, so `spark` would be
# undefined on a fresh kernel (Restart & Run All -> NameError). Obtain the
# Spark NLP-enabled session here; later session-creation calls reuse it.
spark = sparknlp.start()

# Convert Pandas DataFrame to Spark DataFrame
df_french_spark = spark.createDataFrame(df_french)

## 2. Start Spark Session

In [19]:
# Start (or reuse) the Spark session through sparknlp.start() only.
# Creating a plain SparkSession first would make sparknlp.start() return that
# already-running session, and the launch-time settings it needs (the Spark NLP
# package, serializer, memory) would not be applied to it.
spark = sparknlp.start()

# Report the library versions actually in use for reproducibility.
print("Spark NLP version", sparknlp.version())
print("Apache Spark version:", spark.version)

# Last expression: render the session summary as the cell output.
spark

Spark NLP version 4.2.8
Apache Spark version: 3.3.0


## 4. Define Spark NLP pipeline

In [None]:
# Sentiment pipeline over the 'Title' column of the input DataFrame:
#   Title -> document -> sentence_embeddings -> class_

# Stage 1: wrap the raw 'Title' text into 'document' annotations
document = (
    DocumentAssembler()
    .setInputCol("Title")
    .setOutputCol("document")
)

# Stage 2: pretrained 'labse' sentence embeddings ('xx' language code)
embeddings = (
    BertSentenceEmbeddings.pretrained('labse', 'xx')
    .setInputCols(["document"])
    .setOutputCol("sentence_embeddings")
)

# Stage 3: pretrained French ('fr') sentiment classifier fed by the embeddings
sentimentClassifier = (
    ClassifierDLModel.pretrained("classifierdl_bert_sentiment", "fr")
    .setInputCols(["sentence_embeddings"])
    .setOutputCol("class_")
)

# Chain the three stages in order
pipeline_stages = [document, embeddings, sentimentClassifier]
nlpPipeline = Pipeline(stages=pipeline_stages)

labse download started this may take some time.
Approximate size to download 1.7 GB
[OK!]
classifierdl_bert_sentiment download started this may take some time.
Approximate size to download 22.2 MB
[OK!]


## 5. Run the pipeline

In [24]:

# Fit the pipeline on the French Spark DataFrame, then annotate that same frame
fitted_pipeline = nlpPipeline.fit(df_french_spark)
result = fitted_pipeline.transform(df_french_spark)

# Pull the first entry of the 'class_' results into a plain 'sentiment' column
sentiment_label = F.expr("class_.result[0]")
df_french_with_sentiment = result.withColumn("sentiment", sentiment_label)

# Sanity check: print untruncated titles next to their predicted sentiment
title_and_sentiment = df_french_with_sentiment.select("Title", "sentiment")
title_and_sentiment.show(truncate=False)


+------------------------------------------------------------------------------+---------+
|Title                                                                         |sentiment|
+------------------------------------------------------------------------------+---------+
|dix-huit prevenus condamnes dans laffaire du camion charnier                  |NEGATIVE |
|elle sengage sur lautoroute avec le reservoir presque vide et se crashe       |NEGATIVE |
|au portugal, la course au poste de premier ministre est lancee                |POSITIVE |
|le vote de journalistes, dont celui de la rts, a surpris deschamps            |NEGATIVE |
|la premiere ministre refuse de hausser le salaire des ouvriers du textile     |NEGATIVE |
|50000 civils deplaces en raison des combats dans le nord                      |NEGATIVE |
|drame des verrieres: les deux enfants ont ete indemnises                      |NEGATIVE |
|situation revenue a la normale sur la ligne lausanne  geneve                  |POSITIVE |

In [25]:
# Convert the annotated Spark DataFrame into a pandas DataFrame.
# NOTE(review): every column is carried over, including the 'document',
# 'sentence_embeddings' and 'class_' annotation columns — confirm these are
# wanted downstream, or select only the needed columns before converting.
df_french_with_sentiment_pandas = df_french_with_sentiment.toPandas()

In [27]:
# Preview the converted frame (the rendered output is truncated to the first and last rows).
df_french_with_sentiment_pandas

Unnamed: 0,Title,Header,Content,Mentioned_Countries,Mentioned_Swiss_Cities,document,sentence_embeddings,class_,sentiment
0,dix-huit prevenus condamnes dans laffaire du c...,justice francaise,un dessein purement lucratif. dix-huit des 19 ...,"{'viet nam', 'france', 'royaume-uni'}",set(),"[(document, 0, 59, dix-huit prevenus condamnes...","[(sentence_embeddings, 0, 59, dix-huit prevenu...","[(category, 0, 59, NEGATIVE, {'sentence': '0',...",NEGATIVE
1,elle sengage sur lautoroute avec le reservoir ...,argovie,malgre un signal lumineux indiquant que le sto...,{'suisse'},{'spreitenbach'},"[(document, 0, 70, elle sengage sur lautoroute...","[(sentence_embeddings, 0, 70, elle sengage sur...","[(category, 0, 70, NEGATIVE, {'sentence': '0',...",NEGATIVE
2,"au portugal, la course au poste de premier min...",politique,les principaux partis ont commence vendredi a ...,{'portugal'},set(),"[(document, 0, 61, au portugal, la course au p...","[(sentence_embeddings, 0, 61, au portugal, la ...","[(category, 0, 61, POSITIVE, {'sentence': '0',...",POSITIVE
3,"le vote de journalistes, dont celui de la rts,...",ballon d'or,dans le debat haaland-messi pour le ballon d'o...,"{'france', 'panama', 'bosnie-herzegovine', 'no...",set(),"[(document, 0, 65, le vote de journalistes, do...","[(sentence_embeddings, 0, 65, le vote de journ...","[(category, 0, 65, NEGATIVE, {'sentence': '0',...",NEGATIVE
4,la premiere ministre refuse de hausser le sala...,bangladesh,"la premiere ministre du bangladesh, sheikh has...",{'bangladesh'},set(),"[(document, 0, 72, la premiere ministre refuse...","[(sentence_embeddings, 0, 72, la premiere mini...","[(category, 0, 72, NEGATIVE, {'sentence': '0',...",NEGATIVE
...,...,...,...,...,...,...,...,...,...
1055,une tristesse indescriptible aux obseques de t...,france,nous taimons et nous ne toublierons jamais. pr...,{'france'},set(),"[(document, 0, 50, une tristesse indescriptibl...","[(sentence_embeddings, 0, 50, une tristesse in...","[(category, 0, 50, NEGATIVE, {'sentence': '0',...",NEGATIVE
1056,une voiture en feu a provoque la fermeture du ...,uri,le tunnel routier du gothard est reste ferme d...,{'suisse'},set(),"[(document, 0, 62, une voiture en feu a provoq...","[(sentence_embeddings, 0, 62, une voiture en f...","[(category, 0, 62, NEGATIVE, {'sentence': '0',...",NEGATIVE
1057,la commune voisine de paleo veut negocier des ...,trelex (vd),"ces derniers jours, le conseil communal de tre...",set(),set(),"[(document, 0, 63, la commune voisine de paleo...","[(sentence_embeddings, 0, 63, la commune voisi...","[(category, 0, 63, NEGATIVE, {'sentence': '0',...",NEGATIVE
1058,les patins crissent a nouveau au parc des bast...,ville de geneve,les mesures covid en 2021 puis les restriction...,{'schweiz'},{'geneve'},"[(document, 0, 49, les patins crissent a nouve...","[(sentence_embeddings, 0, 49, les patins criss...","[(category, 0, 49, NEGATIVE, {'sentence': '0',...",NEGATIVE


In [30]:
# Write the results to a CSV file (without the pandas index) in the working directory
output_csv = 'french_sentiment_analysis.csv'
df_french_with_sentiment_pandas.to_csv(output_csv, index=False)

# Hand the written file to the browser for download (Colab only)
from google.colab import files
files.download(output_csv)

