In [5]:
from pyspark.sql import SparkSession, functions as F
from delta import configure_spark_with_delta_pip
from pyspark.sql.functions import col, trim, lower, regexp_replace
import os

In [6]:
# Directory holding the MongoDB connector jars handed to Spark via `spark.jars`.
# Set the TFM_JARS_DIR environment variable to run the notebook on another
# machine; when it is unset the original location is used unchanged.
jars_dir = os.environ.get(
    "TFM_JARS_DIR",
    "/home/provira/Documents/TFM/TFM/src/P2/trusted_zone/jars",
)

# Full paths of the two jars referenced when the Spark session is built.
mongo_connector_jar = os.path.join(jars_dir, "mongo-spark-connector_2.12-3.0.1.jar")
mongo_driver_jar = os.path.join(jars_dir, "mongo-java-driver-3.12.10.jar")

In [None]:
# Session settings, applied in the order listed: the two Mongo jars on the
# classpath, the Delta Lake SQL extension and catalog, and the local MongoDB
# connection URIs.
# NOTE(review): the `spark.mongodb.read/write.connection.uri` key names look
# like the ones used by the 10.x connector, while the jar configured here is
# 3.0.1 — confirm these two settings take effect. The read and write cells
# pass their own `uri` option explicitly.
spark_settings = {
    "spark.jars": f"{mongo_connector_jar},{mongo_driver_jar}",
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    "spark.mongodb.read.connection.uri": "mongodb://localhost:27017",
    "spark.mongodb.write.connection.uri": "mongodb://localhost:27017",
}

builder = SparkSession.builder.appName("Explotation_zone")
for setting_key, setting_value in spark_settings.items():
    builder = builder.config(setting_key, setting_value)

# Let the Delta helper finish configuring the builder, then start (or reuse)
# the session.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [8]:
# Load the `tf-idf` collection of the `tfm` database from the local MongoDB
# instance into a Spark DataFrame.
tfidf_reader = spark.read.format("mongo")
tfidf_reader = tfidf_reader.option("uri", "mongodb://localhost:27017/tfm.tf-idf")
df = tfidf_reader.load()


                                                                                

# JOIN - Positive Emotions

In [9]:
# Emotion labels grouped by polarity. They are matched with `isin`, i.e. by
# exact string equality.
# NOTE(review): confirm these spellings match the label values actually stored
# in the dataset.

# Labels treated as positive.
good_emotions = [
    "happy", "joy", "love", "excited", "relieved",
    "grateful", "content", "proud", "hopeful", "amused",
]

# Labels treated as negative.
bad_emotions = [
    "sad", "angry", "fear", "disgust", "anxious",
    "jealous", "frustrated", "guilty", "ashamed", "lonely",
]

In [None]:
# Show the schema so the exact column names are visible before filtering.
print(df.columns)

# Keep only the rows whose label is one of the positive emotions.
# The label column is stored as "Emotion" (capital E). It is referenced by name
# through col() because DataFrame attribute access (`df.emotion`) is an exact,
# case-sensitive lookup against df.columns and raises AttributeError here.
# NOTE(review): dropDuplicates() compares every column, including `_id`; if
# `_id` is unique per document this removes nothing — confirm the intended
# de-duplication key.
df_positive = (
    df.filter(col("Emotion").isin(good_emotions))
    .dropDuplicates()
)
df_positive.show(5)


['Emotion', '_id', 'filtered_words', 'raw_features_array', 'stemmed_words', 'text', 'tfidf_features_array', 'words']


                                                                                

+-------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+
|Emotion|                 _id|      filtered_words|  raw_features_array|       stemmed_words|                text|tfidf_features_array|               words|
+-------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+
|   love|{685ea75d0c4b1155...|[think, uniform, ...|[1.0, 0.0, 0.0, 0...|[think, uniform, ...|i think his unifo...|[0.02426812797784...|[i, think, his, u...|
|   love|{685ea7590c4b1155...|[feel, like, im, ...|[1.0, 1.0, 1.0, 0...|[feel, like, im, ...|i feel like im ba...|[0.02426812797784...|[i, feel, like, i...|
|   love|{685ec32c0a3cc53c...|[feel, jealous, b...|[1.0, 0.0, 0.0, 0...|[feel, jealous, b...|i feel jealous be...|[0.02426812797784...|[i, feel, jealous...|
|   love|{685ec3360a3cc53c...|[feel, passionate...|[1.0, 0

In [None]:
# Columns exported to the exploitation zone; `_id` is not in the list, so it is
# not part of the written DataFrame.
positive_export_columns = [
    "text", "emotion", "words", "filtered_words", "stemmed_words",
    "raw_features_array", "tfidf_features_array",
]
positive_export = df_positive.select(*positive_export_columns)

# Append the rows to the `join_positive_emotions` collection of the
# `tfm_explotation_zone` database. Append mode adds to whatever is already
# stored, so re-running this cell writes the same rows again.
(
    positive_export.write
    .format("mongo")
    .option("uri", "mongodb://localhost:27017/tfm_explotation_zone.join_positive_emotions")
    .mode("append")
    .save()
)

                                                                                