## Pyspark SetUp

In [1]:
import pyspark

# Setup Spark session
from pyspark.sql import SparkSession
from pyspark.context import SparkContext
from pyspark.sql import functions as F

#Sspark = SparkSession.builder.master("local[*]").getOrCreate()

spark = SparkSession.builder.getOrCreate()


# Read csv file wiith spark.read_csv()
titles_path = "./tmp/netflix_titles_dirty*.csv.gz"
df = spark.read.option("sep", "\t").option("header", False).csv(titles_path)

df.show(4)

+--------+-----+--------------------+----------------+--------------------+--------------+----------------+----+-----+-------+--------------------+--------------------+
|     _c0|  _c1|                 _c2|             _c3|                 _c4|           _c5|             _c6| _c7|  _c8|    _c9|                _c10|                _c11|
+--------+-----+--------------------+----------------+--------------------+--------------+----------------+----+-----+-------+--------------------+--------------------+
|70206826|Movie|    Victim of Beauty|     Roger Young|William Devane, J...| United States| October 1, 2011|1991|   NR| 93 min|   Dramas, Thrillers|A beauty pageant ...|
|81029831|Movie|All's Well, End's...|     Vincent Kok|Louis Koo, Sandra...|     Hong Kong|November 9, 2018|2009|TV-14| 99 min|Comedies, Interna...|Bound by a family...|
|80194289|Movie|Gabriel and the M...| Fellipe Barbosa|João Pedro Zappa,...|Brazil, France|November 9, 2018|2017|TV-MA|132 min|Dramas, Independe...|In a dra

## Dataset preparation

In [3]:
#  Copy dataset locally - descargará el archivo desde la URL proporcionada y lo guardará en la carpeta /tmp

!curl -o ./tmp/netflix_titles_dirty_02.csv.gz https://github.com/datacamp/data-cleaning-with-pyspark-live-training/blob/master/data/netflix_titles_dirty_02.csv.gz?raw=True
!curl -o ./tmp/netflix_titles_dirty_03.csv.gz https://github.com/datacamp/data-cleaning-with-pyspark-live-training/blob/master/data/netflix_titles_dirty_03.csv.gz?raw=True
!curl -o ./tmp/netflix_titles_dirty_04.csv.gz https://github.com/datacamp/data-cleaning-with-pyspark-live-training/blob/master/data/netflix_titles_dirty_04.csv.gz?raw=True
!curl -o ./tmp/netflix_titles_dirty_05.csv.gz https://github.com/datacamp/data-cleaning-with-pyspark-live-training/blob/master/data/netflix_titles_dirty_05.csv.gz?raw=True
!curl -o ./tmp/netflix_titles_dirty_06.csv.gz https://github.com/datacamp/data-cleaning-with-pyspark-live-training/blob/master/data/netflix_titles_dirty_06.csv.gz?raw=True
!curl -o ./tmp/netflix_titles_dirty_07.csv.gz https://github.com/datacamp/data-cleaning-with-pyspark-live-training/blob/master/data/netflix_titles_dirty_07.csv.gz?raw=True



In [4]:
from pyspark.sql import functions as F
df.count()
F.col("_c0")
# Select column with alias name
df.select("_c0").alias("r1").show()



+--------+
|     _c0|
+--------+
|70206826|
|81029831|
|80194289|
|80191219|
|81005091|
|80169786|
|80190859|
|80218634|
|81022683|
|80201862|
|80114869|
|80989924|
|80057250|
|70243445|
|80201542|
|81079723|
|81003058|
|80236993|
|80057565|
|81016857|
+--------+
only showing top 20 rows



#### Testing some pyspark stuff

In [5]:
# Creating dataFrame (row numbers must match)
df_test = spark.createDataFrame([("hola", "None", "kd"), ("jsd", "k", "jfef")], ['Col_a', 'Col_b']) # CORRECT 
print("Correct DF creation")
df_test.show()

from pyspark.sql.functions import isnull

# DF with Alias
print("DF creation with Alias columns")
df_test_corre_alias = df_test.select("Col_a", "Col_b", isnull("Col_a").alias("r1"), isnull(df_test.Col_b).alias("r2"))
df_test_corre_alias.show()
# df_test_INCORR= spark.createDataFrame([("hola", "None", "kd"), ("jsd", "k")]) # INCORRECT
# df_test_INCORR.show()

Correct DF creation
+-----+-----+----+
|Col_a|Col_b|  _3|
+-----+-----+----+
| hola| None|  kd|
|  jsd|    k|jfef|
+-----+-----+----+

DF creation with Alias columns
+-----+-----+-----+-----+
|Col_a|Col_b|   r1|   r2|
+-----+-----+-----+-----+
| hola| None|false|false|
|  jsd|    k|false|false|
+-----+-----+-----+-----+



In [6]:
# Utiliza collect() para recuperar los datos como una lista de filas
collected_data = df_test.collect()

# Itera sobre la lista de filas
for row in df_test:
    print(row)

Column<'Col_a'>
Column<'Col_b'>
Column<'_3'>


In [7]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

# (StructField("f1", StringType(), True) == StructField("f1", StringType(), True))

# Use StructType function
struct1 = StructType([StructField("f1", StringType(), True),
                      StructField("f1", IntegerType(), True),
                      StructField("f1", StringType(), True)
                      ])

df_test2 = spark.createDataFrame([("hola", 1, "kd"), ("jsd", 3, "jd")], struct1)
df_test2.show()

+----+---+---+
|  f1| f1| f1|
+----+---+---+
|hola|  1| kd|
| jsd|  3| jd|
+----+---+---+



### Schema and nulls

In [9]:
from pyspark.sql.functions import isnull

# Check if is null, if it does it won't convert to int
df._c0.cast("int")
df.printSchema()

# count entries that are not null in first column
print(df.select(isnull("_c0").alias("isnull")).filter("isnull = false").count())


# Determine how many rows have a column that converts properly to an integer value
df.select(F.col("_c0").cast("int").isNotNull()).count()


root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)
 |-- _c2: string (nullable = true)
 |-- _c3: string (nullable = true)
 |-- _c4: string (nullable = true)
 |-- _c5: string (nullable = true)
 |-- _c6: string (nullable = true)
 |-- _c7: string (nullable = true)
 |-- _c8: string (nullable = true)
 |-- _c9: string (nullable = true)
 |-- _c10: string (nullable = true)
 |-- _c11: string (nullable = true)

1999


2000

In [10]:
df_isnotnull = df.filter(F.col("_c0").cast("int").isNotNull())
df_isnotnull.show(5)
df_isnull = df.filter(F.col("_c0").cast("int").isNull())
df_isnull.show(5)


+--------+-------+--------------------+----------------+--------------------+--------------+----------------+----+-----+--------+--------------------+--------------------+
|     _c0|    _c1|                 _c2|             _c3|                 _c4|           _c5|             _c6| _c7|  _c8|     _c9|                _c10|                _c11|
+--------+-------+--------------------+----------------+--------------------+--------------+----------------+----+-----+--------+--------------------+--------------------+
|70206826|  Movie|    Victim of Beauty|     Roger Young|William Devane, J...| United States| October 1, 2011|1991|   NR|  93 min|   Dramas, Thrillers|A beauty pageant ...|
|81029831|  Movie|All's Well, End's...|     Vincent Kok|Louis Koo, Sandra...|     Hong Kong|November 9, 2018|2009|TV-14|  99 min|Comedies, Interna...|Bound by a family...|
|80194289|  Movie|Gabriel and the M...| Fellipe Barbosa|João Pedro Zappa,...|Brazil, France|November 9, 2018|2017|TV-MA| 132 min|Dramas, Ind

In [11]:
# count entries that are not null in first column
df_true = (df.select("*", isnull("_c0").alias("isnull")).filter("isnull = true"))
df_true.show()


+----+----+----+----+----+----+----+----+----+----+----+----+------+
| _c0| _c1| _c2| _c3| _c4| _c5| _c6| _c7| _c8| _c9|_c10|_c11|isnull|
+----+----+----+----+----+----+----+----+----+----+----+----+------+
|NULL|TV-G|NULL|NULL|NULL|NULL|NULL|NULL|NULL|NULL|NULL|NULL|  true|
+----+----+----+----+----+----+----+----+----+----+----+----+------+



In [12]:
from pyspark.sql.functions import isnull
df_prueba = spark.createDataFrame([(1, None), (None, 2)], ("a", "b"))
df_prueba.select("a", "b", isnull("a").alias("r1"), isnull("b").alias("r2")).show()

+----+----+-----+-----+
|   a|   b|   r1|   r2|
+----+----+-----+-----+
|   1|NULL|false| true|
|NULL|   2| true|false|
+----+----+-----+-----+



### Fixing entries starting "#"

In [13]:
# Load the files into a DataFrame with a single column
#titles_path = "./tmp/netflix_titles_dirty*.csv.gz"
df_single_col = spark.read.option("sep", "{").option("header", False).csv(titles_path)
df_single_col.show(10)
df_single_col.count()

+--------------------+
|                 _c0|
+--------------------+
|70206826\tMovie\t...|
|81029831\tMovie\t...|
|80194289\tMovie\t...|
|80191219\tMovie\t...|
|81005091\tTV Show...|
|80169786\tTV Show...|
|80190859\tMovie\t...|
|80218634\tTV Show...|
|81022683\tTV Show...|
|80201862\tTV Show...|
+--------------------+
only showing top 10 rows



2000

In [14]:
# Filter DataFrame and show rows that starts with #
df_filtered = df_single_col.filter(~F.col("_c0").startswith("#"))
df_filtered.count()
df_filtered.show()

+--------------------+
|                 _c0|
+--------------------+
|70206826\tMovie\t...|
|81029831\tMovie\t...|
|80194289\tMovie\t...|
|80191219\tMovie\t...|
|81005091\tTV Show...|
|80169786\tTV Show...|
|80190859\tMovie\t...|
|80218634\tTV Show...|
|81022683\tTV Show...|
|80201862\tTV Show...|
|80114869\tTV Show...|
|80989924\tTV Show...|
|80057250\tTV Show...|
|70243445\tMovie\t...|
|80201542\tMovie\t...|
|81079723\tMovie\t...|
|81003058\tMovie\t...|
|80236993\tMovie\t...|
|80057565\tMovie\t...|
|81016857\tTV Show...|
+--------------------+
only showing top 20 rows



###  Column counts

In [15]:
from pyspark.sql.functions import split

df_single_splitted = df_filtered.select(split(F.col("_c0"), "\t").alias("palabras"))
df_single_splitted.show(5, truncate=False)

+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|palabras                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    

In [16]:
# Add a column representing the total number of fields / columns

from pyspark.sql.functions import size

df_single_splitted = df_filtered.withColumn("cantidad_palabras", size((split(F.col("_c0"), "\t"))))

df_single_splitted = df_single_splitted.sort(df_single_splitted.cantidad_palabras.desc())
df_single_splitted.show(5)


+--------------------+-----------------+
|                 _c0|cantidad_palabras|
+--------------------+-----------------+
|80123950\tTV Show...|               33|
|70044693\tMovie\t...|               33|
|80218819\tTV Show...|               33|
|81033086\tMovie\t...|               32|
|80004534\tMovie\t...|               31|
+--------------------+-----------------+
only showing top 5 rows



In [17]:
# Show rows with a fieldcount > 12

df_single_splitted_above12 = df_single_splitted.filter(df_single_splitted.cantidad_palabras > 12)
df_single_splitted_above12.count()



12

In [18]:
# Check for any rows with fewer than 12 columns

df_single_splitted_below12 = df_single_splitted.filter(df_single_splitted.cantidad_palabras < 12)
df_single_splitted_below12.count()

14

In [19]:
# Determine total number of "bad" rows

df_filtered_char = df_filtered.filter(F.col("_c0").startswith("#"))
df_filtered_char.count()

0

In [20]:
# Set the dataframe without the bad rows 
# df_single_splitted = df_filtered.select(split(F.col("_c0"), "\t").alias("palabras"))
df_single_splitted_twelve = df_single_splitted.filter(df_single_splitted.cantidad_palabras == 12)
df_single_splitted_twelve.count()

1960

In [21]:
df_single_splitted_twelve_final = df_single_splitted_twelve.withColumn("cols_splitted", ((split(F.col("_c0"), "\t"))))
df_single_splitted_twelve_final.show(5)

+--------------------+-----------------+--------------------+
|                 _c0|cantidad_palabras|       cols_splitted|
+--------------------+-----------------+--------------------+
|70206826\tMovie\t...|               12|[70206826, Movie,...|
|80142103\tMovie\t...|               12|[80142103, Movie,...|
|81029831\tMovie\t...|               12|[81029831, Movie,...|
|80179907\tMovie\t...|               12|[80179907, Movie,...|
|80194289\tMovie\t...|               12|[80194289, Movie,...|
+--------------------+-----------------+--------------------+
only showing top 5 rows



### Arranging column Names and structures

In [22]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

df_1 = (
    df_single_splitted_twelve_final
    .withColumn(
        'show_id', df_single_splitted_twelve_final.cols_splitted.getItem(0).cast(IntegerType())

        )
    .withColumn(
        'type', df_single_splitted_twelve_final.cols_splitted.getItem(1).cast(StringType())
    )
    .withColumn(
        'title', df_single_splitted_twelve_final.cols_splitted.getItem(2).cast(StringType())
    )
        .withColumn(
        'director', df_single_splitted_twelve_final.cols_splitted.getItem(3).cast(StringType())
    )
        .withColumn(
        'cast', df_single_splitted_twelve_final.cols_splitted.getItem(4).cast(StringType())
    )
        .withColumn(
        'country', df_single_splitted_twelve_final.cols_splitted.getItem(5).cast(StringType())
    )
        .withColumn(
        'date_added', df_single_splitted_twelve_final.cols_splitted.getItem(6).cast(StringType())
    )
        .withColumn(
        'release_year', df_single_splitted_twelve_final.cols_splitted.getItem(7).cast(StringType())
    )
        .withColumn(
        'rating', df_single_splitted_twelve_final.cols_splitted.getItem(8).cast(StringType())
    )
        .withColumn(
        'duration', df_single_splitted_twelve_final.cols_splitted.getItem(9).cast(StringType())
    )
        .withColumn(
        'listed_in', df_single_splitted_twelve_final.cols_splitted.getItem(10).cast(StringType())
    )
        .withColumn(
        'description', df_single_splitted_twelve_final.cols_splitted.getItem(11).cast(StringType())
    )
    )

df_1 = df_1.drop("_c0", "cantidad_palabras", "cols_splitted")
df_1.show(5)


+--------+-------+--------------------+----------------+--------------------+--------------+----------------+------------+------+--------+--------------------+--------------------+
| show_id|   type|               title|        director|                cast|       country|      date_added|release_year|rating|duration|           listed_in|         description|
+--------+-------+--------------------+----------------+--------------------+--------------+----------------+------------+------+--------+--------------------+--------------------+
|70206826|  Movie|    Victim of Beauty|     Roger Young|William Devane, J...| United States| October 1, 2011|        1991|    NR|  93 min|   Dramas, Thrillers|A beauty pageant ...|
|81029831|  Movie|All's Well, End's...|     Vincent Kok|Louis Koo, Sandra...|     Hong Kong|November 9, 2018|        2009| TV-14|  99 min|Comedies, Interna...|Bound by a family...|
|80194289|  Movie|Gabriel and the M...| Fellipe Barbosa|João Pedro Zappa,...|Brazil, France|Nov

In [23]:
df_1.select("type").distinct().show()


+-------+
|   type|
+-------+
|TV Show|
|  Movie|
|       |
+-------+



In [24]:
df_1.select("type").filter(df_1.type == "").count()
df_1_type = df_1.select("type").filter(df_1.type != "")
df_1.select("type").distinct().show()


+-------+
|   type|
+-------+
|TV Show|
|  Movie|
|       |
+-------+



In [25]:
df_1.select("type").describe().show()


+-------+-------+
|summary|   type|
+-------+-------+
|  count|   1960|
|   mean|   NULL|
| stddev|   NULL|
|    min|       |
|    max|TV Show|
+-------+-------+



In [26]:
from pyspark.sql.functions import udf
from pyspark.sql.types import IntegerType

df_udf = spark.createDataFrame([(1, "John Doe", 21), (2, "Don Sewe", 41)], ("id", "name", "age"))

@udf (returnType=IntegerType())
def multiply(a, b):
  if a == 3:
    return 0
  else:
    return a*b

df_udf1 = df_udf.withColumn('output', multiply(F.col('id'), F.col('age')))

df_udf1.show()

# this is same as using @udf:
    # udfMultiply = udf(multiply, IntegerType()) para luego hacer
    # df_1 = df_1.withColumn('output', udfMultiply(F.col('a'), F.col('b')))


+---+--------+---+------+
| id|    name|age|output|
+---+--------+---+------+
|  1|John Doe| 21|    21|
|  2|Don Sewe| 41|    82|
+---+--------+---+------+



### Column Types Fix

In [27]:
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType


@udf (returnType=StringType())
def tv_or_movie(type, duration):
    if type == "Movie" or type == "TV Show":
        return type
    elif duration.endswith('min'):
        return "Movie"
    else: 
        return "TV Show"
    



df_udf1_types = df_1.withColumn('derivedType', tv_or_movie(F.col('type'), F.col('duration')))

df_udf1_types.show()

+--------+-------+--------------------+--------------------+--------------------+--------------------+----------------+------------+------+--------+--------------------+--------------------+-----------+
| show_id|   type|               title|            director|                cast|             country|      date_added|release_year|rating|duration|           listed_in|         description|derivedType|
+--------+-------+--------------------+--------------------+--------------------+--------------------+----------------+------------+------+--------+--------------------+--------------------+-----------+
|70206826|  Movie|    Victim of Beauty|         Roger Young|William Devane, J...|       United States| October 1, 2011|        1991|    NR|  93 min|   Dramas, Thrillers|A beauty pageant ...|      Movie|
|81029831|  Movie|All's Well, End's...|         Vincent Kok|Louis Koo, Sandra...|           Hong Kong|November 9, 2018|        2009| TV-14|  99 min|Comedies, Interna...|Bound by a family..

In [46]:
df_udf1_types.filter(df_udf1_types.derivedType == "").count()
df_udf1_types_new = df_udf1_types.drop("type")
df_udf1_types_new = df_udf1_types_new.withColumnRenamed("derivedType", "Type")



In [29]:
df_udf1_types_new.show()

+--------+--------------------+--------------------+--------------------+--------------------+----------------+------------+------+--------+--------------------+--------------------+-------+
| show_id|               title|            director|                cast|             country|      date_added|release_year|rating|duration|           listed_in|         description|   Type|
+--------+--------------------+--------------------+--------------------+--------------------+----------------+------------+------+--------+--------------------+--------------------+-------+
|70206826|    Victim of Beauty|         Roger Young|William Devane, J...|       United States| October 1, 2011|        1991|    NR|  93 min|   Dramas, Thrillers|A beauty pageant ...|  Movie|
|81029831|All's Well, End's...|         Vincent Kok|Louis Koo, Sandra...|           Hong Kong|November 9, 2018|        2009| TV-14|  99 min|Comedies, Interna...|Bound by a family...|  Movie|
|80194289|Gabriel and the M...|     Fellipe B

In [30]:
df_udf1_types_new.select("Type").distinct().show()
df_udf1_types_new.count()

+-------+
|   Type|
+-------+
|TV Show|
|  Movie|
+-------+



1960

In [31]:
df_udf1_types_new.write.parquet("C:/Users/ana.orellano\Documents/tests/tmp/parquet_folders", mode="overwrite")

In [32]:
# Combine the data into a single file using the .coalesce(1) transformation. Spark normally keeps data in separate files to improve performance and bypass RAM issues.
# df_udf1_types_new.coalesce(1).write.csv("C:/Users/ana.orellano\Documents/tests/tmp/netflix_titles_cleaned_Coal", sep='\t', header=True)

In [44]:
# df_cast = df_udf1_types_new.select("cast")
df_udf1_types_new.select("cast").show(truncate=False)
# df_cast.show(truncate=False)

+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|cast                                                                                                                                                                                                                                                                            |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|Jena Malone, Douglas Smith, Ted Levine, Tamara Duarte, Kelly Pendygraft, Mark Sivertsen, Jon McLaren                                                                          

In [45]:
# df_udf1_types_new = df_udf1_types_new.withColumn("array_cast", )

df_udf_cast = df_udf1_types_new.withColumn("cast_array", cast_to_array(F.col('cast')).cast(ArrayType(StringType())))

df_udf_cast.show()

PythonException: 
  An exception was thrown from the Python worker. Please see the stack trace below.
Traceback (most recent call last):
  File "C:\Users\ana.orellano\AppData\Local\Temp\ipykernel_19608\4223040582.py", line 7, in cast_to_array
  File "C:\Spark\Spark3\python\lib\pyspark.zip\pyspark\sql\utils.py", line 174, in wrapped
    return f(*args, **kwargs)
  File "C:\Spark\Spark3\python\lib\pyspark.zip\pyspark\sql\functions.py", line 9355, in split
    return _invoke_function("split", _to_java_column(str), pattern, limit)
  File "C:\Spark\Spark3\python\lib\pyspark.zip\pyspark\sql\column.py", line 63, in _to_java_column
    jcol = _create_column_from_name(col)
  File "C:\Spark\Spark3\python\lib\pyspark.zip\pyspark\sql\column.py", line 55, in _create_column_from_name
    sc = get_active_spark_context()
  File "C:\Spark\Spark3\python\lib\pyspark.zip\pyspark\sql\utils.py", line 248, in get_active_spark_context
    raise RuntimeError("SparkContext or SparkSession should be created first.")
RuntimeError: SparkContext or SparkSession should be created first.


In [35]:
from pyspark.sql.functions import udf
from pyspark.sql.types import ArrayType


@udf (returnType=ArrayType(StringType()))
def cast_to_array(cast_str):
   return split(cast_str, ", ")
    


# df_udf_cast = df_1.withColumn('derivedType', tv_or_movie(F.col('type'), F.col('duration')))



In [None]:
# Crear un DataFrame (reemplaza esto con tu propio DataFrame)
data = [("William Devane, Jeri Ryan, Michele Abrams",),
        ("Jena Malone, Douglas Smith, Ted Levine",),
        ("Louis Koo, Sandra Ng Kwan Yue, Raymond Wong",)]

columns = ["cast"]
df_array_test = spark.createDataFrame(data, columns)

df_array_test = df_array_test.withColumn("cast_array", cast_to_array(F.col('cast'))) #.cast(ArrayType(StringType())))


In [42]:
df_array_test.count()


3

In [48]:
# from pyspark.sql import SparkSession
# from pyspark.sql.functions import split

# # Inicializar una sesión de Spark
# spark = SparkSession.builder.appName("MiAplicacion").getOrCreate()

# # Crear un DataFrame
# data = [("William Devane, Jeri Ryan, Michele Abrams",),
#         ("Jena Malone, Douglas Smith, Ted Levine",),
#         ("Louis Koo, Sandra Ng Kwan Yue, Raymond Wong",)]

# columns = ["cast"]
# df_array_test = spark.createDataFrame(data, columns)

# Aplicar la función split
df_array_test = df_udf1_types_new.withColumn("cast_array", split(df_udf1_types_new["cast"], ", "))

# Mostrar el DataFrame resultante
df_array_test.show()


+--------+--------------------+--------------------+--------------------+--------------------+----------------+------------+------+--------+--------------------+--------------------+-------+--------------------+
| show_id|               title|            director|                cast|             country|      date_added|release_year|rating|duration|           listed_in|         description|   Type|          cast_array|
+--------+--------------------+--------------------+--------------------+--------------------+----------------+------------+------+--------+--------------------+--------------------+-------+--------------------+
|70206826|    Victim of Beauty|         Roger Young|William Devane, J...|       United States| October 1, 2011|        1991|    NR|  93 min|   Dramas, Thrillers|A beauty pageant ...|  Movie|[William Devane, ...|
|81029831|All's Well, End's...|         Vincent Kok|Louis Koo, Sandra...|           Hong Kong|November 9, 2018|        2009| TV-14|  99 min|Comedies, In

----------------------------------------
Exception occurred during processing of request from ('127.0.0.1', 55891)
Traceback (most recent call last):
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\lib\socketserver.py", line 316, in _handle_request_noblock
    self.process_request(request, client_address)
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\lib\socketserver.py", line 347, in process_request
    self.finish_request(request, client_address)
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\lib\socketserver.py", line 360, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\lib\socketserver.py", line 747, in __init__
    self.handle()
  File "C:\Users\ana.orellano\AppData\Local\Packa