In [None]:
import sys
from getpass import getpass
from urllib.parse import quote

import pandas as pd
import sqlalchemy as alch

sys.path.append("../")
import src.soporte as sp
import src.biblioteca as bb

# Loading data

In [None]:
# Load the two base CSV exports (Mercadona and Dia); the first CSV column becomes the index.
mercadona, dia = (
    pd.read_csv(ruta, index_col=0)
    for ruta in ("../data/mercadona_limpio.csv", "../data/dia_limpio.csv")
)

In [None]:
# Load the three Dia scrape files (one CSV per day); the first CSV column becomes the index.
dia_18, dia_19, dia_20 = (
    pd.read_csv(ruta, index_col=0)
    for ruta in (
        "../data/dia_2023-01-18.csv",
        "../data/dia_2023-01-19.csv",
        "../data/dia_2023-01-20.csv",
    )
)

In [None]:
# Load the Mercadona scrape file; the first CSV column becomes the index.
mercadona_20 = pd.read_csv(
    filepath_or_buffer="../data/merc2023-01-20_suma.csv",
    index_col=0,
)

# Data manipulation

In [None]:
# Remove the two "Unnamed" columns (presumably stale index columns written by earlier
# CSV exports - TODO confirm) together with "category_id".
columnas_sobrantes = ["Unnamed: 0.1", "Unnamed: 0", "category_id"]
mercadona_20 = mercadona_20.drop(columns=columnas_sobrantes)

In [None]:
# Keep only the first occurrence of every fully identical row.
mercadona_20 = mercadona_20.drop_duplicates()

In [None]:
# Build the project's own taxonomy: map each raw "category" value to a subcategory,
# then overwrite "category" with the value computed from that subcategory.
mercadona_20["subcategoria"] = mercadona_20["category"].map(sp.mer_subcat)
mercadona_20["category"] = mercadona_20["subcategoria"].map(sp.category)

In [None]:
# "supermarket" is redundant here: every row of this table belongs to Dia.
dia = dia.drop(columns=["supermarket"])

In [None]:
# Stack the base Dia table and the three scrapes into one frame with a fresh 0..n-1 index.
dia_sumando = pd.concat([dia, dia_18, dia_19], ignore_index=True)
dia_total = pd.concat([dia_sumando, dia_20], ignore_index=True)
dia_total.head(2)

In [None]:
# Discard "supermarket" and the original "category", then rename "categoria" to "category".
mercadona = (
    mercadona
    .drop(columns=["supermarket", "category"])
    .rename(columns={"categoria": "category"})
)

In [None]:
# Append the new Mercadona scrape to the base Mercadona table, renumbering the rows.
mercadona_total = pd.concat((mercadona, mercadona_20), ignore_index=True)
mercadona_total.head(2)

In [None]:
# One combined table (Mercadona rows first, then Dia rows) holding every product seen.
dia_mercadona_total = pd.concat((mercadona_total, dia_total), ignore_index=True)

In [None]:
# Assign a numeric ID to every distinct product name.
# Price/date columns are not needed to describe a product, so they are left out.
productos_todos = dia_mercadona_total.drop(
    columns=["price", "reference_price", "reference_unit", "insert_date"]
)
# IDs are the positions 0..n-1 of the names in order of first appearance.
nombres_unicos = productos_todos["name"].unique()
productos = pd.DataFrame(
    {"product_id": range(len(nombres_unicos)), "name": nombres_unicos}
)
productos.head(2)

In [None]:
# Attach the product ID to every row by name, then keep one row (the first) per product ID.
productos_todos_conid = pd.merge(productos_todos, productos, how="inner", on="name")
productos_conid = productos_todos_conid.drop_duplicates(subset="product_id")
productos_conid.head(2)

In [None]:
# Replace characters that are problematic for the SQL inserts: "'" -> "-" and "%" -> "/100".
# The frame is rebuilt with .assign() instead of assigning a column on the result of
# drop_duplicates(), which avoids pandas' SettingWithCopyWarning. The literal
# (regex=False) string replacement also leaves missing names untouched instead of raising.
productos_conid = productos_conid.assign(
    name=(
        productos_conid["name"]
        .str.replace("'", "-", regex=False)
        .str.replace("%", "/100", regex=False)
    )
)

In [None]:
# Attach the product ID to every Mercadona price row, matching on the product name.
productos_mercadona = pd.merge(mercadona_total, productos, how="inner", on="name")

In [None]:
# Turn the current row index into the "price_id" key, then discard every row
# that has a missing value in any column.
productos_mercadona = (
    productos_mercadona
    .reset_index()
    .rename(columns={"index": "price_id"})
    .dropna()
)
productos_mercadona.head(2)

In [None]:
# Attach the product ID to every Dia price row, matching on the product name.
productos_dia = pd.merge(dia_total, productos, how="inner", on="name")

In [None]:
# Turn the row index into "price_id" and shift it by 5,000,000 so Dia price IDs do not
# collide with the Mercadona ones (which are unshifted row numbers) - this holds as long
# as Mercadona has fewer than 5,000,000 price rows.
productos_dia = (
    productos_dia
    .reset_index()
    .rename(columns={"index": "price_id"})
    .assign(price_id=lambda tabla: tabla["price_id"] + 5000000)
)
# NOTE(review): this drops NaN rows from productos_mercadona, not productos_dia.
# It looks like a copy-paste slip; confirm which frame was intended before changing it,
# because dropping NaN rows from productos_dia may remove many Dia rows.
productos_mercadona = productos_mercadona.dropna()
productos_dia.head(2)

# Data Insertion

Establishing the connection

In [None]:
# Ask for the MySQL password interactively so it is never stored in the notebook.
password = getpass("Contraseña de MySQL")
db_name = "supermercados"
# Percent-encode the password before placing it in the URL: characters such as
# "@", "/", ":" or "%" would otherwise be parsed as URL syntax and the connection
# would target the wrong host or fail to authenticate.
conexion = f"mysql+pymysql://root:{quote(password, safe='')}@localhost/{db_name}"
engine = alch.create_engine(conexion)

Inserting the data of our supermarkets. There are only two supermarkets, so I insert them manually.

In [None]:
# Insert the two supermarkets. Each row is inserted only if its ID is not yet in
# `supermercado`, so the cell can be re-run; values are sent as bound parameters.
supermercados = [
    {"idsupermercado": 1, "nombre_supermercado": "mercadona"},
    {"idsupermercado": 2, "nombre_supermercado": "dia"},
]
consulta_supermercado = alch.text(
    "SELECT idsupermercado FROM supermercado WHERE idsupermercado = :idsupermercado"
)
insercion_supermercado = alch.text("""
    INSERT INTO supermercado (idsupermercado, nombre_supermercado)
    VALUES (:idsupermercado, :nombre_supermercado)
""")

# engine.begin() opens one transaction and commits it when the block exits without error.
with engine.begin() as conn:
    for supermercado in supermercados:
        ya_existe = conn.execute(
            consulta_supermercado,
            {"idsupermercado": supermercado["idsupermercado"]},
        ).first()
        if ya_existe is None:
            conn.execute(insercion_supermercado, supermercado)

Inserting the product data

In [None]:
# Insert every product whose ID is not already present in `productos`.
# Values are sent as bound parameters, so quotes or other special characters in a
# product name can neither break the statement nor inject SQL.
consulta_producto = alch.text(
    "SELECT idproductos FROM productos WHERE idproductos = :idproductos"
)
insercion_producto = alch.text("""
    INSERT INTO productos (idproductos, nombre_producto, categoria, subcategoria)
    VALUES (:idproductos, :nombre_producto, :categoria, :subcategoria)
""")

# engine.begin() opens one transaction and commits it when the block exits without error.
with engine.begin() as conn:
    for index, row in productos_conid.iterrows():
        valores = {
            "idproductos": row["product_id"],
            "nombre_producto": row["name"],
            "categoria": row["category"],
            "subcategoria": row["subcategoria"],
        }
        # Missing values (NaN) are sent as SQL NULL instead of the literal text "nan".
        valores = {
            columna: None if pd.isna(valor) else valor
            for columna, valor in valores.items()
        }

        ya_existe = conn.execute(
            consulta_producto, {"idproductos": valores["idproductos"]}
        ).first()
        if ya_existe is not None:
            print(f"Este index {index} ya existe la ID.")
            continue

        try:
            conn.execute(insercion_producto, valores)
        except alch.exc.SQLAlchemyError as error:
            # Report the failing row together with the database's reason (the error text
            # includes the statement and its parameters), then go on with the next row.
            print(f"Este índice {index}, no funciona: {error}")

Next, all the historical data from Mercadona

In [None]:
# Insert every Mercadona price row whose ID is not already present in `precios`.
# The supermarket ID is fixed to 1 (the ID given to "mercadona" in `supermercado`);
# every other value is sent as a bound parameter.
consulta_precio_mercadona = alch.text(
    "SELECT idprecio FROM precios WHERE idprecio = :idprecio"
)
insercion_precio_mercadona = alch.text("""
    INSERT INTO precios (idprecio, precio_unidad, precio_referencia, referencia, productos_idproductos, supermercado_idsupermercado, fecha)
    VALUES (:idprecio, :precio_unidad, :precio_referencia, :referencia, :productos_idproductos, 1, :fecha)
""")

# engine.begin() opens one transaction and commits it when the block exits without error.
with engine.begin() as conn:
    for index, row in productos_mercadona.iterrows():
        valores = {
            "idprecio": row["price_id"],
            "precio_unidad": row["price"],
            "precio_referencia": row["reference_price"],
            "referencia": row["reference_unit"],
            "productos_idproductos": row["product_id"],
            "fecha": row["insert_date"],
        }
        # Missing values (NaN) are sent as SQL NULL instead of the literal text "nan".
        valores = {
            columna: None if pd.isna(valor) else valor
            for columna, valor in valores.items()
        }

        ya_existe = conn.execute(
            consulta_precio_mercadona, {"idprecio": valores["idprecio"]}
        ).first()
        if ya_existe is not None:
            # Print the price ID that was looked up, not the DataFrame index label.
            print(f"El ID {valores['idprecio']} ya existe.")
            continue

        try:
            conn.execute(insercion_precio_mercadona, valores)
        except alch.exc.SQLAlchemyError as error:
            # Report the failing row together with the database's reason, then continue.
            print(f"Este índice {index}, no funciona: {error}")
            

And the historical data from Dia

In [None]:
# Insert every Dia price row whose ID is not already present in `precios`.
# The supermarket ID is fixed to 2 (the ID given to "dia" in `supermercado`);
# every other value is sent as a bound parameter.
consulta_precio_dia = alch.text(
    "SELECT idprecio FROM precios WHERE idprecio = :idprecio"
)
insercion_precio_dia = alch.text("""
    INSERT INTO precios (idprecio, precio_unidad, precio_referencia, referencia, productos_idproductos, supermercado_idsupermercado, fecha)
    VALUES (:idprecio, :precio_unidad, :precio_referencia, :referencia, :productos_idproductos, 2, :fecha)
""")

# engine.begin() opens one transaction and commits it when the block exits without error.
with engine.begin() as conn:
    for index, row in productos_dia.iterrows():
        valores = {
            "idprecio": row["price_id"],
            "precio_unidad": row["price"],
            "precio_referencia": row["reference_price"],
            "referencia": row["reference_unit"],
            "productos_idproductos": row["product_id"],
            "fecha": row["insert_date"],
        }
        # Missing values (NaN) are sent as SQL NULL instead of the literal text "nan".
        valores = {
            columna: None if pd.isna(valor) else valor
            for columna, valor in valores.items()
        }

        ya_existe = conn.execute(
            consulta_precio_dia, {"idprecio": valores["idprecio"]}
        ).first()
        if ya_existe is not None:
            # Print the price ID that was looked up; the DataFrame index is a
            # different number here because Dia price IDs are offset.
            print(f"El ID {valores['idprecio']} ya existe.")
            continue

        try:
            conn.execute(insercion_precio_dia, valores)
        except alch.exc.SQLAlchemyError as error:
            # Report the failing row together with the database's reason (the error text
            # includes the statement and its parameters), then go on with the next row.
            print(f"Este índice {index}, no funciona: {error}")