In [1]:
import pymssql
import pandas as pd

In [None]:
#conexión con la BD
conn = pymssql.connect(
    server='127.0.0.1',
    user='sa',
    password='12345678',
    database='master1',
    as_dict= False
)

In [7]:
# Se crea el cursor para manejo de qwery
cur = conn.cursor()

In [4]:
# Crear una nueva BD

db_name = "etl_db"

try:
    # se verifica que no hayan transacciones de la bd que esten activas
    cur.execute("IF @@TRANCOUNT > 0 ROLLBACK TRANSACTION")

    # Se consulta si la Bd existe
    cur.execute(f"SELECT name FROM sys.databases WHERE name = '{db_name}'")
    resultado = cur.fetchone()

    if resultado:
        print(f"La base de datos '{db_name}' ya existe.")
    else:
        cur.execute(f"CREATE DATABASE {db_name}")
        print(f"Base de datos '{db_name}' creada.")

except Exception as e:
    print(f"Error al verificar o crear la base de datos: {e}")

La base de datos 'etl_db' ya existe.


In [8]:
# crear tabla
tb_name = "tabla_etl"

try:
    # Verificar si la tabla existe
    cur.execute(f"SELECT COUNT(*) FROM sys.tables WHERE name = '{tb_name}'")
    resultado = cur.fetchone()

    if resultado[0] > 0:
        print(f"La tabla '{tb_name}' ya existe.")
    else:
        cur.execute(f"""
            CREATE TABLE {tb_name} (
                id INT IDENTITY(1,1) PRIMARY KEY,
                nombre VARCHAR(100) NOT NULL,
                edad INT,
                email VARCHAR(100),
                fecha_registro DATETIME DEFAULT GETDATE()
            )
        """)
        print(f"Tabla '{tb_name}' creada exitosamente.")

    conn.commit() # confirmar los cambios
except Exception as e:
    print(f"Error al verificar o crear la tabla: {e}")
    conn.rollback()

La tabla 'tabla_etl' ya existe.


In [9]:
# Insertar datos en la tabla

sql = f"INSERT INTO {tb_name} (nombre, edad, email) VALUES (%s, %s, %s)"
datos = [
        ('Juan Pérez', 28, 'juan@example.com'),
        ('María García', 34, 'maria@example.com'),
        ('Carlos López', 25, 'carlos@example.com')
    ]

cur.executemany(sql, datos)

In [10]:
# Leer datos de la tabla

result = f"SELECT * FROM {tb_name}"
cur.execute(result)
# Obtener los nombres de las columnas
columns = [column[0] for column in cur.description]
rows = cur.fetchall()

# Mostrar los resultados
print("Datos en 'tabla_etl':")
for row in rows:
    print(row)

Datos en 'tabla_etl':
(1, 'Juan Pérez', 28, 'juan@example.com', datetime.datetime(2025, 2, 24, 17, 47, 21, 167000))
(2, 'María García', 34, 'maria@example.com', datetime.datetime(2025, 2, 24, 17, 47, 21, 170000))
(3, 'Carlos López', 25, 'carlos@example.com', datetime.datetime(2025, 2, 24, 17, 47, 21, 170000))
(4, 'Juan Pérez', 28, 'juan@example.com', datetime.datetime(2025, 2, 24, 17, 47, 53, 290000))
(5, 'María García', 34, 'maria@example.com', datetime.datetime(2025, 2, 24, 17, 47, 53, 300000))
(6, 'Carlos López', 25, 'carlos@example.com', datetime.datetime(2025, 2, 24, 17, 47, 53, 300000))
(7, 'Juan Pérez', 28, 'juan@example.com', datetime.datetime(2025, 2, 25, 16, 13, 25, 850000))
(8, 'María García', 34, 'maria@example.com', datetime.datetime(2025, 2, 25, 16, 13, 25, 850000))
(9, 'Carlos López', 25, 'carlos@example.com', datetime.datetime(2025, 2, 25, 16, 13, 25, 850000))


In [11]:
# Mostrar el DataFrame
df = pd.DataFrame(rows, columns=columns)
df.reset_index() 
print(df)

   id        nombre  edad               email          fecha_registro
0   1    Juan Pérez    28    juan@example.com 2025-02-24 17:47:21.167
1   2  María García    34   maria@example.com 2025-02-24 17:47:21.170
2   3  Carlos López    25  carlos@example.com 2025-02-24 17:47:21.170
3   4    Juan Pérez    28    juan@example.com 2025-02-24 17:47:53.290
4   5  María García    34   maria@example.com 2025-02-24 17:47:53.300
5   6  Carlos López    25  carlos@example.com 2025-02-24 17:47:53.300
6   7    Juan Pérez    28    juan@example.com 2025-02-25 16:13:25.850
7   8  María García    34   maria@example.com 2025-02-25 16:13:25.850
8   9  Carlos López    25  carlos@example.com 2025-02-25 16:13:25.850


In [12]:
# transformar los datos
df_transformed = df.copy()
df_transformed["nombre"] = df_transformed["nombre"].str.upper()  # Uppercase names
df_transformed["age_category"] = df_transformed["edad"].apply(
    lambda x: "Young" if x < 30 else "Middle-aged" if x < 50 else "Senior"
)

# Display transformed DataFrame
df_transformed.head()

Unnamed: 0,id,nombre,edad,email,fecha_registro,age_category
0,1,JUAN PÉREZ,28,juan@example.com,2025-02-24 17:47:21.167,Young
1,2,MARÍA GARCÍA,34,maria@example.com,2025-02-24 17:47:21.170,Middle-aged
2,3,CARLOS LÓPEZ,25,carlos@example.com,2025-02-24 17:47:21.170,Young
3,4,JUAN PÉREZ,28,juan@example.com,2025-02-24 17:47:53.290,Young
4,5,MARÍA GARCÍA,34,maria@example.com,2025-02-24 17:47:53.300,Middle-aged


In [14]:
# crear tabla transformada
tb_name = "transformed_data2"

try:
    # Verificar si la tabla existe
    cur.execute(f"SELECT COUNT(*) FROM sys.tables WHERE name = '{tb_name}'")
    resultado = cur.fetchone()

    if resultado[0] > 0:
        print(f"La tabla '{tb_name}' ya existe.")
    else:
        cur.execute(f"""
            CREATE TABLE {tb_name} (
                id INT IDENTITY(1,1) PRIMARY KEY,
                nombre VARCHAR(100) NOT NULL,
                edad INT,
                email VARCHAR(100),
                fecha_registro DATETIME DEFAULT GETDATE(),
                age_category VARCHAR(20)
            )
        """)
        print(f"Tabla '{tb_name}' creada exitosamente.")

    conn.commit()  # confirmar los cambios
except Exception as e:
    print(f"Error al verificar o crear la tabla: {e}")
    conn.rollback()

Tabla 'transformed_data2' creada exitosamente.


In [27]:
# crear tabla para etl
db_name='transformed_etl'

# Verificar si la tabla existe
cur.execute(f"SELECT COUNT(*) FROM sys.tables WHERE name = '{tb_name}'")
resultado = cur.fetchone()

if resultado[0] == 0:
    cur.execute(f"""
        CREATE TABLE {tb_name} (
            id INT IDENTITY(1,1) PRIMARY KEY,
            nombre VARCHAR(100) NOT NULL,
            edad INT,
            email VARCHAR(100),
            fecha_registro DATETIME DEFAULT GETDATE(),
            age_category VARCHAR(20)
            )
        """)
    print(f" Tabla '{tb_name}' creada exitosamente.")

#Insertar datos
sql = f"""
    INSERT INTO {tb_name} (nombre, edad, email, fecha_registro, age_category) 
    VALUES (%s, %s, %s, %s, %s)
    """

# El dataframe se vuelve lista
data_to_insert = [
    (row["nombre"], row["edad"], row["email"], row["fecha_registro"], row["age_category"])
    for _, row in df_transformed.iterrows()
    ]

# Ejecutar inserción en lote
cur.executemany(sql, data_to_insert)

In [28]:
# Leer datos de la tabla
db_name='transformed_etl'

result = f"SELECT * FROM {tb_name}"
cur.execute(result)
# Obtener los nombres de las columnas
columns = [column[0] for column in cur.description]
rows = cur.fetchall()

# Convertir datos en DataFrame e imprimir
db_transformed_etl = pd.DataFrame(rows, columns=columns)
print(db_transformed_etl)


   id        nombre  edad               email          fecha_registro  \
0   1    JUAN PÉREZ    28    juan@example.com 2025-02-24 17:47:21.167   
1   2  MARÍA GARCÍA    34   maria@example.com 2025-02-24 17:47:21.170   
2   3  CARLOS LÓPEZ    25  carlos@example.com 2025-02-24 17:47:21.170   
3   4    JUAN PÉREZ    28    juan@example.com 2025-02-24 17:47:53.290   
4   5  MARÍA GARCÍA    34   maria@example.com 2025-02-24 17:47:53.300   
5   6  CARLOS LÓPEZ    25  carlos@example.com 2025-02-24 17:47:53.300   

  age_category  
0        Young  
1  Middle-aged  
2        Young  
3        Young  
4  Middle-aged  
5        Young  


In [29]:
#Cerrar cursor y conexión
cur.close()
conn.close()