# Base de datos

## Importando módulos

In [1]:
from sqlalchemy import create_engine
import psycopg2
import pandas as pd
import requests

## Conexión

In [2]:
def getPostgreConnector(stringConnection= 'postgresql+psycopg2://postgres:postgres@db-test.cq4syw9xqygb.us-east-1.rds.amazonaws.com:5432/postgres'):
    """Build a SQLAlchemy engine for the given connection URL.

    Args:
        stringConnection: Database URL handed straight to ``create_engine``.
            The default embeds a host name, user name and password in plain
            text.
            NOTE(review): consider moving these credentials out of the
            source file.

    Returns:
        The engine object produced by ``create_engine``.
    """
    return create_engine(stringConnection)

In [3]:
# Build the engine from the default connection string and run
# `Select version()` on a fresh connection.
engine = getPostgreConnector()
with engine.connect() as connection:
    result = connection.execute('Select version()')
    # Print the first row of the result.
    print(result.fetchone())

('PostgreSQL 12.5 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-11), 64-bit',)


## Creando tablas

In [4]:
# Create the two demo tables. The groups table is created first because
# products_erick declares a foreign key on product_groups_erick (group_id).
with engine.connect() as connection:
    connection.execute(""" CREATE TABLE product_groups_erick (
                                                    group_id serial PRIMARY KEY,
                                                    group_name VARCHAR (255) NOT NULL,
                                                    stock INT NOT NULL
                                                    );""")
    connection.execute(""" CREATE TABLE products_erick (
                                                    product_id serial PRIMARY KEY,
                                                    product_name VARCHAR (255) NOT NULL,
                                                    price DECIMAL (11, 2),
                                                    group_id INT NOT NULL,
                                                    FOREIGN KEY (group_id) REFERENCES product_groups_erick (group_id)
                                                  );""")

## Poblando DB

In [5]:
# INSERT statement that seeds the three product groups (name, stock).
query_insert_product_group = """
                                INSERT INTO product_groups_erick (group_name, stock)
                                VALUES
                                  ('Smartphone', 4),
                                  ('Laptop', 4),
                                  ('Tablet', 3);
                             """
# INSERT statement that seeds eleven products; the second value of each row
# is the group_id the product belongs to, the third is its price.
query_insert_products = """
                            INSERT INTO products_erick (product_name, group_id,price)
                            VALUES
                              ('Microsoft Lumia', 1, 200),
                              ('HTC One', 1, 400),
                              ('Nexus', 1, 500),
                              ('iPhone', 1, 900),
                              ('HP Elite', 2, 1200),
                              ('Lenovo Thinkpad', 2, 700),
                              ('Sony VAIO', 2, 700),
                              ('Dell Vostro', 2, 800),
                              ('iPad', 3, 700),
                              ('Kindle Fire', 3, 150),
                              ('Samsung Galaxy Tab', 3, 200);
                        """

In [6]:
# Run both INSERT statements on one connection. Groups go first because the
# product rows reference them through group_id.
with engine.connect() as connection:
    connection.execute(query_insert_product_group)
    connection.execute(query_insert_products)

## Reading

In [4]:
# Fetch and print every row of both demo tables.
with engine.connect() as connection:
    result_groups = connection.execute('SELECT * FROM product_groups_erick')
    print(result_groups.fetchall())
    result_products = connection.execute('SELECT * FROM products_erick')
    print(result_products.fetchall())

[(1, 'Smartphone', 4), (2, 'Laptop', 4), (3, 'Tablet', 3)]
[(1, 'Microsoft Lumia', Decimal('200.00'), 1), (2, 'HTC One', Decimal('400.00'), 1), (3, 'Nexus', Decimal('500.00'), 1), (4, 'iPhone', Decimal('900.00'), 1), (5, 'HP Elite', Decimal('1200.00'), 2), (6, 'Lenovo Thinkpad', Decimal('700.00'), 2), (7, 'Sony VAIO', Decimal('700.00'), 2), (8, 'Dell Vostro', Decimal('800.00'), 2), (9, 'iPad', Decimal('700.00'), 3), (10, 'Kindle Fire', Decimal('150.00'), 3), (11, 'Samsung Galaxy Tab', Decimal('200.00'), 3)]


## Window Functions

In [5]:
# Plain aggregate for comparison with the window-function queries below:
# GROUP BY collapses the rows to one average price per group.
query = """
        SELECT
          group_name,
          AVG (price)
        FROM
          products_erick
        INNER JOIN product_groups_erick USING (group_id)
        GROUP BY
          group_name;
        """

In [6]:
# Run the query through the engine and load the result into a DataFrame.
pd.read_sql(sql= query, con= engine, index_col=None)

Unnamed: 0,group_name,avg
0,Smartphone,500.0
1,Tablet,350.0
2,Laptop,850.0


In [13]:
# Window-function version: AVG(...) OVER (PARTITION BY group_name) keeps one
# row per product and attaches the group's average price to each of them.
query = """ 
        SELECT
        product_name,
        price,
        group_name,
        AVG (price) OVER (
          PARTITION BY group_name
        ) as group_average_price
        FROM products_erick
        INNER JOIN product_groups_erick USING (group_id);
      """
pd.read_sql(sql= query, con= engine, index_col=None)

Unnamed: 0,product_name,price,group_name,group_average_price
0,HP Elite,1200.0,Laptop,850.0
1,Lenovo Thinkpad,700.0,Laptop,850.0
2,Sony VAIO,700.0,Laptop,850.0
3,Dell Vostro,800.0,Laptop,850.0
4,Microsoft Lumia,200.0,Smartphone,500.0
5,HTC One,400.0,Smartphone,500.0
6,Nexus,500.0,Smartphone,500.0
7,iPhone,900.0,Smartphone,500.0
8,iPad,700.0,Tablet,350.0
9,Kindle Fire,150.0,Tablet,350.0


## DenseRank

## Ejercicio

Ejecutar una query en el motor de PostgreSQL sobre las tablas products y product_groups para obtener el producto de precio máximo de cada grupo.

In [15]:
# Exercise solution: the inner query tags every product with the maximum
# price of its group (window function); the outer query keeps only the rows
# whose own price equals that maximum.
# Note: this reads `products` / `product_groups`, not the `*_erick` tables
# created earlier in this notebook.
query = """ 
        SELECT 
          product_name,
          price,
          group_name
          FROM
            (   SELECT
                product_name,
                price,
                group_name,
                MAX (price) OVER (
                  PARTITION BY group_name
                ) as group_MAX_price
                FROM products
                INNER JOIN product_groups USING (group_id)
            ) as products_with_max
          WHERE group_MAX_price = price
      """
pd.read_sql(sql= query, con= engine, index_col=None)

Unnamed: 0,product_name,price,group_name
0,HP Elite,1200.0,Laptop
1,iPhone,900.0,Smartphone
2,iPad,700.0,Tablet


In [7]:
# Same idea as the subquery version, but the intermediate result (each
# product plus its group's maximum price) is materialised in a temporary
# table named temp_example.
# NOTE(review): temporary tables are normally visible only to the database
# session that created them; a later read through `engine` presumably relies
# on the same pooled connection being reused — confirm.
query = """ 
        DROP TABLE IF EXISTS temp_example;
        CREATE TEMPORARY TABLE temp_example AS
        (
                SELECT
                product_name,
                price,
                group_name,
                MAX (price) OVER (
                  PARTITION BY group_name
                ) as group_MAX_price
                FROM products
                INNER JOIN product_groups USING (group_id)
        );
        """
with engine.connect() as connection:
    result_products = connection.execute(query)

In [8]:
# Keep only the rows of temp_example whose price equals the maximum price of
# their group.
query = """ SELECT 
            product_name,
            price,
            group_name
            FROM temp_example
            WHERE group_MAX_price = price
      """
pd.read_sql(sql= query, con= engine, index_col=None)

Unnamed: 0,product_name,price,group_name
0,HP Elite,1200.0,Laptop
1,iPhone,900.0,Smartphone
2,iPad,700.0,Tablet


In [16]:
# Two window aggregates over the same partition: every product row carries
# both the minimum and the maximum price of its group.
query = """ 
        SELECT
        product_name,
        price,
        group_name,
        MIN (price) OVER (
          PARTITION BY group_name
        ) as min_price_in_group,
        MAX (price) OVER (
          PARTITION BY group_name
        ) as max_price_in_group
        FROM products
        INNER JOIN product_groups USING (group_id);
      """
pd.read_sql(sql= query, con= engine, index_col=None)

Unnamed: 0,product_name,price,group_name,min_price_in_group,max_price_in_group
0,HP Elite,1200.0,Laptop,700.0,1200.0
1,Lenovo Thinkpad,700.0,Laptop,700.0,1200.0
2,Sony VAIO,700.0,Laptop,700.0,1200.0
3,Dell Vostro,800.0,Laptop,700.0,1200.0
4,Microsoft Lumia,200.0,Smartphone,200.0,900.0
5,HTC One,400.0,Smartphone,200.0,900.0
6,Nexus,500.0,Smartphone,200.0,900.0
7,iPhone,900.0,Smartphone,200.0,900.0
8,iPad,700.0,Tablet,150.0,700.0
9,Kindle Fire,150.0,Tablet,150.0,700.0


## Drop tables

In [23]:
# Clean up the demo tables. products_erick goes first because it holds the
# foreign key that references product_groups_erick.
with engine.connect() as connection:
    connection.execute("""drop table if exists products_erick""")
    connection.execute("""drop table if exists product_groups_erick""")

# Ejemplo MapReduce

In [9]:
def find_longest_string(list_of_strings):
    """Return the longest string in ``list_of_strings``.

    Args:
        list_of_strings: Iterable of strings to scan.

    Returns:
        The first string of maximal length, or ``None`` when the iterable is
        empty. An input made up only of empty strings yields ``''`` rather
        than ``None``.
    """
    # ``max`` returns the first of several equally long strings, which is the
    # same tie-breaking as a left-to-right scan with a strict ``>`` test;
    # ``default`` covers the empty input without raising ValueError.
    return max(list_of_strings, key=len, default=None)

In [19]:
# Small sanity check of find_longest_string on a three-word list.
large_list_of_strings = ['abc','python', 'loga']
find_longest_string(large_list_of_strings)

'python'

In [20]:
# Keep a copy of the short list before large_list_of_strings is rebound.
list_of_strings = large_list_of_strings.copy()

In [21]:
# Repeat the three-word list 100,000,000 times so the linear scan takes long
# enough to be worth timing.
large_list_of_strings = list_of_strings*100000000
# `%time` is an IPython magic: this line only runs inside IPython/Jupyter.
%time print(find_longest_string(large_list_of_strings))

python
Wall time: 23.6 s


In [42]:
# Walk the list in steps of three and print each slice; the last slice is
# shorter when the length is not a multiple of three.
my_list = [13, 2, 6, 1, 5, 9, 6]
chunk_size = 3
for start in range(0, len(my_list), chunk_size):
    stop = start + chunk_size
    print(my_list[start:stop])

[13, 2, 6]
[1, 5, 9]
[6]


Con numpy

In [43]:
import numpy as np

In [49]:
# Here 3 is the NUMBER of chunks (not the chunk size as in the slicing loop
# above); per the recorded output the first chunk receives the extra element.
np.array_split(my_list, 3)

[array([13,  2,  6]), array([1, 5]), array([9, 6])]

## Map

In [24]:
# Example: use map to multiply every element of a list by 2.
def doble(num):
    """Return ``num`` multiplied by two."""
    doubled = num * 2
    return doubled


ejemplo = list(map(doble, [1, 2, 3]))
ejemplo

[2, 4, 6]

In [25]:
# Example: use map to halve each number; odd numbers get 1 added before the
# division.
def _half_after_rounding_up(x):
    """Return ``x / 2`` for even ``x`` and ``(x + 1) / 2`` otherwise."""
    if x % 2 == 0:
        return x / 2
    return (x + 1) / 2


ejemplo = list(map(_half_after_rounding_up, [10, 15, 30]))
ejemplo

[5.0, 8.0, 15.0]

## Reduce

In [26]:
from functools import reduce

In [27]:
# Example: fold a list of numbers into their total with reduce.
def _add(total, value):
    """Return the running total plus the next value."""
    return total + value


ejemplo = reduce(_add, [1, 3, 6])
ejemplo

10

## Ejercicios

1) Dada una lista de palabras, retornar la misma lista pero donde a cada palabra se le agregue el prefijo "prefix_"
Ejemplo: ["hola", "azul"] --> ["prefix_hola", "prefix_azul"]

In [58]:
def add_prefix(word):
    """Return ``word`` with the literal prefix ``prefix_`` in front of it."""
    return f'prefix_{word}'


words = ['hola', 'mundo']
list(map(add_prefix, words))

['prefix_hola', 'prefix_mundo']

2) Dada una lista de fechas (string) en formato yyyy-mm-dd (ej. 2021-03-15) devuelva una tupla con la siguiente estructura (año, mes, día).
Ejemplo: ["2021-02-01", "2019-05-12"] --> [(2021, 02, 01), (2019, 05, 12)]

In [63]:
def split_date(date_text):
    """Split ``date_text`` on ``-`` and return the pieces as a tuple of strings."""
    pieces = date_text.split('-')
    return tuple(pieces)


dates = ['2021-02-01', '2019-05-12']
list(map(split_date, dates))

[('2021', '02', '01'), ('2019', '05', '12')]

3) Utilizar la función incorporada map() para crear una función que retorne una lista con la longitud de cada palabra de una frase. La función recibe una cadena de texto y retornará una lista.
Ejemplo: "Hola, ¿cómo estás?" -->  [4, 4, 5]
Ayuda: quitar caracteres especiales. Separar por espacio.

In [84]:
import re

# Matches any "special character": anything that is neither a word character
# nor whitespace. Written as a raw string so that ``\w`` and ``\s`` reach the
# regex engine instead of being parsed as (invalid) string escapes.
reg_pattern = re.compile(r'[^\w\s]')


def word_lengths(sentence):
    """Return the length of every word in ``sentence``.

    Special characters (punctuation such as ``,``, ``¿`` or ``?``) are
    removed first; the remaining text is split on whitespace and ``map``
    applies ``len`` to each word.

    Args:
        sentence: Text to analyse.

    Returns:
        A list with one integer per word, in order of appearance. An empty
        or punctuation-only sentence yields an empty list.

    Example:
        >>> word_lengths('Hola, ¿cómo estás?')
        [4, 4, 5]
    """
    cleaned = reg_pattern.sub('', sentence)
    return list(map(len, cleaned.split()))


text = 'Hola, ¿cómo estás?'
print(text)
lengths = word_lengths(text)  # [4, 4, 5]
text

Hola, ¿cómo estás?


'Hola, ¿cómo estás?'

In [91]:
def get_min(tuple_words):
    """Return the pair whose second element is smallest.

    The pairs are folded left to right with ``reduce``; on a tie the pair
    seen first is kept.
    """
    def _smaller(current, candidate):
        if current[1] <= candidate[1]:
            return current
        return candidate

    return reduce(_smaller, tuple_words)


words = [('ghdfdf', 3), ('df', 1)]
get_min(words)

('df', 1)