Consultas -- Avanzado
===

In [1]:
%load_ext bigdata
%hive_init

Hive initialized!


In [2]:
%%hive
-- Recreate the demo database from scratch so the notebook can be re-run.
DROP DATABASE IF EXISTS SQLdb CASCADE;
CREATE DATABASE SQLdb;
USE SQLdb;

-- Sample table queried by the string, date, ordering and grouping examples below.
CREATE TABLE persons (
    id        INT,
    firstname VARCHAR(10),
    surname   VARCHAR(10),
    birthday  DATE,
    color     VARCHAR(10),
    quantity  INT
);

-- The explicit column list binds each value to a named column, so the load
-- does not silently depend on the physical column order of the table.
INSERT INTO persons (id, firstname, surname, birthday, color, quantity) VALUES
    (1,  'Vivian',   'Hamilton', '1971-07-08', 'green',  1),
    (2,  'Karen',    'Holcomb',  '1974-05-23', 'green',  4),
    (3,  'Cody',     'Garrett',  '1973-04-22', 'orange', 1),
    (4,  'Roth',     'Fry',      '1975-01-29', 'black',  1),
    (5,  'Zoe',      'Conway',   '1974-07-03', 'blue',   2),
    (6,  'Gretchen', 'Kinney',   '1974-10-18', 'violet', 1),
    (7,  'Driscoll', 'Klein',    '1970-10-05', 'blue',   5),
    (8,  'Karyn',    'Diaz',     '1969-02-24', 'red',    1),
    (9,  'Merritt',  'Guy',      '1974-10-17', 'indigo', 4),
    (10, 'Kylan',    'Sexton',   '1975-02-28', 'black',  4),
    (11, 'Jordan',   'Estes',    '1969-12-07', 'indigo', 4),
    (12, 'Hope',     'Coffey',   '1973-12-24', 'green',  5),
    (13, 'Vivian',   'Crane',    '1970-08-27', 'gray',   5),
    (14, 'Clio',     'Noel',     '1972-12-12', 'red',    5),
    (15, 'Hope',     'Silva',    '1970-07-01', 'blue',   5),
    (16, 'Ayanna',   'Jarvis',   '1974-02-11', 'orange', 5),
    (17, 'Chanda',   'Boyer',    '1973-04-01', 'green',  4),
    (18, 'Chadwick', 'Knight',   '1973-04-29', 'yellow', 1);


---

## Strings

### CONCAT()

In [3]:
%%hive
-- Join first name and surname with an '@' separator, next to the quantity.
SELECT
    CONCAT(firstname, '@', surname) AS name_at_surname,
    quantity
FROM persons;

Vivian@Hamilton	1
Karen@Holcomb	4
Cody@Garrett	1
Roth@Fry	1
Zoe@Conway	2
Gretchen@Kinney	1
Driscoll@Klein	5
Karyn@Diaz	1
Merritt@Guy	4
Kylan@Sexton	4
Jordan@Estes	4
Hope@Coffey	5
Vivian@Crane	5
Clio@Noel	5
Hope@Silva	5
Ayanna@Jarvis	5
Chanda@Boyer	4
Chadwick@Knight	1


### LENGTH()

In [4]:
%%hive
-- Show LENGTH() and CHAR_LENGTH() side by side for five surnames.
SELECT
    surname,
    LENGTH(surname)      AS surname_length,
    CHAR_LENGTH(surname) AS surname_char_length
FROM persons
LIMIT 5;

Hamilton	8	8
Holcomb	7	7
Garrett	7	7
Fry	3	3
Conway	6	6


### UPPER(), LOWER()

In [5]:
%%hive
-- Show the upper-case and lower-case forms of five surnames.
SELECT
    surname,
    UPPER(surname) AS surname_upper,
    LOWER(surname) AS surname_lower
FROM persons
LIMIT 5;

Hamilton	HAMILTON	hamilton
Holcomb	HOLCOMB	holcomb
Garrett	GARRETT	garrett
Fry	FRY	fry
Conway	CONWAY	conway


In [6]:
%%hive
--
-- Range condition on a string column.
-- NOTE(review): every row of the recorded output is false, even for surnames
-- such as 'Garrett'. The comparison appears to be case-sensitive, so the
-- capitalized surnames sort before 'c'. Comparing LOWER(surname) instead
-- would likely give the intended result -- confirm.
--
SELECT surname, surname BETWEEN 'c' AND 'k' FROM persons LIMIT 5;

Hamilton	false
Holcomb	false
Garrett	false
Fry	false
Conway	false


### LIKE

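El operador LIKE permite buscar patrones específicos dentro de cadenas de caracteres. El comodín `%` representa cualquier secuencia de caracteres (incluso vacía) y `_` representa exactamente un carácter.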

    > LIKE 'a%'   : Encuentra los valores que inician con 'a'
    > LIKE '%a'   : Encuentra los valores que terminan con 'a'
    > LIKE '%a%'  : Encuentra los valores que contengan 'a' en cualquier posición
    > LIKE '_a%'  : Encuentra los valores que contengan 'a' en la segunda posición
    > LIKE 'a_%_%': Encuentra los valores que inician con 'a' y tienen al menos 3 caracteres
    > LIKE 'a%o'  : Encuentra los valores que inician con 'a' y terminan con 'o'

In [7]:
%%hive
-- Rows whose color starts with 'b'.
SELECT
    id,
    firstname,
    surname,
    birthday,
    color,
    quantity
FROM persons
WHERE color LIKE 'b%';

4	Roth	Fry	1975-01-29	black	1
5	Zoe	Conway	1974-07-03	blue	2
7	Driscoll	Klein	1970-10-05	blue	5
10	Kylan	Sexton	1975-02-28	black	4
15	Hope	Silva	1970-07-01	blue	5


In [8]:
%%hive
-- Rows whose color does not start with 'b'.
SELECT
    id,
    firstname,
    surname,
    birthday,
    color,
    quantity
FROM persons
WHERE color NOT LIKE 'b%';

1	Vivian	Hamilton	1971-07-08	green	1
2	Karen	Holcomb	1974-05-23	green	4
3	Cody	Garrett	1973-04-22	orange	1
6	Gretchen	Kinney	1974-10-18	violet	1
8	Karyn	Diaz	1969-02-24	red	1
9	Merritt	Guy	1974-10-17	indigo	4
11	Jordan	Estes	1969-12-07	indigo	4
12	Hope	Coffey	1973-12-24	green	5
13	Vivian	Crane	1970-08-27	gray	5
14	Clio	Noel	1972-12-12	red	5
16	Ayanna	Jarvis	1974-02-11	orange	5
17	Chanda	Boyer	1973-04-01	green	4
18	Chadwick	Knight	1973-04-29	yellow	1


In [9]:
%%hive
-- Logical connector AND: both conditions must hold.
SELECT
    id,
    firstname,
    surname,
    birthday,
    color,
    quantity
FROM persons
WHERE color = 'blue'
  AND firstname LIKE 'Z%';

5	Zoe	Conway	1974-07-03	blue	2


In [10]:
%%hive
-- Logical connector OR: either condition is enough.
SELECT
    id,
    firstname,
    surname,
    birthday,
    color,
    quantity
FROM persons
WHERE color = 'blue'
   OR firstname LIKE 'K%';

2	Karen	Holcomb	1974-05-23	green	4
5	Zoe	Conway	1974-07-03	blue	2
7	Driscoll	Klein	1970-10-05	blue	5
8	Karyn	Diaz	1969-02-24	red	1
10	Kylan	Sexton	1975-02-28	black	4
15	Hope	Silva	1970-07-01	blue	5


In [11]:
%%hive
-- Membership test on a string column: color is one of the listed values.
SELECT
    id,
    firstname,
    surname,
    birthday,
    color,
    quantity
FROM persons
WHERE color IN ('blue', 'black');

4	Roth	Fry	1975-01-29	black	1
5	Zoe	Conway	1974-07-03	blue	2
7	Driscoll	Klein	1970-10-05	blue	5
10	Kylan	Sexton	1975-02-28	black	4
15	Hope	Silva	1970-07-01	blue	5


In [12]:
%%hive
-- Negated membership test: color is none of the listed values.
SELECT
    id,
    firstname,
    surname,
    birthday,
    color,
    quantity
FROM persons
WHERE color NOT IN ('blue', 'black');

1	Vivian	Hamilton	1971-07-08	green	1
2	Karen	Holcomb	1974-05-23	green	4
3	Cody	Garrett	1973-04-22	orange	1
6	Gretchen	Kinney	1974-10-18	violet	1
8	Karyn	Diaz	1969-02-24	red	1
9	Merritt	Guy	1974-10-17	indigo	4
11	Jordan	Estes	1969-12-07	indigo	4
12	Hope	Coffey	1973-12-24	green	5
13	Vivian	Crane	1970-08-27	gray	5
14	Clio	Noel	1972-12-12	red	5
16	Ayanna	Jarvis	1974-02-11	orange	5
17	Chanda	Boyer	1973-04-01	green	4
18	Chadwick	Knight	1973-04-29	yellow	1


### REGEXP

In [13]:
%%hive
-- The first character is a 'b'.
SELECT
    id,
    firstname,
    surname,
    birthday,
    color,
    quantity
FROM persons
WHERE color REGEXP '^b';

4	Roth	Fry	1975-01-29	black	1
5	Zoe	Conway	1974-07-03	blue	2
7	Driscoll	Klein	1970-10-05	blue	5
10	Kylan	Sexton	1975-02-28	black	4
15	Hope	Silva	1970-07-01	blue	5


In [14]:
%%hive
-- The first character is anything other than 'b'.
SELECT
    id,
    firstname,
    surname,
    birthday,
    color,
    quantity
FROM persons
WHERE color REGEXP '^[^b]';

1	Vivian	Hamilton	1971-07-08	green	1
2	Karen	Holcomb	1974-05-23	green	4
3	Cody	Garrett	1973-04-22	orange	1
6	Gretchen	Kinney	1974-10-18	violet	1
8	Karyn	Diaz	1969-02-24	red	1
9	Merritt	Guy	1974-10-17	indigo	4
11	Jordan	Estes	1969-12-07	indigo	4
12	Hope	Coffey	1973-12-24	green	5
13	Vivian	Crane	1970-08-27	gray	5
14	Clio	Noel	1972-12-12	red	5
16	Ayanna	Jarvis	1974-02-11	orange	5
17	Chanda	Boyer	1973-04-01	green	4
18	Chadwick	Knight	1973-04-29	yellow	1


In [15]:
%%hive
-- Alternation: the color contains 'blue' or 'green'.
SELECT
    id,
    firstname,
    surname,
    birthday,
    color,
    quantity
FROM persons
WHERE color REGEXP 'blue|green';

1	Vivian	Hamilton	1971-07-08	green	1
2	Karen	Holcomb	1974-05-23	green	4
5	Zoe	Conway	1974-07-03	blue	2
7	Driscoll	Klein	1970-10-05	blue	5
12	Hope	Coffey	1973-12-24	green	5
15	Hope	Silva	1970-07-01	blue	5
17	Chanda	Boyer	1973-04-01	green	4


In [16]:
%%hive
--
-- The second character is an 'r'.
-- The leading '^' anchors the pattern to the start of the value; without it
-- '.r' would match an 'r' preceded by any character anywhere in the string.
--
SELECT * FROM persons WHERE color REGEXP '^.r';

1	Vivian	Hamilton	1971-07-08	green	1
2	Karen	Holcomb	1974-05-23	green	4
3	Cody	Garrett	1973-04-22	orange	1
12	Hope	Coffey	1973-12-24	green	5
13	Vivian	Crane	1970-08-27	gray	5
16	Ayanna	Jarvis	1974-02-11	orange	5
17	Chanda	Boyer	1973-04-01	green	4


In [17]:
%%hive
-- The value ends in a lowercase vowel.
SELECT
    id,
    firstname,
    surname,
    birthday,
    color,
    quantity
FROM persons
WHERE color REGEXP '[aeiou]$';

3	Cody	Garrett	1973-04-22	orange	1
5	Zoe	Conway	1974-07-03	blue	2
7	Driscoll	Klein	1970-10-05	blue	5
9	Merritt	Guy	1974-10-17	indigo	4
11	Jordan	Estes	1969-12-07	indigo	4
15	Hope	Silva	1970-07-01	blue	5
16	Ayanna	Jarvis	1974-02-11	orange	5


In [18]:
%%hive
--
-- Starts with an uppercase letter.
-- TO REVIEW: Hive does not seem to accept character classes the way MySQL does.
-- NOTE(review): the recorded output contains only 'red' rows, none of which
-- start with an uppercase letter, so the POSIX class [[:upper:]] is evidently
-- not interpreted as "uppercase" here. Hive's REGEXP presumably follows Java
-- regex syntax, where [A-Z] or \p{Upper} is the usual spelling -- confirm
-- before changing the pattern.
--
SELECT * FROM persons WHERE color REGEXP '^[[:upper:]]';

8	Karyn	Diaz	1969-02-24	red	1
14	Clio	Noel	1972-12-12	red	5


### MID(), SUBSTRING()

In [19]:
%%hive
--
-- Substring extraction by position.
-- Character positions are 1-based, so the leading slice starts at 1.
-- A negative start counts back from the end of the string.
--
SELECT
    firstname,
    SUBSTRING(firstname, 1, 2), -- first two characters, like LEFT(firstname, 2)
    SUBSTRING(firstname, 3, 2), -- two characters starting at the third
    SUBSTRING(firstname, -3)    -- last three characters, like RIGHT(firstname, 3)
FROM
    persons;

Vivian	Vi	vi	ian
Karen	Ka	re	ren
Cody	Co	dy	ody
Roth	Ro	th	oth
Zoe	Zo	e	Zoe
Gretchen	Gr	et	hen
Driscoll	Dr	is	oll
Karyn	Ka	ry	ryn
Merritt	Me	rr	itt
Kylan	Ky	la	lan
Jordan	Jo	rd	dan
Hope	Ho	pe	ope
Vivian	Vi	vi	ian
Clio	Cl	io	lio
Hope	Ho	pe	ope
Ayanna	Ay	an	nna
Chanda	Ch	an	nda
Chadwick	Ch	ad	ick


In [20]:
%%hive
-- Split around a delimiter: the text before the first 'a' and the text after
-- the last 'a'. Names without an 'a' come back unchanged.
SELECT
    firstname,
    SUBSTRING_INDEX(firstname, 'a', 1)  AS before_first_a,
    SUBSTRING_INDEX(firstname, 'a', -1) AS after_last_a
FROM persons;

Vivian	Vivi	n
Karen	K	ren
Cody	Cody	Cody
Roth	Roth	Roth
Zoe	Zoe	Zoe
Gretchen	Gretchen	Gretchen
Driscoll	Driscoll	Driscoll
Karyn	K	ryn
Merritt	Merritt	Merritt
Kylan	Kyl	n
Jordan	Jord	n
Hope	Hope	Hope
Vivian	Vivi	n
Clio	Clio	Clio
Hope	Hope	Hope
Ayanna	Ay	
Chanda	Ch	
Chadwick	Ch	dwick


In [21]:
%%hive
--
-- Relational operators on strings.
-- String comparison is case-sensitive and uppercase letters sort before
-- lowercase ones, so a capitalized initial is never >= 'm'. Lower-casing the
-- initial makes the filter select the names that start with M through Z.
--
SELECT
    firstname
FROM
    persons
WHERE
    LOWER(SUBSTRING(firstname, 1, 1)) >= 'm';

### LOCATE()

In [22]:
%%hive
-- Position of the substring 'ia' inside each first name (0 when it is absent).
SELECT
    firstname,
    LOCATE('ia', firstname) AS ia_position
FROM persons;

Vivian	4
Karen	0
Cody	0
Roth	0
Zoe	0
Gretchen	0
Driscoll	0
Karyn	0
Merritt	0
Kylan	0
Jordan	0
Hope	0
Vivian	4
Clio	0
Hope	0
Ayanna	0
Chanda	0
Chadwick	0


## Fecha y hora

### DATE_FORMAT()

In [23]:
%%hive
-- Year patterns: four-digit and two-digit year.
SELECT
    birthday,
    DATE_FORMAT(birthday, 'yyyy   yy') AS year_formats
FROM persons
LIMIT 5;

1971-07-08	1971   71
1974-05-23	1974   74
1973-04-22	1973   73
1975-01-29	1975   75
1974-07-03	1974   74


In [24]:
%%hive
-- Month patterns: abbreviated name, zero-padded number, plain number.
SELECT
    birthday,
    DATE_FORMAT(birthday, 'MMM MM M') AS month_formats
FROM persons
LIMIT 5;

1971-07-08	jul 07 7
1974-05-23	may 05 5
1973-04-22	abr 04 4
1975-01-29	ene 01 1
1974-07-03	jul 07 7


In [25]:
%%hive
-- Day patterns: zero-padded day, plain day, short and full weekday name.
SELECT
    birthday,
    DATE_FORMAT(birthday, 'dd  d EEE EEEE') AS day_formats
FROM persons
LIMIT 5;

1971-07-08	08  8 jue jueves
1974-05-23	23  23 jue jueves
1973-04-22	22  22 dom domingo
1975-01-29	29  29 mié miércoles
1974-07-03	03  3 mié miércoles


In [26]:
%%hive
-- Number of persons born in each calendar month (two-digit month).
-- The month is taken with DATE_FORMAT() rather than by splitting the DATE as
-- if it were a string, which relied on an implicit DATE-to-string conversion.
SELECT
    DATE_FORMAT(birthday, 'MM') AS birth_month,
    COUNT(*)                    AS persons_count
FROM
    persons
GROUP BY
    DATE_FORMAT(birthday, 'MM');

01	1
02	3
04	3
05	1
07	3
08	1
10	3
12	3


### CURRENT_DATE(), CURRENT_TIMESTAMP()

In [27]:
%%hive
-- Current date and current timestamp of the session.
SELECT
    CURRENT_DATE()      AS today,
    CURRENT_TIMESTAMP() AS right_now;

2018-10-20	2018-10-20 18:42:39.957


### YEAR(), MONTH(), DAYNAME(), ...

In [28]:
%%hive
-- Small table holding a timestamp and the matching calendar date, used by the
-- date and time examples below. Dropped first so the cell can be re-run.
DROP TABLE IF EXISTS tbltime;
CREATE TABLE tbltime (ts TIMESTAMP, d DATE);

-- The explicit column list binds each value to a named column instead of
-- relying on the physical column order.
INSERT INTO tbltime (ts, d) VALUES
    ('2018-08-23 14:23:18', '2018-08-23'),
    ('2016-05-14 05:45:23', '2016-05-14'),
    ('2017-07-01 11:10:45', '2017-07-01');

SELECT ts, d FROM tbltime;

2018-08-23 14:23:18	2018-08-23
2016-05-14 05:45:23	2016-05-14
2017-07-01 11:10:45	2017-07-01


In [29]:
%%hive
-- Year and month of a timestamp, numerically and as short and long month names.
SELECT
    ts,
    YEAR(ts)                 AS ts_year,
    MONTH(ts)                AS ts_month,
    DATE_FORMAT(ts, 'MMM')   AS month_short_name,
    DATE_FORMAT(ts, 'MMMMM') AS month_long_name
FROM tbltime;

2018-08-23 14:23:18	2018	8	ago	agosto
2016-05-14 05:45:23	2016	5	may	mayo
2017-07-01 11:10:45	2017	7	jul	julio


In [30]:
%%hive
-- Day of a timestamp: day of month (two spellings), weekday names and
-- weekday number.
SELECT
    ts,
    DAY(ts)                 AS ts_day,
    DAYOFMONTH(ts)          AS ts_day_of_month,
    DATE_FORMAT(ts, 'EEE')  AS weekday_short_name,
    DATE_FORMAT(ts, 'EEEE') AS weekday_long_name,
    DAYOFWEEK(ts)           AS weekday_number
FROM tbltime;

2018-08-23 14:23:18	23	23	jue	jueves	5
2016-05-14 05:45:23	14	14	sáb	sábado	7
2017-07-01 11:10:45	1	1	sáb	sábado	7


In [31]:
%%hive
-- Time-of-day components of a timestamp.
SELECT
    ts,
    HOUR(ts)   AS ts_hour,
    MINUTE(ts) AS ts_minute,
    SECOND(ts) AS ts_second
FROM tbltime;

2018-08-23 14:23:18	14	23	18
2016-05-14 05:45:23	5	45	23
2017-07-01 11:10:45	11	10	45


In [32]:
%%hive
-- Pull individual fields out of a timestamp with EXTRACT(field FROM ts).
SELECT
    ts,
    EXTRACT(YEAR FROM ts),
    EXTRACT(MONTH FROM ts),
    EXTRACT(HOUR FROM ts),
    EXTRACT(HOUR FROM ts),   -- NOTE(review): duplicate of the previous column; DAY is not extracted anywhere and was probably intended in one of the two -- confirm
    EXTRACT(MINUTE FROM ts),
    EXTRACT(SECOND FROM ts)
FROM
    tbltime;

2018-08-23 14:23:18	2018	8	14	14	23	18
2016-05-14 05:45:23	2016	5	5	5	45	23
2017-07-01 11:10:45	2017	7	11	11	10	45


### TIME_TO_SEC(), SEC_TO_TIME()

In [33]:
%%hive
-- These functions do not exist in Hive.
-- SELECT
--     t,
--     TIME_TO_SEC(t),
--     SEC_TO_TIME(TIME_TO_SEC(t))
-- FROM
--     tbltime;

### FROM_DAYS(), TO_DAYS()

In [34]:
%%hive
-- These functions do not exist in Hive.
-- SELECT
--     ts,
--     TO_DAYS(ts),
--     FROM_DAYS(TO_DAYS(ts))
-- FROM
--     tbltime;

### UNIX_TIMESTAMP, FROM_UNIXTIME()

In [35]:
%%hive
-- Convert a timestamp to Unix epoch seconds and back again.
SELECT
    ts,
    UNIX_TIMESTAMP(ts)                AS epoch_seconds,
    FROM_UNIXTIME(UNIX_TIMESTAMP(ts)) AS round_trip
FROM tbltime;

2018-08-23 14:23:18	1535034198	2018-08-23 14:23:18
2016-05-14 05:45:23	1463204723	2016-05-14 05:45:23
2017-07-01 11:10:45	1498907445	2017-07-01 11:10:45


### TIMEDIFF(), DATEDIFF(), TIMESTAMPDIFF()

In [36]:
%%hive
-- Number of days from each date up to 2018-12-30.
SELECT
    d,
    DATEDIFF('2018-12-30', d) AS days_until_reference
FROM tbltime;

2018-08-23	129
2016-05-14	960
2017-07-01	547


In [37]:
%%hive
-- This function does not exist in Hive.
-- SELECT
--     t,
--     TIMEDIFF("23:05:18", t)
-- FROM
--     tbltime;

In [38]:
%%hive
-- This function does not exist in Hive.
-- SELECT
--     ts,
--     TIMESTAMPDIFF(YEAR, ts, "2018-12-31 23:05:18"),
--     TIMESTAMPDIFF(MONTH, ts, "2018-12-31 23:05:18"),
--     TIMESTAMPDIFF(WEEK, ts, "2018-12-31 23:05:18")
-- FROM
--     tbltime;

In [39]:
%%hive
-- Seconds elapsed between each timestamp and 2018-12-31 23:05:18, computed
-- by subtracting Unix epoch values.
SELECT
    ts,
    UNIX_TIMESTAMP('2018-12-31 23:05:18') - UNIX_TIMESTAMP(ts) AS seconds_until_reference
FROM tbltime;

2018-08-23 14:23:18	11263320
2016-05-14 05:45:23	83092795
2017-07-01 11:10:45	47390073


### ADDTIME()

In [40]:
%%hive
-- This function does not exist in Hive.
-- SELECT
--    t,
--    ADDTIME("23:05:18", t)
-- FROM
--    tbltime;

In [41]:
%%hive
-- Shift each date three days forward and three days back.
SELECT
    d,
    DATE_ADD(d, 3) AS three_days_later,
    DATE_SUB(d, 3) AS three_days_earlier
FROM tbltime;

2018-08-23	2018-08-26	2018-08-20
2016-05-14	2016-05-17	2016-05-11
2017-07-01	2017-07-04	2017-06-28


In [42]:
%%hive
-- Dates that fall within calendar year 2017 (both bounds inclusive).
SELECT d
FROM tbltime
WHERE d >= '2017-01-01'
  AND d <= '2017-12-31';

2017-07-01


## ORDER BY

In [43]:
%%hive
-- All persons sorted alphabetically by color.
SELECT
    id,
    firstname,
    surname,
    birthday,
    color,
    quantity
FROM persons
ORDER BY color;

4	Roth	Fry	1975-01-29	black	1
10	Kylan	Sexton	1975-02-28	black	4
5	Zoe	Conway	1974-07-03	blue	2
7	Driscoll	Klein	1970-10-05	blue	5
15	Hope	Silva	1970-07-01	blue	5
13	Vivian	Crane	1970-08-27	gray	5
2	Karen	Holcomb	1974-05-23	green	4
12	Hope	Coffey	1973-12-24	green	5
17	Chanda	Boyer	1973-04-01	green	4
1	Vivian	Hamilton	1971-07-08	green	1
9	Merritt	Guy	1974-10-17	indigo	4
11	Jordan	Estes	1969-12-07	indigo	4
3	Cody	Garrett	1973-04-22	orange	1
16	Ayanna	Jarvis	1974-02-11	orange	5
8	Karyn	Diaz	1969-02-24	red	1
14	Clio	Noel	1972-12-12	red	5
6	Gretchen	Kinney	1974-10-18	violet	1
18	Chadwick	Knight	1973-04-29	yellow	1


In [44]:
%%hive
-- Rows in chronological order.
SELECT
    ts,
    d
FROM tbltime
ORDER BY ts;

2016-05-14 05:45:23	2016-05-14
2017-07-01 11:10:45	2017-07-01
2018-08-23 14:23:18	2018-08-23


In [45]:
%%hive
-- Rows sorted by day of the month, ignoring year and month.
SELECT
    ts,
    d
FROM tbltime
ORDER BY DAY(ts);

2017-07-01 11:10:45	2017-07-01
2016-05-14 05:45:23	2016-05-14
2018-08-23 14:23:18	2018-08-23


In [46]:
%%hive
--
-- Custom sort order using FIELD().
-- This does not behave the same as in MySQL: the results are different.
-- NOTE(review): the recorded output is not grouped by color (the blue rows
-- are interleaved with the rest), which suggests FIELD() returned the same
-- value for every row. Possibly the VARCHAR column is not matched against
-- the STRING literal; CAST(color AS STRING) may be needed -- confirm.
--
SELECT * FROM persons ORDER BY FIELD(color, 'blue');

18	Chadwick	Knight	1973-04-29	yellow	1
17	Chanda	Boyer	1973-04-01	green	4
16	Ayanna	Jarvis	1974-02-11	orange	5
15	Hope	Silva	1970-07-01	blue	5
14	Clio	Noel	1972-12-12	red	5
13	Vivian	Crane	1970-08-27	gray	5
12	Hope	Coffey	1973-12-24	green	5
11	Jordan	Estes	1969-12-07	indigo	4
10	Kylan	Sexton	1975-02-28	black	4
9	Merritt	Guy	1974-10-17	indigo	4
8	Karyn	Diaz	1969-02-24	red	1
7	Driscoll	Klein	1970-10-05	blue	5
6	Gretchen	Kinney	1974-10-18	violet	1
5	Zoe	Conway	1974-07-03	blue	2
4	Roth	Fry	1975-01-29	black	1
3	Cody	Garrett	1973-04-22	orange	1
2	Karen	Holcomb	1974-05-23	green	4
1	Vivian	Hamilton	1971-07-08	green	1


## GROUP BY

### COUNT()

In [47]:
%%hive
-- Count how many rows the table holds.
SELECT
    COUNT(*) AS persons_count
FROM persons;

18


In [48]:
%%hive
-- Count how many persons were born in each year.
SELECT
    YEAR(birthday) AS birth_year,
    COUNT(*)       AS persons_count
FROM persons
GROUP BY YEAR(birthday);

1969	2
1970	3
1971	1
1972	1
1973	4
1974	5
1975	2


### MAX(), MIN(), SUM(), AVG()

In [49]:
%%hive
-- Maximum, minimum, total and average quantity for each color.
SELECT
    color,
    MAX(quantity) AS max_quantity,
    MIN(quantity) AS min_quantity,
    SUM(quantity) AS total_quantity,
    AVG(quantity) AS avg_quantity
FROM persons
GROUP BY color;

black	4	1	5	2.5
blue	5	2	12	4.0
gray	5	5	5	5.0
green	5	1	14	3.5
indigo	4	4	8	4.0
orange	5	1	6	3.0
red	5	1	6	3.0
violet	1	1	1	1.0
yellow	1	1	1	1.0


In [50]:
%%hive
-- Colors of the rows whose quantity equals the smallest quantity in the table.
-- The scalar subquery computes that minimum once over the whole table.
SELECT
    color,
    quantity
FROM persons
WHERE quantity = (
    SELECT MIN(quantity)
    FROM persons
);

green	1
orange	1
black	1
violet	1
red	1
yellow	1


## LEAD, LAG

In [51]:
%%hive
-- For each id (in id order): its row number, the next id and the previous id.
-- LEAD/LAG yield NULL where there is no following/preceding row.
SELECT
    id,
    ROW_NUMBER() OVER (ORDER BY id) AS row_num,
    LEAD(id)     OVER (ORDER BY id) AS next_id,
    LAG(id)      OVER (ORDER BY id) AS previous_id
FROM persons;

1	1	2	NULL
2	2	3	1
3	3	4	2
4	4	5	3
5	5	6	4
6	6	7	5
7	7	8	6
8	8	9	7
9	9	10	8
10	10	11	9
11	11	12	10
12	12	13	11
13	13	14	12
14	14	15	13
15	15	16	14
16	16	17	15
17	17	18	16
18	18	NULL	17


### FIRST_VALUE(), LAST_VALUE()

In [52]:
%%hive
-- Reference listing of color and quantity, sorted by color.
SELECT
    color,
    quantity
FROM persons
ORDER BY color;

black	1
black	4
blue	2
blue	5
blue	5
gray	5
green	4
green	5
green	4
green	1
indigo	4
indigo	4
orange	1
orange	5
red	1
red	5
violet	1
yellow	1


In [53]:
%%hive
-- First and last quantity within each color, with rows ordered by quantity.
-- NOTE(review): in the recorded output the LAST_VALUE column always equals
-- the row's own quantity rather than the largest quantity of the color. That
-- is consistent with a default window frame that ends at the current row. If
-- the last value of the whole partition is intended, an explicit frame such as
-- ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING is likely needed --
-- confirm.
SELECT 
    color,
    quantity,
    FIRST_VALUE(quantity) OVER(PARTITION BY color ORDER BY quantity), 
    LAST_VALUE(quantity) OVER(PARTITION BY color ORDER BY quantity)
FROM 
    persons
ORDER BY
    color, quantity;

black	1	1	1
black	4	1	4
blue	2	2	2
blue	5	2	5
blue	5	2	5
gray	5	5	5
green	1	1	1
green	4	1	4
green	4	1	4
green	5	1	5
indigo	4	4	4
indigo	4	4	4
orange	1	1	1
orange	5	1	5
red	1	1	1
red	5	1	5
violet	1	1	1
yellow	1	1	1


### RANK(), DENSE_RANK(), CUME_DIST(), PERCENT_RANK()

In [None]:
%%hive
-- Ranking functions evaluated over the rows ordered by color.
SELECT
    color,
    RANK()         OVER (ORDER BY color) AS color_rank,
    DENSE_RANK()   OVER (ORDER BY color) AS color_dense_rank,
    CUME_DIST()    OVER (ORDER BY color) AS color_cume_dist,
    PERCENT_RANK() OVER (ORDER BY color) AS color_percent_rank
FROM persons;

In [None]:
%%hive
-- Compute table statistics for persons.
ANALYZE TABLE persons COMPUTE STATISTICS;

---