# SQL Exercise
For this exercise we will use a database about Pokémon. **The file "pokemon.sqlite" must be in the `data` folder located one level above the notebook's folder (i.e. `../data/pokemon.sqlite`)**.
This database contains a single table called `pokemon`.

In [None]:
import sqlite3
import pandas as pd

In [None]:
import os
from pathlib import Path

# Database file: <parent of the current working directory>/data/pokemon.sqlite
filepath = Path.cwd().parent.joinpath("data", "pokemon.sqlite")
# Displayed as the cell output: True when the file is present
filepath.exists()


In [None]:
# Connect with the database pokemon.sqlite
# sqlite3.connect() silently creates a new, empty database when the file is
# missing, which would only surface later as "no such table: pokemon".
# Fail early with an explicit error instead.
if not filepath.exists():
    raise FileNotFoundError(f"SQLite database not found: {filepath}")
connection = sqlite3.connect(filepath)

# Obtain a cursor to execute the queries
crsr = connection.cursor()


# Use a function to read the data and convert to a DataFrame
def sql_query(query, params=(), cursor=None):
    """Execute a SQL statement and return its result set as a DataFrame.

    Parameters
    ----------
    query : str
        SQL text to execute.
    params : sequence or dict, optional
        Values bound to the ``?`` / ``:name`` placeholders in ``query``.
        Prefer this over formatting values into the SQL string.
    cursor : sqlite3.Cursor, optional
        Cursor to run the statement on. Defaults to the notebook-level
        cursor ``crsr``.

    Returns
    -------
    pandas.DataFrame
        One row per fetched record, with columns named after the result-set
        columns. Statements that produce no result set (e.g. ``CREATE TABLE``)
        yield an empty DataFrame.
    """
    cur = crsr if cursor is None else cursor
    cur.execute(query, params)
    rows = cur.fetchall()
    # cursor.description is None when the statement returned no result set;
    # iterating over it directly would raise TypeError.
    names = [column[0] for column in (cur.description or ())]
    return pd.DataFrame(rows, columns=names)

In [None]:
# 1. Obtain a table with all fields
# Preliminary step: list the names of the tables recorded in sqlite_master,
# to find out which table can be queried.
query = "SELECT name FROM sqlite_master WHERE type='table'"

sql_query(query)

In [None]:
# Exercise 1 answer: select every column of every row in the pokemon table.
query = "SELECT * FROM pokemon"

sql_query(query)

In [None]:
# 2. Obtain a table with the fields "Name", "Type 1", "Type 2"
# "Type 1" and "Type 2" contain a space, so they are written as
# double-quoted identifiers.
query = """
SELECT Name, "Type 1", "Type 2"
FROM pokemon
"""

sql_query(query)

In [None]:
# 3. What are the unique "Type 1" types ?
# DISTINCT collapses repeated "Type 1" values into a single row each.
query = """
SELECT DISTINCT "Type 1"
FROM pokemon
"""

sql_query(query)

In [None]:
# 4. What are all "Type 1" and "Type 2" combinations ?
# DISTINCT applies to the ("Type 1", "Type 2") pair, so each combination
# present in the table appears exactly once.
query = """
SELECT DISTINCT "Type 1", "Type 2"
FROM pokemon
"""

sql_query(query)

In [None]:
# 5. What are all water type pokemons ?
# Only the primary type ("Type 1") is checked.
# 'Water' uses single quotes: in SQL, double quotes denote identifiers, and
# SQLite only treats "Water" as a string through a legacy fallback that applies
# when no column of that name exists.
query = """
SELECT Name, "Type 1"
FROM pokemon
WHERE "Type 1" LIKE 'Water'
"""

sql_query(query)

In [None]:
# 6. Obtain a table with legendary pokemons
# Keep every column, but only the rows whose Legendary flag equals 1.
query = """
SELECT *
FROM pokemon
WHERE "Legendary" = 1
"""

sql_query(query)

In [None]:
# 6 (names only). Names of the pokemons whose Legendary column equals 1.
query = """
SELECT Name
FROM pokemon
WHERE "Legendary" = 1
"""

sql_query(query)

In [None]:
# 7. Obtain a table with the Legendary Fire pokemon
# Both conditions must hold: Legendary flag equal to 1 and primary type Fire.
# 'Fire' uses single quotes because double quotes denote identifiers in SQL;
# SQLite accepts "Fire" as a string only through a legacy fallback.
query = """
SELECT Name, "Type 1"
FROM pokemon
WHERE "Legendary" = 1
AND "Type 1" LIKE 'Fire'
"""

sql_query(query)

In [None]:
# 8. Obtain a table with the pokemon with defense above 100 points. Sort by attack
# Strictly greater than 100; rows are ordered from highest to lowest Attack (DESC).
query = """
SELECT *
FROM pokemon
WHERE Defense > 100
ORDER BY Attack DESC
"""

sql_query(query)

In [None]:
# 9. Obtain a table with all first generation pokemons that are NOT Psychic and have a speed above 130
# Only the primary type ("Type 1") is tested against Psychic.
# 'Psychic' uses single quotes because double quotes denote identifiers in SQL;
# SQLite accepts "Psychic" as a string only through a legacy fallback.
query = """
SELECT *
FROM pokemon
WHERE Generation = 1
AND "Type 1" IS NOT 'Psychic'
AND Speed > 130
"""

sql_query(query)

In [None]:
# 10. Which is the pokemon with the highest attack ?
# Selects all columns together with the aggregate MAX(Attack), without GROUP BY.
# NOTE(review): mixing bare columns with an aggregate appears to rely on
# SQLite-specific behaviour (bare columns taken from the row holding the
# maximum) and is not portable to other SQL engines - confirm.
query = """
SELECT *, MAX(Attack)
FROM pokemon
"""

sql_query(query)

In [None]:
# 10 (alternative). Sort by Attack from highest to lowest and keep the first row.
# LIMIT 1 returns a single row even if several pokemons share the top Attack.
query = """
SELECT *
FROM pokemon
ORDER BY Attack DESC
LIMIT 1
"""

sql_query(query)

In [None]:
# 11. Which is the mean defense of all pokemon ?
# AVG(Defense) over the whole table gives a single-row, single-column result.
query = """
SELECT AVG(Defense)
FROM pokemon
"""

sql_query(query)

In [None]:
# 12. Which is the mean defense by generation ?
# One output row per Generation value, with that generation's average Defense.
query = """
SELECT Generation, AVG(Defense)
FROM pokemon
GROUP BY "Generation"
"""

sql_query(query)

In [None]:
# 13. Calculate maximum HP in Type 1
# One output row per primary type ("Type 1"), with the highest HP found in it.
query = """
SELECT "Type 1", MAX(HP)
FROM pokemon
GROUP BY "Type 1"
"""

sql_query(query)

In [None]:
# Per-generation summary: average Defense, Attack and HP, plus the number of
# rows with a non-NULL Name (COUNT(Name)).
query = """
SELECT Generation, AVG(Defense), AVG(Attack), AVG(HP), COUNT(Name)
FROM pokemon
GROUP BY "Generation"
"""

sql_query(query)

In [None]:
# Load the whole pokemon table into a DataFrame with pandas, reusing the
# already-open sqlite3 connection. The plotting cells below read from `df`.
df = pd.read_sql("SELECT * FROM pokemon", connection)
df

In [None]:
import matplotlib.pyplot as plt

# One fixed colour per generation. zip() below stops at the shorter sequence,
# so any generation beyond the sixth would not be drawn.
colors = ["red", "green", "blue", "orange", "grey", "pink"]

# Attack against Defense, all generations overlaid on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
for color, generation in zip(colors, df.Generation.unique()):
    dg = df.loc[df.Generation == generation]
    # color= is the explicit keyword for one colour shared by all points
    ax.scatter(dg.Defense, dg.Attack, color=color, label=generation)
# Label the figure so it can be read on its own
ax.set_xlabel("Defense")
ax.set_ylabel("Attack")
ax.set_title("Attack vs. Defense by generation")
ax.legend(title="Generation");

In [None]:
# Attack against Defense, one panel per generation, on a 2 x 3 grid.
colors = ["red", "green", "blue", "orange", "grey", "pink"]
nrows = 2
ncols = 3
# Grid positions in row-major order: (0, 0), (0, 1), ..., (1, 2)
plotting_list = [(row, col) for row in range(nrows) for col in range(ncols)]

fig, axs = plt.subplots(nrows, ncols, figsize=(10, 6))

# zip() stops at the shortest of the three sequences, so at most
# nrows * ncols generations are drawn.
for color, generation, (row, col) in zip(
    colors, df.Generation.unique(), plotting_list
):
    ax = axs[row][col]
    dg = df.loc[df.Generation == generation]
    ax.scatter(dg.Defense, dg.Attack, color=color)
    # Same limits on every panel so the generations can be compared directly
    ax.set_xlim((0, 200))
    ax.set_ylim((0, 200))
    # The title identifies the panel; a scatter label alone is never shown
    ax.set_title(f"Generation {generation}")
    ax.set_xlabel("Defense")
    ax.set_ylabel("Attack")
# Keep titles and axis labels of neighbouring panels from overlapping
fig.tight_layout()

In [None]:
import seaborn as sns

# Single scatter plot of Attack against Defense that encodes three more columns:
# colour (hue) = Generation, marker size = HP, marker shape (style) = "Type 1".
sns.scatterplot(
    x="Defense",
    y="Attack",
    data=df,
    hue="Generation",
    palette="tab10",
    size="HP",
    style="Type 1",
)