In [None]:
# Uncomment the next line to install the packages this notebook imports, if they are missing.
#!pip install sqlalchemy jupysql

##  SQL vs Pandas

In [None]:
import pandas as pd
import numpy as np
import sqlalchemy
from pathlib import Path
%load_ext sql

In [None]:
# Run this cell to connect to database
# The URL points at the SQLite file `starwars.db` via a relative path, so the file
# is looked up relative to the kernel's working directory.
engine = sqlalchemy.create_engine('sqlite:///starwars.db')
# One open connection, reused by the pd.read_sql calls further down.
connection = engine.connect()

In [None]:
# Hand the SQLAlchemy engine to the %sql magic so the %sql / %%sql cells below query this database.
%sql engine


In [None]:
%%sql
-- sqlite_master is SQLite's schema catalog; keep only the rows that describe tables.
SELECT * FROM sqlite_master WHERE type='table';

In [None]:
%%sql
-- Display the first 5 rows of characters table
-- (SQL comments start with `--`; `#` is not comment syntax in SQLite.)
SELECT * FROM characters LIMIT 5;


In [None]:
# Pull every row of the characters table into a DataFrame over the open connection.
characters_df = pd.read_sql(
    sql="SELECT * FROM characters",
    con=connection,
)

characters_df

In [None]:
# Show the dtype pandas assigned to each column of the loaded table.
characters_df.dtypes

## Let's compare some SQL and Pandas commands

Demo 1 - Select data (columns)

In [None]:
# SQL: list the wanted columns after SELECT.
%sql SELECT name, species FROM characters

And in Pandas

In [None]:
# Pandas: index with a list of column names (note the double brackets) to get a DataFrame back.
characters_df[['name', 'species']]

Demo 2 - Filtering

In [None]:
%%sql
-- height is compared against 200 as stored, with no type conversion.
SELECT * FROM characters
WHERE  height > 200 AND species = 'Human'

Oops, that didn't work - apparently `height` is stored as a string, so the boolean condition on height didn't filter the rows numerically.

One approach is to ask SQL to treat that column as an integer

`CAST()` is a SQL function that converts one data type into another. This is useful when a column is stored as text (TEXT or VARCHAR), but you need to perform numeric operations on it.


In [None]:
%%sql
-- Convert height to an integer before comparing so the threshold is applied numerically.
SELECT *
FROM characters
WHERE species = 'Human'
  AND CAST(height AS INTEGER) > 200;

In [None]:
# Display the DataFrame again before inspecting how pandas typed the height column.
characters_df

In Pandas we also need to check the type of `height` - if it wasn't a number in SQL, it probably isn't one in Pandas either.

In [None]:
# dtype of the height column alone.
characters_df['height'].dtype

Convert `height` to numeric

In [None]:
# Overwrite the height column with its numeric version. errors='coerce' turns any value
# that cannot be parsed as a number into NaN instead of raising.
characters_df['height'] = pd.to_numeric(characters_df['height'], errors='coerce')
# Confirm the column's new dtype.
characters_df['height'].dtype

In [None]:
# Pandas equivalent of the WHERE clause: combine the two boolean masks with & (logical AND).
characters_df.loc[
    (characters_df['height'] > 200) & (characters_df['species'] == 'Human')
]

Demo 3 - Aggregate data

Average height by species



In [None]:
%%sql
-- One row per species with its mean height; height is cast to an integer before averaging.
SELECT
    species,
    AVG(CAST(height AS INTEGER)) AS avg_height
FROM characters
GROUP BY species;

And in Pandas

In [None]:
# Group the rows by species, then take the mean of the height column within each group.
species_avg_height = (
    characters_df
    .groupby('species')['height']
    .mean()
)

species_avg_height

Demo 4 - Joining tables

4.1 Let's join two tables characters and planets

4.2 Let's look for Humans from the desert

In [None]:
%%sql
-- Pair each character with the planet whose name equals the character's homeworld.
SELECT c.name, p.name AS homeworld
FROM characters AS c
JOIN planets AS p
  ON c.homeworld = p.name;

In [None]:
%%sql
-- Same join, now also returning the planet's climate, population and terrain.
SELECT
    c.name,
    c.homeworld,
    c.species,
    p.climate,
    p.population,
    p.terrain
FROM characters AS c
JOIN planets AS p
  ON c.homeworld = p.name;

In [None]:
%%sql
-- Joined rows restricted to humans on desert planets; LOWER() makes both comparisons case-insensitive.
SELECT
    c.name,
    c.homeworld,
    c.species,
    p.climate,
    p.population,
    p.terrain
FROM characters AS c
JOIN planets AS p
  ON c.homeworld = p.name
WHERE LOWER(c.species) = 'human'
  AND LOWER(p.terrain) = 'desert';

In [None]:
# Load the full planets table into its own DataFrame over the same connection.
planets_df = pd.read_sql(
    sql="SELECT * FROM planets",
    con=connection,
)


In [None]:
# Inner-join each character to its home planet (character homeworld == planet name),
# mirroring the SQL JOIN. Both frames carry a `name` column, so explicit suffixes label
# which one is the character and which the planet instead of the default name_x / name_y.
charactersXplanets = characters_df[['name', 'species', 'homeworld']].merge(
    planets_df,
    how='inner',
    left_on='homeworld',
    right_on='name',
    suffixes=('_character', '_planet'),
)
charactersXplanets

Get a subset of the humans from the desert from merged data


In [None]:
# Lower-case both columns before comparing, as the SQL version of this filter does with
# LOWER(), so values that differ only in capitalisation still match.
charactersXplanets[
    (charactersXplanets['species'].str.lower() == 'human')
    & (charactersXplanets['terrain'].str.lower() == 'desert')
]


Finally - Pandas does have a `.query()` method that is similar to SQL

In [168]:
# use .query 
# The filter is written as one string expression, much like a SQL WHERE clause.
human_tatooine_df = characters_df.query("species == 'Human' and homeworld == 'Tatooine'")
human_tatooine_df

Unnamed: 0,name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,species
0,Luke Skywalker,172.0,77.0,blond,fair,blue,19BBY,male,Tatooine,Human
3,Darth Vader,202.0,136.0,none,white,yellow,41.9BBY,male,Tatooine,Human
5,Owen Lars,178.0,120.0,"brown, grey",light,blue,52BBY,male,Tatooine,Human
6,Beru Whitesun lars,165.0,75.0,brown,light,blue,47BBY,female,Tatooine,Human
8,Biggs Darklighter,183.0,84.0,black,light,brown,24BBY,male,Tatooine,Human
10,Anakin Skywalker,188.0,84.0,blond,fair,blue,41.9BBY,male,Tatooine,Human
40,Shmi Skywalker,163.0,,black,fair,brown,72BBY,female,Tatooine,Human
58,Cliegg Lars,183.0,,brown,fair,blue,82BBY,male,Tatooine,Human


In [169]:
# Keep only the rows whose species is exactly 'Droid'.
droid_df = characters_df.query("species == 'Droid'")
droid_df

Unnamed: 0,name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,species
1,C-3PO,167.0,75.0,,gold,yellow,112BBY,,Tatooine,Droid
2,R2-D2,96.0,32.0,,"white, blue",red,33BBY,,Naboo,Droid
7,R5-D4,97.0,32.0,,"white, red",red,,,Tatooine,Droid
21,IG-88,200.0,140.0,none,metal,red,15BBY,none,,Droid
84,BB8,,,none,none,black,,none,,Droid


In [170]:
# Two conditions joined with `and` inside the query string: droids whose homeworld is Tatooine.
droid_tattoine_df = characters_df.query("species == 'Droid' and homeworld == 'Tatooine'")
droid_tattoine_df

Unnamed: 0,name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,species
1,C-3PO,167.0,75,,gold,yellow,112BBY,,Tatooine,Droid
7,R5-D4,97.0,32,,"white, red",red,,,Tatooine,Droid


In [171]:
# SQL counterpart of the human/Tatooine .query() call, run against the characters table.
%sql SELECT * FROM characters WHERE species = 'Human' AND homeworld = 'Tatooine';

name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,species
Luke Skywalker,172,77.0,blond,fair,blue,19BBY,male,Tatooine,Human
Darth Vader,202,136.0,none,white,yellow,41.9BBY,male,Tatooine,Human
Owen Lars,178,120.0,"brown, grey",light,blue,52BBY,male,Tatooine,Human
Beru Whitesun lars,165,75.0,brown,light,blue,47BBY,female,Tatooine,Human
Biggs Darklighter,183,84.0,black,light,brown,24BBY,male,Tatooine,Human
Anakin Skywalker,188,84.0,blond,fair,blue,41.9BBY,male,Tatooine,Human
Shmi Skywalker,163,,black,fair,brown,72BBY,female,Tatooine,Human
Cliegg Lars,183,,brown,fair,blue,82BBY,male,Tatooine,Human


In [172]:
# SQL counterpart of the droid/Tatooine .query() call, run against the characters table.
%sql SELECT * FROM characters WHERE species = 'Droid' AND homeworld = 'Tatooine';

name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,species
C-3PO,167,75,,gold,yellow,112BBY,,Tatooine,Droid
R5-D4,97,32,,"white, red",red,,,Tatooine,Droid
