# SQL with pandas and SQLAlchemy
This notebook shows how to use pandas and SQLAlchemy to run SQL queries against data supplied as CSV files.

In [1]:
!mkdir datasets
!kaggle datasets download kaikarren/sql-with-pandas-and-sqlalchemy -p datasets

403 - Forbidden - Permission 'datasets.get' was denied


In [4]:
import zipfile
import os

# Unpack the downloaded archive into the datasets folder.
archive_path = 'datasets/googleplaystore.csv.zip'
with zipfile.ZipFile(archive_path, mode='r') as archive:
    archive.extractall(path='datasets/')

# List the folder to confirm the CSV was extracted.
os.listdir('datasets/')

['googleplaystore.csv', 'googleplaystore.csv.zip']


The content of the csv file.

In [1]:
import pandas as pd

# Load the Play Store CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer="datasets/googleplaystore.csv")

# Preview only the first five rows.
df.head(n=5)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [2]:
# Row 10472 is malformed: its Category value is missing, so every following
# value sits one column to the left of where it belongs.
df.loc[[10472], :]

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
10472,Life Made WI-Fi Touchscreen Photo Frame,1.9,19.0,3.0M,"1,000+",Free,0,Everyone,,"February 11, 2018",1.0.19,4.0 and up,


In [3]:
# Drop the malformed row for convenience.
# errors='ignore' keeps this cell safe to re-run: without it, a second execution
# raises KeyError because label 10472 has already been removed.
df = df.drop(index=10472, errors='ignore')

In [4]:
# TODO : import sqlalchemy and create a sqlite engine
import sqlalchemy as sa

# Create an engine for the file-backed SQLite database and open a connection.
db_url = 'sqlite:///datasets/googleplaystore.db'
engine = sa.create_engine(db_url)
conn = engine.connect()

# Write the DataFrame to the 'playstore' table, replacing the table if it
# already exists. The value returned by to_sql is shown as the cell output.
df.to_sql(name='playstore', con=conn, if_exists='replace')

10840

You can execute SQL queries in the following way.

In [5]:
# Build a textual SQL statement and run it on the connection opened earlier.
# Reusing `conn` (instead of calling engine.connect() again) avoids leaving a
# second, anonymous connection open that nothing ever closes.
query = sa.text("SELECT * FROM playstore")
result = conn.execute(query)

In [6]:
# Turn the result set into a DataFrame (column names come from the result),
# then use the stored 'index' column as the DataFrame index.
df = pd.DataFrame(result.fetchall(), columns=result.keys()).set_index('index')

# Preview only the first five rows.
df.head(n=5)


Unnamed: 0_level_0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [7]:
# Count how often each distinct Size string occurs.
df['Size'].value_counts()

Size
Varies with device    1695
11M                    198
12M                    196
14M                    194
13M                    191
                      ... 
253k                     1
992k                     1
658k                     1
73k                      1
246k                     1
Name: count, Length: 461, dtype: int64

In [11]:

# Capture the trailing run of letters/whitespace in each Size value: this is the
# unit suffix, or the whole string for entries such as 'Varies with device'.
unit_pattern = r'([A-Za-z\s]+)$'
size_units = df['Size'].str.extract(unit_pattern).squeeze()

# Show the distinct suffixes that occur.
print(size_units.unique())

['M' 'Varies with device' 'k']


Another example

In [36]:
# List the DataFrame's column labels.
df.columns

Index(['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type',
       'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver',
       'Android Ver'],
      dtype='object')

In [13]:
# Reflect the database schema through the engine so the existing tables are
# registered on this MetaData object.
metadata = sa.MetaData()
metadata.reflect(bind=engine)

# Handle to the reflected 'playstore' table, used below to build queries.
playstore = metadata.tables['playstore']

In [None]:
# TODO : write a query that returns the names of apps (App), their genres (Genres), 
# and their sizes (Size) from the playstore table, where the size of the app is 
# greater than 10 and the genre does not start with 'Art & Design'.

In [20]:
# The threshold "10" is assumed to mean 10M.
# SQLite has no regex pattern matching available here, so the numeric part of
# Size is taken with substr: every character except the last one, assuming the
# unit is always written as a single trailing character.
size_col = playstore.c.Size
size_number = sa.cast(
    sa.func.substr(size_col, 1, sa.func.length(size_col) - 1),
    sa.Float,
)

query = (
    sa.select(playstore.c.App, playstore.c.Genres, size_col)
    .where(
        sa.and_(
            size_col != 'Varies with device',  # no numeric size to compare
            sa.not_(size_col.like('%k')),      # skip sizes given in k
            size_number > 10,
            sa.not_(playstore.c.Genres.like('Art & Design%')),
        )
    )
)

result = conn.execute(query)

In [21]:
# TODO : create a pandas DataFrame from the results of the SQL 
# query and then displays the first five rows of this DataFrame, 
# with the columns appropriately named according to the SQL query result.

# Build a DataFrame from the fetched rows, labelling the columns with the
# names reported by the query result.
df1 = pd.DataFrame(data=result.fetchall(), columns=result.keys())

# Preview the first five rows.
df1.head(n=5)

Unnamed: 0,App,Genres,Size
0,Monster Truck Stunt 3D 2019,Auto & Vehicles,25M
1,Real Tractor Farming,Auto & Vehicles,56M
2,Ultimate F1 Racing Championship,Auto & Vehicles,57M
3,American Muscle Car Race,Auto & Vehicles,35M
4,Offroad Oil Tanker Driver Transport Truck 2019,Auto & Vehicles,33M
