In [1]:
import os
import pandas as pd

In [2]:
# Record the current working directory; the relative 'database.db' path opened later
# in this notebook is resolved against this directory.
path = os.getcwd()

In [3]:
# Display the working directory.
# NOTE(review): the rendered output shows an absolute local path (including a user name);
# consider clearing this output before sharing the notebook.
path

'C:\\Users\\gibra\\Desktop\\Data Science\\Portfolio\\SQL\\sqlite3'

# **Library for SQL in Python**

In [4]:
#Import SQL
import sqlite3

In [5]:
# Connect to the SQLite database file 'database.db' (a new database is created if it doesn't exist).
# The path is relative, so the file lives in the current working directory.
conn = sqlite3.connect('database.db')

In [6]:
# Create a cursor object; later cells use it to execute SQL statements and fetch their results.
cursor = conn.cursor()

# Pandas and SQL

## Import a SQL table into a DataFrame

In [7]:
# A pandas DataFrame can be built directly from the result set of a SQL query.
# pd.read_sql needs two things: the query text and an open connection to the database.

# Plain string literal: the query has no placeholders, so an f-string prefix is unnecessary.
all_employees_query = "SELECT * from EMPLOYEES;"
employees_df = pd.read_sql(all_employees_query, conn)

In [8]:
# Preview the first five rows of the employees DataFrame.
# NOTE(review): the preview includes the SSN and ADDRESS columns; confirm this is sample data
# before sharing the rendered output.
employees_df.head(n=5)

Unnamed: 0,EMP_ID,F_NAME,L_NAME,SSN,B_DATE,SEX,ADDRESS,JOB_ID,SALARY,MANAGER_ID,DEP_ID
0,E1001,John,Thomas,123456,1976-01-09,M,"5631 Rice, OakPark,IL",100,100000,30001,2
1,E1002,Alice,James,123457,1972-07-31,F,"980 Berry ln, Elgin,IL",200,80000,30002,5
2,E1003,Steve,Wells,123458,1980-08-10,M,"291 Springs, Gary,IL",300,50000,30002,5
3,E1004,Santosh,Kumar,123459,1985-07-20,M,"511 Aurora Av, Aurora,IL",400,60000,30004,5
4,E1005,Ahmed,Hussain,123410,1981-01-04,M,"216 Oak Tree, Geneva,IL",500,70000,30001,2


# Converting a pandas DataFrame into a SQL table

In [9]:
# DataFrame.to_sql() converts a pandas DataFrame into a table in our SQL database,
# so that it can be accessed and queried using SQL commands.
#
# Example: socioeconomic indicators for the city, from the Chicago open data portal.
# The data is loaded from a CSV file into a pandas DataFrame here, and is persisted
# to our database afterwards.
socioeconomic_csv_url = 'https://data.cityofchicago.org/resource/jcxq-k9xf.csv'
socioeconomic_df = pd.read_csv(socioeconomic_csv_url)
socioeconomic_df.head(n=2)

Unnamed: 0,ca,community_area_name,percent_of_housing_crowded,percent_households_below_poverty,percent_aged_16_unemployed,percent_aged_25_without_high_school_diploma,percent_aged_under_18_or_over_64,per_capita_income_,hardship_index
0,1.0,Rogers Park,7.7,23.6,8.7,18.2,27.5,23939,39.0
1,2.0,West Ridge,7.8,17.2,8.8,20.8,38.5,23040,46.0


In [10]:
# to_sql() is given the name of the SQL table that will hold the DataFrame, the database
# connection, and what to do if that table already exists ('replace' here).
# index=False keeps the DataFrame index from being written as an extra column.
socioeconomic_df.to_sql(
    name='socioeconomic_data',
    con=conn,
    if_exists='replace',
    index=False,
)

78

In [11]:
# Verify that the table was created by running a simple query against it.
simple_query = 'SELECT * FROM SOCIOECONOMIC_DATA LIMIT 5'
rows = cursor.execute(simple_query).fetchall()

# Print every returned row, with its values separated by tabs.
for row in rows:
    print(*row, sep="\t")

1.0	Rogers Park	7.7	23.6	8.7	18.2	27.5	23939	39.0
2.0	West Ridge	7.8	17.2	8.8	20.8	38.5	23040	46.0
3.0	Uptown	3.8	24.0	8.9	11.8	22.2	35787	20.0
4.0	Lincoln Square	3.4	10.9	8.2	13.4	25.5	37524	17.0
5.0	North Center	0.3	7.5	5.2	4.5	26.2	57123	6.0


# Python Variables

## Queries

In [12]:
# Python variables can be used in SQL queries through named placeholders,
# written with a leading ":" in the query text and supplied as a dict on execute().
community = 'North Center'

python_variable_query = 'SELECT * FROM SOCIOECONOMIC_DATA WHERE COMMUNITY_AREA_NAME = :python_var'
query_params = {'python_var': community}
rows = cursor.execute(python_variable_query, query_params).fetchall()

# Print every matching row, with its values separated by tabs.
for row in rows:
    print(*row, sep="\t")

5.0	North Center	0.3	7.5	5.2	4.5	26.2	57123	6.0


## Saving SQL query results in Python variables

In [13]:
# The results of a query can be fetched into a pandas DataFrame by pairing the fetched rows
# with the column names taken from cursor.description.
hi = 50.0

query_for_dataframe = 'SELECT * FROM SOCIOECONOMIC_DATA WHERE HARDSHIP_INDEX > :hardship'
cursor.execute(query_for_dataframe, {'hardship': hi})

# The first item of each cursor.description entry is the column name.
column_names = [entry[0] for entry in cursor.description]
high_hardship_df = pd.DataFrame(cursor.fetchall(), columns=column_names)
high_hardship_df.head(n=5)

Unnamed: 0,ca,community_area_name,percent_of_housing_crowded,percent_households_below_poverty,percent_aged_16_unemployed,percent_aged_25_without_high_school_diploma,percent_aged_under_18_or_over_64,per_capita_income_,hardship_index
0,14.0,Albany Park,11.3,19.2,10.0,32.9,32.0,21323,53.0
1,19.0,Belmont Cragin,10.8,18.7,14.6,37.3,37.3,15461,70.0
2,20.0,Hermosa,6.9,20.5,13.1,41.6,36.4,15089,71.0
3,23.0,Humboldt park,14.8,33.9,17.3,35.4,38.0,13781,85.0
4,25.0,Austin,6.3,28.6,22.6,24.4,37.9,15957,73.0


In [14]:
# Persist any pending changes, then release the database connection.
# try/finally guarantees the connection is closed even if the commit raises,
# so the database file is not left held open by this kernel.
try:
    conn.commit()
finally:
    conn.close()