## Analyzing a real world data-set with SQL and Python

The city of Chicago released a dataset showing all school level performance data used to create School Report Cards for the 2011-2012 school year. The dataset is available from the Chicago Data Portal: https://data.cityofchicago.org/Education/Chicago-Public-Schools-Progress-Report-Cards-2011-/9xs2-f89t

This dataset includes a large number of metrics. Start by familiarizing yourself with the types of metrics in the database: https://data.cityofchicago.org/api/assets/AAD41A13-BE8A-4E67-B1F5-86E711E09D5F%sdownload=true

__NOTE__: 

Do not download the dataset directly from City of Chicago portal. Instead download a static copy which is a more database friendly version from this <a href="https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DB0201EN-SkillsNetwork/labs/FinalModule_edX/data/Chicago_Public_Schools_-_Progress_Report_Cards__2011-2012-v3.csv">link</a>.

Now review some of its contents.






### Connect to the database using PostgreSQL database
Let us first load the SQL extension and establish a connection with the database

In [17]:
# Install connection to PostgreSQL database (local)
#!pip install psycopg2

# Import packages
import csv
import psycopg2
import pandas as pd
print('Project libraries has been successfully installed!')

Project libraries has been successfully installed!


In [18]:
# Connect to the database
conn = psycopg2.connect(
    host = 'localhost',
    database = 'analysis', 
    user = 'postgres', 
    password = '@Mexx4u2nv',  
    port = '5432')
print('Connection to database is successfully')

Connection to database is successfully


In [19]:
# function to read from database
def read(conn, read_):
    print('Read')
    cursor = conn.cursor()
    cursor.execute(read_)
    for row in cursor:
        print(f'row = {row}')
    print()
    
# function to create in postgre database     
def create(conn, create_):
    cursor = conn.cursor() # create cursor object
    cursor.execute(create_) # execute query
    conn.commit() # commit query to database
    print('Table have been created successfull!!!')
    #read(conn)
    
# function to insert in postgre database     
def insert(conn, insert_):
    cursor = conn.cursor()
    cursor.execute(insert_)
    conn.commit()
    print('Records have been successfully inserted!!!')
    #read(conn)
    
# function to update table
def update(conn, update_):
    print('Update')
    cursor = conn.cursor()
    cursor.execute(update_)
    conn.commit()
    #read(conn)
    
# function to delete in postgre database
def delete(conn, delete_):
    print('Delete')
    cursor = conn.cursor()
    cursor.execute(delete_)
    conn.commit()
    #read(conn)

# close the cursor and connection to the server 
def close():
    cursor.close()
    conn.close()   
    
# function to create pandas dataframe
def create_pandas_df(sql_query, database=conn):
    table = pd.read_sql_query(sql_query, database)
    return table

### Create table in PostgreSQL database

In [20]:
# Create table ChicagoSchools
create_ = '''
DROP TABLE IF EXISTS chicago_schools;
CREATE TABLE chicago_schools (
    "School ID" INTEGER,
    "NAME_OF_SCHOOL" VARCHAR(255),
    "Elementary, Middle, or High School" VARCHAR(50),
    "Street Address" VARCHAR(255),
    "City" VARCHAR(50),
    "State" VARCHAR(2),
    "ZIP Code" VARCHAR(10),
    "Phone Number" VARCHAR(20),
    "Link " VARCHAR(255),
    "Network Manager" VARCHAR(255),
    "Collaborative Name" VARCHAR(255),
    "Adequate Yearly Progress Made%s " VARCHAR(5),
    "Track Schedule" VARCHAR(50),
    "CPS Performance Policy Status" VARCHAR(255),
    "CPS Performance Policy Level" VARCHAR(255),
    "HEALTHY_SCHOOL_CERTIFIED" VARCHAR(5),
    "Safety Icon " VARCHAR(255),
    "SAFETY_SCORE" INTEGER,
    "Family Involvement Icon" VARCHAR(255),
    "Family Involvement Score" INTEGER,
    "Environment Icon " VARCHAR(255),
    "Environment Score" INTEGER,
    "Instruction Icon " VARCHAR(50),
    "Instruction Score" INTEGER,
    "Leaders Icon " VARCHAR(50),
    "Leaders Score " VARCHAR(50),
    "Teachers Icon " VARCHAR(50),
    "Teachers Score" VARCHAR(50),
    "Parent Engagement Icon " VARCHAR(50),
    "Parent Engagement Score" VARCHAR(50),
    "Parent Environment Icon" VARCHAR(50),
    "Parent Environment Score" VARCHAR(50),
    "AVERAGE_STUDENT_ATTENDANCE" VARCHAR(20),
    "Rate of Misconducts (per 100 students) " DECIMAL(5, 2),
    "Average Teacher Attendance" VARCHAR(20),
    "Individualized Education Program Compliance Rate " VARCHAR(20),
    "Pk-2 Literacy %" VARCHAR(50),
    "Pk-2 Math %" VARCHAR(20),
    "Gr3-5 Grade Level Math %" VARCHAR(20),
    "Gr3-5 Grade Level Read % " VARCHAR(20),
    "Gr3-5 Keep Pace Read %" VARCHAR(20),
    "Gr3-5 Keep Pace Math %" VARCHAR(20),
    "Gr6-8 Grade Level Math %" VARCHAR(20),
    "Gr6-8 Grade Level Read %" VARCHAR(20),
    "Gr6-8 Keep Pace Math%" VARCHAR(20),
    "Gr6-8 Keep Pace Read %" VARCHAR(20),
    "Gr-8 Explore Math %" VARCHAR(20),
    "Gr-8 Explore Read %" VARCHAR(20),
    "ISAT Exceeding Math %" DECIMAL(5, 2),
    "ISAT Exceeding Reading % " DECIMAL(5, 2),
    "ISAT Value Add Math" DECIMAL(5, 2),
    "ISAT Value Add Read" DECIMAL(5, 2),
    "ISAT Value Add Color Math" VARCHAR(20),
    "ISAT Value Add Color Read" VARCHAR(20),
    "Students Taking  Algebra %" VARCHAR(20),
    "Students Passing  Algebra %" VARCHAR(20),
    "9th Grade EXPLORE (2009) " VARCHAR(20),
    "9th Grade EXPLORE (2010) " VARCHAR(20),
    "10th Grade PLAN (2009) " VARCHAR(20),
    "10th Grade PLAN (2010) " VARCHAR(20),
    "Net Change EXPLORE and PLAN" VARCHAR(20),
    "11th Grade Average ACT (2011) " VARCHAR(20),
    "Net Change PLAN and ACT" VARCHAR(20),
    "College Eligibility %" VARCHAR(20),
    "Graduation Rate %" VARCHAR(20),
    "College Enrollment Rate %" VARCHAR(20),
    "COLLEGE_ENROLLMENT" INTEGER,
    "General Services Route " VARCHAR(20),
    "Freshman on Track Rate %" VARCHAR(20),
    "X_COORDINATE" DOUBLE PRECISION,
    "Y_COORDINATE" DOUBLE PRECISION,
    "Latitude" DOUBLE PRECISION,
    "Longitude" DOUBLE PRECISION,
    "COMMUNITY_AREA_NUMBER" INTEGER,
    "COMMUNITY_AREA_NAME" VARCHAR(255),
    "Ward" INTEGER,
    "Police District" INTEGER,
    "Location" VARCHAR(255)
);

'''
create(conn, create_)

Table have been created successfull!!!


In [22]:
# PostgreSQL import CSV
cursor = conn.cursor()
with open('Chicago_Public_Schools_-_Progress_Report_Cards__2011-2012-v3', 'r') as f:
    reader = csv.reader(f)
    next(reader) # Skip the header row.
    for row in reader:
        cursor.execute(
            """INSERT INTO chicago_schools (
                "School ID", "NAME_OF_SCHOOL", "Elementary, Middle, or High School", "Street Address", "City", "State", "ZIP Code", "Phone Number", "Link ", "Network Manager", "Collaborative Name", "Adequate Yearly Progress Made%s ", "Track Schedule", "CPS Performance Policy Status", "CPS Performance Policy Level", "HEALTHY_SCHOOL_CERTIFIED", "Safety Icon ", "SAFETY_SCORE", "Family Involvement Icon", "Family Involvement Score", "Environment Icon ", "Environment Score", "Instruction Icon ", "Instruction Score", "Leaders Icon ", "Leaders Score ", "Teachers Icon ", "Teachers Score", "Parent Engagement Icon ", "Parent Engagement Score", "Parent Environment Icon", "Parent Environment Score", "AVERAGE_STUDENT_ATTENDANCE", "Rate of Misconducts (per 100 students) ", "Average Teacher Attendance", "Individualized Education Program Compliance Rate ", "Pk-2 Literacy %", "Pk-2 Math %", "Gr3-5 Grade Level Math %", "Gr3-5 Grade Level Read % ", "Gr3-5 Keep Pace Read %", "Gr3-5 Keep Pace Math %", "Gr6-8 Grade Level Math %", "Gr6-8 Grade Level Read %", "Gr6-8 Keep Pace Math%", "Gr6-8 Keep Pace Read %", "Gr-8 Explore Math %", "Gr-8 Explore Read %", "ISAT Exceeding Math %", "ISAT Exceeding Reading % ", "ISAT Value Add Math", "ISAT Value Add Read", "ISAT Value Add Color Math", "ISAT Value Add Color Read", "Students Taking  Algebra %", "Students Passing  Algebra %", "9th Grade EXPLORE (2009) ", "9th Grade EXPLORE (2010) ", "10th Grade PLAN (2009) ", "10th Grade PLAN (2010) ", "Net Change EXPLORE and PLAN", "11th Grade Average ACT (2011) ", "Net Change PLAN and ACT", "College Eligibility %", "Graduation Rate %", "College Enrollment Rate %", "COLLEGE_ENROLLMENT", "General Services Route ", "Freshman on Track Rate %", "X_COORDINATE", "Y_COORDINATE", "Latitude", "Longitude", "COMMUNITY_AREA_NUMBER", "COMMUNITY_AREA_NAME", "Ward", "Police District", "Location"
            ) 
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
        row
    )
conn.commit()
print('CSV file inserted into database successfully!!!')

FileNotFoundError: [Errno 2] No such file or directory: 'Chicago_Public_Schools_-_Progress_Report_Cards__2011-2012-v3'