In [1]:
# Import dependencies
import sqlite3
import pandas as pd
import numpy as np
from numpy import nan

In [2]:
# Connect to database
conn = sqlite3.connect('database.sqlite')

In [17]:
# Read sqlite query results into a pandas DataFrame
scholl_df = pd.read_sql_query('SELECT * from schools_table order by School_name',conn)
scholl_df

Unnamed: 0,index,school_id,school_name,type,size,budget
0,7,7,Farmer High School,District,3429,2235708
1,9,9,Floyd High School,Charter,2104,1224528
2,8,8,Greene High School,District,4690,3062570
3,2,2,Hood High School,Charter,930,578460
4,1,1,Long High School,Charter,628,368636
5,3,3,Lopez High School,District,3428,2248768
6,10,10,Patterson High School,District,4389,2887962
7,0,0,Stewart High School,Charter,1208,740504
8,5,5,Thompson High School,Charter,1353,838860
9,4,4,Vargas High School,Charter,2479,1574165


In [4]:
# Read sqlite query results into a pandas DataFrame
student_df = pd.read_sql_query('SELECT * from students_table',conn)
student_df.head()

Unnamed: 0,index,student_id,student_name,gender,grade,school_name,reading_score,math_score
0,0,0,Kevin Logan,M,9th,Stewart High School,93,98
1,1,1,Earl Bush,M,10th,Stewart High School,99,92
2,2,2,Jeffrey Williamson,M,10th,Stewart High School,98,98
3,3,3,Karen Dean,F,10th,Stewart High School,97,77
4,4,4,Rebecca Reyes,F,11th,Stewart High School,89,87


# District Summary

In [5]:
# Read sqlite query results into a pandas DataFrame
district_summary = pd.read_sql_query("""

SELECT count(DISTINCT(school_id)) as Total_Schools,
count(student_id) as Total_Students,
sum(budget) as Total_Budget,
avg(math_score) as Average_Math_Score,
avg(reading_score) as Average_Reading_Score,
math_passing_rate,
reading_passing_rate,
(math_passing_rate + reading_passing_rate)/2 as Overall_Passing_Rate

FROM schools_table a
inner join  students_table b
on a.school_name = b.school_name

join

(select
cast(sum(mathover70) as float)/cast(count(mathover70) as float) as Math_Passing_Rate
from(
select
 student_id
 ,math_score
 , case when math_score > 70 then 1
		else 0
end as mathover70
from 
students_table))

join

(select
cast(sum(readingover70) as float)/cast(count(readingover70) as float) as Reading_Passing_Rate
from(
select
student_id ,reading_score, 
case when reading_score > 70 then 1
		else 0
end as readingover70
from 
students_table));

""",conn)

district_summary.head()

Unnamed: 0,Total_Schools,Total_Students,Total_Budget,Average_Math_Score,Average_Reading_Score,Math_Passing_Rate,Reading_Passing_Rate,Overall_Passing_Rate
0,11,27712,57663546353,82.164802,82.190279,0.836894,0.751876,0.794385


In [16]:
# Read sqlite query results into a pandas DataFrame
pass_table = pd.read_sql_query("""

SELECT count(b.student_name) as pass_count,
a.School_name

FROM schools_table a
inner join  students_table b
on a.school_name = b.school_name

where reading_score > 70
and math_score > 70

group by a.School_name
order by a.School_name;

""",conn)

pass_table

Unnamed: 0,pass_count,school_name
0,1790,Farmer High School
1,1900,Floyd High School
2,2434,Greene High School
3,831,Hood High School
4,560,Long High School
5,1779,Lopez High School
6,2235,Patterson High School
7,1101,Stewart High School
8,1235,Thompson High School
9,2243,Vargas High School


In [7]:
# Read sqlite query results into a pandas DataFrame
all_table = pd.read_sql_query("""

SELECT count(b.student_name) as all_count,
a.School_name,
type as School_type,
sum(budget) as Total_Budget,
sum(budget)/count(student_id) as Per_Student_Budget,
avg(math_score) as Average_Math_Score,
avg(reading_score) as Average_Reading_Score,
(avg(math_score) + avg(reading_score))/2 as Average_Overall_Score

FROM schools_table a
inner join  students_table b
on a.school_name = b.school_name

group by a.School_name
order by a.School_name;

""",conn)

all_table

Unnamed: 0,all_count,school_name,School_type,Total_Budget,Per_Student_Budget,Average_Math_Score,Average_Reading_Score,Average_Overall_Score
0,3429,Farmer High School,District,7666242732,2235708,81.594051,77.017498,79.305774
1,2104,Floyd High School,Charter,2576406912,1224528,83.028042,93.966255,88.497148
2,4690,Greene High School,District,14363453300,3062570,81.695522,76.807463,79.251493
3,930,Hood High School,Charter,537967800,578460,83.574194,94.077419,88.825806
4,628,Long High School,Charter,231503408,368636,83.068471,93.81051,88.43949
5,3428,Lopez High School,District,7708776704,2248768,81.725788,76.875146,79.300467
6,4389,Patterson High School,District,12675265218,2887962,81.481431,76.644566,79.062998
7,1208,Stewart High School,Charter,894528832,740504,83.774007,94.120033,88.94702
8,1353,Thompson High School,Charter,1134977580,838860,83.515891,94.115299,88.815595
9,2479,Vargas High School,Charter,3902355035,1574165,83.576442,93.961678,88.76906


#  School Summary

In [69]:
# Read sqlite query results into a pandas DataFrame
school_summary = pd.read_sql_query("""

SELECT 
c.School_name,
c.School_type,
c.Total_Budget,
c.Per_Student_Budget,
c.Average_Math_Score,
c.Average_Reading_Score,
c.Average_Overall_Score,
(pass_count*100 / all_count) ||'%' as Overall_Passing_Rate

from 

(SELECT count(b.student_name) as all_count,
a.School_name,
type as School_type,
sum(budget) as Total_Budget,
sum(budget)/count(student_id) as Per_Student_Budget,
avg(math_score) as Average_Math_Score,
avg(reading_score) as Average_Reading_Score,
(avg(math_score) + avg(reading_score))/2 as Average_Overall_Score

FROM schools_table a
inner join  students_table b
on a.school_name = b.school_name

group by a.School_name) c

join 

(SELECT count(b.student_name) as pass_count,
a.School_name

FROM schools_table a
inner join  students_table b
on a.school_name = b.school_name

where reading_score > 70
and math_score > 70

group by a.School_name
order by a.School_name) d

using (School_name)
group by School_name
;

""",conn)

school_summary

Unnamed: 0,School_name,School_type,Total_Budget,Per_Student_Budget,Average_Math_Score,Average_Reading_Score,Average_Overall_Score,Overall_Passing_Rate
0,Farmer High School,District,7666242732,2235708,81.594051,77.017498,79.305774,52%
1,Floyd High School,Charter,2576406912,1224528,83.028042,93.966255,88.497148,90%
2,Greene High School,District,14363453300,3062570,81.695522,76.807463,79.251493,51%
3,Hood High School,Charter,537967800,578460,83.574194,94.077419,88.825806,89%
4,Long High School,Charter,231503408,368636,83.068471,93.81051,88.43949,89%
5,Lopez High School,District,7708776704,2248768,81.725788,76.875146,79.300467,51%
6,Patterson High School,District,12675265218,2887962,81.481431,76.644566,79.062998,50%
7,Stewart High School,Charter,894528832,740504,83.774007,94.120033,88.94702,91%
8,Thompson High School,Charter,1134977580,838860,83.515891,94.115299,88.815595,91%
9,Vargas High School,Charter,3902355035,1574165,83.576442,93.961678,88.76906,90%


# Top Performing Schools (By Passing Rate)

In [9]:
# Read sqlite query results into a pandas DataFrame
# Read sqlite query results into a pandas DataFrame
#top5 = pd.read_sql_query("""

SELECT a.School_name,
type as School_type,
count(student_id) as Total_Students,
sum(budget) as Total_Budget,
sum(budget)/count(student_id) as Per_Student_Budget,
avg(math_score) as Average_Math_Score,
avg(reading_score) as Average_Reading_Score,
math_passing_rate,
reading_passing_rate,
(math_passing_rate + reading_passing_rate)/2 as overall_passing_rate

FROM schools_table a
inner join  students_table b
on a.school_name = b.school_name

join

(select
cast(sum(mathover70) as float)/cast(count(mathover70) as float) as Math_Passing_Rate
from(
select
 student_id
 ,math_score
 , case when math_score > 70 then 1
		else 0
end as mathover70
from 
students_table))

join

(select
cast(sum(readingover70) as float)/cast(count(readingover70) as float) as Reading_Passing_Rate
from(
select
student_id ,reading_score, 
case when reading_score > 70 then 1
		else 0
end as readingover70
from 
students_table))

group by a.school_name

order by overall_passing_rate

limit 5;

""",conn)

top5

SyntaxError: invalid syntax (<ipython-input-9-11e11ccf1010>, line 5)

# Bottom Performing Schools (By Passing Rate)

In [None]:
# Read sqlite query results into a pandas DataFrame


# Math Scores by Grade

In [None]:
# Read sqlite query results into a pandas DataFrame


# Reading Score by Grade

In [None]:
# Read sqlite query results into a pandas DataFrame


# Scores by School Spending

In [None]:
# Read sqlite query results into a pandas DataFrame


# Scores by School Size

In [None]:
# Read sqlite query results into a pandas DataFrame


# Scores by School Type

In [None]:
# Read sqlite query results into a pandas DataFrame
