In [6]:
import duckdb
import pandas as pd
import config

# Location of the DuckDB database file, one directory above this notebook,
# inside the folder named by config.DATA_DIR_NAME.
db_path = f"../{config.DATA_DIR_NAME}/data_tables.db"

# Shared connection that the query cells below run their SQL against.
data = duckdb.connect(db_path)

## School type

In [7]:
# Peek at the first five rows of sen_phase_type to inspect its columns and types.
phase_type_preview_sql = """
    select *
    from sen_phase_type
    limit 5
    """
data.sql(phase_type_preview_sql)

┌─────────────┬─────────────────┬──────────────────┬──────────────┬──────────────┬───────────────┬─────────────┬─────────────┬───────────────┬─────────────┬─────────────────────────────┬─────────────────────────────┬─────────────────┬──────────────┬─────────────┬──────────┬─────────────────────┬──────────────────┐
│ time_period │ time_identifier │ geographic_level │ country_code │ country_name │  region_name  │ region_code │ old_la_code │    la_name    │ new_la_code │     phase_type_grouping     │    type_of_establishment    │ hospital_school │ total_pupils │ sen_support │ ehc_plan │ sen_support_percent │ ehc_plan_percent │
│    int64    │     varchar     │     varchar      │   varchar    │   varchar    │    varchar    │   varchar   │    int64    │    varchar    │   varchar   │           varchar           │           varchar           │     varchar     │   varchar    │   varchar   │ varchar  │       varchar       │     varchar      │
├─────────────┼─────────────────┼──────────────────┼

In [8]:
# Read every row of sen_phase_type and materialise it as a pandas DataFrame.
school_type_sql = """
    select *
    from sen_phase_type
    """
school_type = data.sql(school_type_sql).df()

# Show the first few rows as a quick sanity check of the load.
school_type.head()

Unnamed: 0,time_period,time_identifier,geographic_level,country_code,country_name,region_name,region_code,old_la_code,la_name,new_la_code,phase_type_grouping,type_of_establishment,hospital_school,total_pupils,sen_support,ehc_plan,sen_support_percent,ehc_plan_percent
0,202324,Academic year,Local authority,E92000001,England,East Midlands,E12000004,925,Lincolnshire,E10000019,State-funded secondary,Academy sponsor led,No,16348,2842,615,17.38438953,3.761928065
1,202324,Academic year,Local authority,E92000001,England,East Midlands,E12000004,925,Lincolnshire,E10000019,State-funded secondary,Community school,No,2111,173,7,8.195168167,0.3315964
2,202324,Academic year,Local authority,E92000001,England,West Midlands,E12000005,335,Walsall,E08000030,State-funded special school,Academy special sponsor led,No,140,0,140,0.0,100.0
3,202324,Academic year,Local authority,E92000001,England,West Midlands,E12000005,335,Walsall,E08000030,State-funded special school,Community special school,No,790,0,790,0.0,100.0
4,202324,Academic year,Local authority,E92000001,England,West Midlands,E12000005,336,Wolverhampton,E08000031,Independent school,Other independent school,No,1430,238,11,16.64335664,0.769230769


## Age groups

In [9]:
# Peek at the first five rows of sen_age_sex to inspect its columns and types.
age_sex_preview_sql = """
    select *
    from sen_age_sex
    limit 5
    """
data.sql(age_sex_preview_sql)

┌─────────────┬─────────────────┬──────────────────┬──────────────┬──────────────┬─────────────┬─────────────┬─────────────┬─────────┬─────────────┬───────────────────────────────┬──────────────────┬────────────────────────────┬─────────────────┬─────────────────┬─────────────┬───────────────┐
│ time_period │ time_identifier │ geographic_level │ country_code │ country_name │ region_name │ region_code │ old_la_code │ la_name │ new_la_code │      phase_type_grouping      │ pupil_sen_status │        primary_need        │ breakdown_topic │    breakdown    │ pupil_count │ pupil_percent │
│    int64    │     varchar     │     varchar      │   varchar    │   varchar    │   varchar   │   varchar   │    int64    │ varchar │   varchar   │            varchar            │     varchar      │          varchar           │     varchar     │     varchar     │    int64    │    double     │
├─────────────┼─────────────────┼──────────────────┼──────────────┼──────────────┼─────────────┼─────────────┼─────

In [10]:
# Total pupil_count per academic year and age breakdown, restricted to
# state-funded primary pupils with EHC plans, returned as a pandas DataFrame.
#
# NOTE(review): the query does not filter on geographic_level or primary_need.
# If sen_age_sex holds rows at more than one geographic level, or per-need rows
# alongside totals, this sum would count the same pupils more than once - confirm.
# NOTE(review): `breakdown` is a varchar, so `order by breakdown` sorts as text
# ('Age 10' comes out first in the result).
# NOTE(review): pupil_count is int64 in the table but the summed column shows up
# as float in the DataFrame; cast if integer counts are needed downstream.
age_groups_sql = """
    select time_period, phase_type_grouping, breakdown, sum(pupil_count) as pupil_count
    from sen_age_sex
    where phase_type_grouping = 'State-funded primary' and pupil_sen_status = 'EHC plans' and breakdown_topic = 'Age'
    group by time_period, pupil_sen_status, phase_type_grouping, breakdown
    order by breakdown, time_period    
    """
age_groups = data.sql(age_groups_sql).df()

# Show the first few aggregated rows.
age_groups.head()

Unnamed: 0,time_period,phase_type_grouping,breakdown,pupil_count
0,201516,State-funded primary,Age 10,64980.0
1,201617,State-funded primary,Age 10,64356.0
2,201718,State-funded primary,Age 10,69480.0
3,201819,State-funded primary,Age 10,79254.0
4,201920,State-funded primary,Age 10,89454.0


## Year groups

In [11]:
# Peek at the first five rows of sen_ncyear to inspect its columns and types.
ncyear_preview_sql = """
    select *
    from sen_ncyear
    limit 5
    """
data.sql(ncyear_preview_sql)

┌─────────────┬─────────────────┬──────────────────┬──────────────┬──────────────┬─────────────┬─────────────┬─────────────┬─────────┬─────────────┬───────────────────────────────┬──────────────────┬─────────────────────────────┬──────────────────┬────────────────┬──────────────┬───────────┬───────────┬───────────┬───────────┬───────────┬───────────┬───────────┬───────────┬───────────┬────────────┬────────────┬────────────┬────────────┬────────────┬─────────────────┬────────────────────────┬──────────────────────┬───────────────────┬───────────────────┬───────────────────┬───────────────────┬───────────────────┬───────────────────┬───────────────────┬───────────────────┬───────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬─────────────────────────┐
│ time_period │ time_identifier │ geographic_level │ country_code │ country_name │ region_name │ region_code │ old_la_code │ la_name │ new_la_code │      phase_type_grou

In [12]:
# Load the complete sen_ncyear table into a pandas DataFrame.
# The query deliberately has no `limit`: a limit here would leave year_groups
# holding only five rows, which is useless for any later analysis. Restricting
# what is displayed is the job of .head() below.
year_groups = data.sql(
    """
    select *
    from sen_ncyear
    """).df()

# Show the first few rows as a quick sanity check of the load.
year_groups.head()

Unnamed: 0,time_period,time_identifier,geographic_level,country_code,country_name,region_name,region_code,old_la_code,la_name,new_la_code,...,nc_year_6_percent,nc_year_7_percent,nc_year_8_percent,nc_year_9_percent,nc_year_10_percent,nc_year_11_percent,nc_year_12_percent,nc_year_13_percent,nc_year_14_percent,nc_not_followed_percent
0,202324,Academic year,National,E92000001,England,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,202324,Academic year,National,E92000001,England,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,202324,Academic year,National,E92000001,England,,,,,,...,14.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,202324,Academic year,National,E92000001,England,,,,,,...,14.285714,14.285714,0.0,0.0,14.285714,0.0,0.0,0.0,0.0,0.0
4,202324,Academic year,National,E92000001,England,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
