### Standard Python and R imports

In [4]:
%load_ext rpy2.ipython
%load_ext autoreload
%autoreload 2

%matplotlib inline  
from matplotlib import rcParams
rcParams['figure.figsize'] = (16, 100)

import warnings
from rpy2.rinterface import RRuntimeWarning
warnings.filterwarnings("ignore") # Ignore all warnings
# warnings.filterwarnings("ignore", category=RRuntimeWarning) # Show some warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
%%R

# My commonly used R imports

require('tidyverse')
require('broom')
require('GGally')

## Read in data

In [45]:
%%R
library(data.table)

# Import data with R
df <- fread('../data/final_data.csv')
lunch_program <- fread('../data/ccd_sch_033_2122_l_1a_071722/ccd_sch_033_2122_l_1a_071722.csv') 

In [46]:
%%R

lunch_program <- lunch_program %>% 
  mutate(LEAID = as.character(LEAID)) %>% 
  filter(LUNCH_PROGRAM %in% c('Reduced-price lunch qualified', 'Free lunch qualified')) %>% 
  group_by(SCHID, LEAID) %>% 
  summarize(red_free_lunch = sum(STUDENT_COUNT, na.rm = TRUE), .groups = "drop")

In [47]:
%%R


values_to_replace <- c(-3, -4, -5, -6, -9,-12,-13)

df_clean <- df %>% 
        mutate(across(everything(), ~ replace(.x, .x %in% values_to_replace, NA))) 

df_clean <- df_clean %>% left_join(lunch_program, by = join_by(SCHID, LEAID))

In [56]:
%%R

# df_clean %>% filter(SCHID == 10001)

lunch_program %>% head(20)

# A tibble: 20 × 3
    SCHID LEAID  red_free_lunch
    <int> <chr>           <int>
 1 100001 100030            198
 2 100003 100030            302
 3 100004 100030            376
 4 100005 100030              0
 5 100006 100030            236
 6 100007 100060            209
 7 100008 100060            123
 8 100011 100090            387
 9 100013 100270            265
10 100017 100090            302
11 100020 100090            317
12 100021 100090              0
13 100022 100100              0
14 100024 100270            584
15 100025 100100            138
16 100027 100100            118
17 100028 100100            100
18 100030 102220            215
19 100031 103030            151
20 100033 100180            282


In [64]:
%%R -o df_filtered

# filter data for schools applicable to analysis of Black/white student suspension ratio
# mostly ensuring that there are enough Black and white students to adequately compate

df_filtered <- df_clean %>% 
# filter for schools fit for analysis
filter(SCH_ENR_BL_M > 0, # more than 0 black students
          SCH_ENR_WH_M > 0, # more than 0 white students
           SCH_DISCWODIS_ISS_BL_M + SCH_DISCWODIS_ISS_BL_F > 5, # more than 5 black in-school suspension
           SCH_DISCWODIS_ISS_WH_M + SCH_DISCWODIS_ISS_WH_F> 5 # more than 5 white male in-school suspension
          # SCH_DISCWODIS_ISS_BL_M / SCH_ENR_BL_M > 0,
          #  SCH_DISCWODIS_ISS_BL_M / SCH_ENR_BL_M <= 1,
          # SCH_DISCWODIS_ISS_WH_M / SCH_ENR_WH_M > 0,
          #   SCH_DISCWODIS_ISS_WH_M / SCH_ENR_WH_M <= 1
) %>%
# create variables for analysis
mutate(susp_black_white_ratio = 
             ((SCH_DISCWODIS_ISS_BL_M + SCH_DISCWODIS_ISS_BL_F) / (SCH_ENR_BL_M + SCH_ENR_BL_F)) / 
             ((SCH_DISCWODIS_ISS_WH_M + SCH_DISCWODIS_ISS_WH_F) / (SCH_ENR_WH_M + SCH_ENR_WH_F)),
            black_white_pop_ratio = ((SCH_ENR_BL_M + SCH_ENR_BL_F) / ((SCH_ENR_BL_M + SCH_ENR_BL_F) + (SCH_ENR_WH_M + SCH_ENR_WH_F))),
           school_pop = rowSums(select(., SCH_ENR_HI_M, SCH_ENR_HI_F, SCH_ENR_HI_X,
                                           SCH_ENR_AM_M, SCH_ENR_AM_F, SCH_ENR_AM_X,
                                           SCH_ENR_AS_M, SCH_ENR_AS_F, SCH_ENR_AS_X,
                                           SCH_ENR_HP_M, SCH_ENR_HP_F, SCH_ENR_HP_X,
                                           SCH_ENR_BL_M, SCH_ENR_BL_F, SCH_ENR_BL_X,
                                           SCH_ENR_WH_M, SCH_ENR_WH_F, SCH_ENR_WH_X,
                                           SCH_ENR_TR_M, SCH_ENR_TR_F, SCH_ENR_TR_X),
                                 na.rm = TRUE),
teacher_prop = (SCH_FTETEACH_TOT / school_pop) ,
              cert_teacher_prop = SCH_FTETEACH_CERT / SCH_FTETEACH_TOT,  # certified teachers / teachers
              counc_prop = SCH_FTECOUNSELORS / school_pop,  # councelors / school pop
            law_prop = SCH_FTESECURITY_LEO / school_pop, # LEA / school pop
            security_pop = SCH_FTESECURITY_GUA / school_pop,  # security / school pop

              security_pop_bool = factor(ifelse(SCH_FTESECURITY_LEO > 0, 1,0)) # security bool
      ) %>%
mutate(total_susp_per_pop = ((SCH_DISCWODIS_ISS_BL_M + SCH_DISCWODIS_ISS_BL_F) + (SCH_DISCWODIS_ISS_WH_M + SCH_DISCWODIS_ISS_WH_F)) /  ((SCH_ENR_BL_M + SCH_ENR_BL_F) +(SCH_ENR_WH_M + SCH_ENR_WH_F))) 


In [60]:
%%R 
df_filtered %>% select(STUDENT_COUNT)

Error in `select()`:
! Can't select columns that don't exist.
✖ Column `STUDENT_COUNT` doesn't exist.
Run `rlang::last_trace()` to see where the error occurred.

Error in select(., STUDENT_COUNT) :


RInterpreterError: Failed to parse and evaluate line 'df_filtered %>% select(STUDENT_COUNT)\n'.
R error message: 'Error in select(., STUDENT_COUNT) :'

In [65]:
%%R

write_csv(df_filtered, "../data/final_data_filtered.csv")