In [69]:
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, balanced_accuracy_score
from imblearn.over_sampling import SMOTENC
import numpy as np
import plotly.express as px


# Data Loading

In [2]:
df_2016: pd.DataFrame = pd.read_csv('./data/nhes_16_pfi_v1_0.csv')
df_2019: pd.DataFrame = pd.read_csv('./data/nhes_19_pfi_v1_0.csv')

In [3]:
print(f'2019 Number of Rows: {df_2016.shape[0]:,}')
df_2016.head(20)

2019 Number of Rows: 14,075


Unnamed: 0,BASMID,PATH,QTYPE,GRADE,SCPUBPRI,DISTASSI,SCHRTSCHL,SNEIGHBRX,SPUBCHOIX,SCONSIDR,...,F_HMEDICAID,F_HCHIP,F_HSECN8,F_TTLHHINC,F_YRSADDR,F_OWNRNTHB,F_HVINTSPHO,F_HVINTCOM,F_USEINTRNT,F_HHUNID
0,20161000013,E,2,6,4,2,2,2,1,2,...,0,0,0,3,0,0,0,0,0,0
1,20161000017,S,2,12,4,1,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0
2,20161000050,E,2,4,4,1,2,2,2,2,...,0,0,0,3,0,0,0,0,0,0
3,20161000057,H,1,-1,-1,-1,-1,-1,-1,-1,...,0,0,0,0,0,0,0,0,0,0
4,20161000058,S,2,12,4,1,2,2,1,2,...,0,0,0,0,0,0,0,0,0,0
5,20161000064,S,2,13,4,1,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0
6,20161000065,S,2,13,4,1,2,2,1,2,...,0,0,0,0,0,0,0,0,0,0
7,20161000085,H,1,-1,-1,-1,-1,-1,-1,-1,...,0,0,0,0,0,0,0,0,0,0
8,20161000096,S,2,13,4,1,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0
9,20161000120,S,2,12,4,1,2,2,1,1,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Modify 'SEABSNT' so that it data is consistent with 2019
# 0-5 days: 1
# 6-10: 2
# 11-20: 3
# More than 20: 4
# Skipped: -1
def map_seabsnt(days):
    if days == -1:
        return -1
    elif 0 <= days <= 5:
        return 1
    elif 6 <= days <= 10:
        return 2
    elif 11 <= days <= 20:
        return 3
    elif days > 20:
        return 4

df_2016['SEABSNT'] = df_2016['SEABSNT'].apply(map_seabsnt)
df_2016

Unnamed: 0,BASMID,PATH,QTYPE,GRADE,SCPUBPRI,DISTASSI,SCHRTSCHL,SNEIGHBRX,SPUBCHOIX,SCONSIDR,...,F_HMEDICAID,F_HCHIP,F_HSECN8,F_TTLHHINC,F_YRSADDR,F_OWNRNTHB,F_HVINTSPHO,F_HVINTCOM,F_USEINTRNT,F_HHUNID
0,20161000013,E,2,6,4,2,2,2,1,2,...,0,0,0,3,0,0,0,0,0,0
1,20161000017,S,2,12,4,1,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0
2,20161000050,E,2,4,4,1,2,2,2,2,...,0,0,0,3,0,0,0,0,0,0
3,20161000057,H,1,-1,-1,-1,-1,-1,-1,-1,...,0,0,0,0,0,0,0,0,0,0
4,20161000058,S,2,12,4,1,2,2,1,2,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14070,20161226491,S,2,12,1,-1,-1,2,3,1,...,0,0,0,0,0,0,0,0,0,0
14071,20161226533,S,2,14,4,2,2,2,2,1,...,0,0,0,0,0,0,0,0,0,0
14072,20161226569,E,2,6,4,1,2,1,2,2,...,0,0,0,0,0,0,0,0,0,0
14073,20161226573,M,2,9,4,1,2,2,1,2,...,3,3,3,0,0,0,0,0,0,0


In [5]:
print(f'2019 Number of Rows: {df_2019.shape[0]:,}')
df_2019.head(20)

2019 Number of Rows: 16,446


Unnamed: 0,BASMID,ALLGRADEX,EDCPUB,EDCCAT,EDCREL,EDCPRI,EDCINTK12,EDCINTCOL,EDCCOL,EDCHSFL,...,F_OWNRNTHB,F_HVINTSPHO,F_HVINTCOM,F_CHLDNT,F_LRNCOMP,F_LRNTAB,F_LRNCELL,F_SEFUTUREX,F_HHUNID,F_ZIPLOCL
0,20191000012,12,1,2,2,2,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0
1,20191000029,4,1,2,2,2,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0
2,20191000059,11,2,2,1,2,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0
3,20191000070,6,1,2,2,2,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0
4,20191000078,9,1,2,2,2,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0
5,20191000083,6,1,2,2,2,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0
6,20191000121,9,2,2,2,2,2,2,2,1,...,0,0,0,0,0,0,0,0,0,0
7,20191000149,8,1,2,2,2,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0
8,20191000163,5,1,2,2,2,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0
9,20191000187,9,1,2,2,2,2,2,2,2,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# We need this data combined and only a particular number of columns for our model

rel_features: list = ['SEGRADES', 'SEADPLCXX', 'SEABSNT', 'FSSPHW', 'FHHOME', 'FHWKHRS', 'FHPLACE', 'FOSPORT', 'FODINNERX', 'FOLIBRAYX', 'ALLGRADEX']

df: pd.DataFrame = pd.concat([df_2016[rel_features], df_2019[rel_features]], axis = 0)

print(f'New DataFrame Number of Rows: {df.shape[0]:,}')
df.head(20)

New DataFrame Number of Rows: 30,521


Unnamed: 0,SEGRADES,SEADPLCXX,SEABSNT,FSSPHW,FHHOME,FHWKHRS,FHPLACE,FOSPORT,FODINNERX,FOLIBRAYX,ALLGRADEX
0,1,-1,1,1,4,10,1,1,6,1,3
1,2,2,1,1,3,4,1,1,5,2,9
2,5,-1,1,2,3,4,1,1,7,1,1
3,-1,-1,-1,-1,-1,-1,-1,1,7,1,9
4,1,1,1,2,4,6,1,1,5,2,9
5,2,2,1,4,3,6,2,2,4,2,10
6,1,2,1,4,5,-1,-1,1,0,2,10
7,-1,-1,-1,-1,-1,-1,-1,1,5,2,10
8,3,2,1,2,4,10,1,1,4,2,10
9,1,2,1,3,3,1,1,1,7,2,9


In [7]:
# We want to focus on high school students here, so those that are eligible to take AP courses (in other words, we want all the rows where SEADPLCXX is not 0)
df = df.loc[df['SEADPLCXX'] != -1]

# We also want to drop the roughly ~100 students where there schools doesn't given them normal grades
df = df.loc[df['SEGRADES'] != 5]

print(f'New number of rows after removing non-AP eligible students: {df.shape[0]:,}')

New number of rows after removing non-AP eligible students: 11,350


In [8]:
# Let's also see how the distribution of values for the other columns look like
df.isin([-1]).sum()

SEGRADES       0
SEADPLCXX      0
SEABSNT        0
FSSPHW        40
FHHOME        40
FHWKHRS      711
FHPLACE      711
FOSPORT        0
FODINNERX      0
FOLIBRAYX      0
ALLGRADEX      0
dtype: int64

# Visualizations

In [9]:
# To convert in a moment
num_to_grades: dict[int, str] = {1: "Mostly A's", 2: "Mostly B's", 3: "Mostly C's", 4: "Mostly D's or lower"}

# For color purposes
NUM_OF_BINS: int = len(df['SEGRADES'].unique())

RAINBOW_COLORS: list[str] = ["#ffadad","#ffd6a5","#fdffb6","#caffbf","#9bf6ff","#a0c4ff","#bdb2ff","#ffc6ff"]

RAINBOW_COLOR_SEQ: dict[str, str] = {list(num_to_grades.values())[i]:RAINBOW_COLORS[i % len(RAINBOW_COLORS)] for i in range(NUM_OF_BINS)}

In [10]:
RAINBOW_COLOR_SEQ

{"Mostly A's": '#ffadad',
 "Mostly B's": '#ffd6a5',
 "Mostly C's": '#fdffb6',
 "Mostly D's or lower": '#caffbf'}

In [11]:
# Let's get a basic idea of how these students are performing by creating a bar chart for each category

# First, we can group by the grades to get the counts
df_grouped_by_grades: pd.DataFrame = df.groupby(by = 'SEGRADES').size().reset_index(name = 'count')

# Convert these numerical columns to textual ones for visualization purposes
df_grouped_by_grades['SEGRADES'] = df_grouped_by_grades['SEGRADES'].map(num_to_grades)

grade_dist: px.bar = px.bar(
    data_frame = df_grouped_by_grades,
    x = 'SEGRADES',
    y = 'count',
    color = 'SEGRADES',  # Specify the column for coloring
    color_discrete_map = RAINBOW_COLOR_SEQ,
    text_auto = True,
    template = 'presentation',
    labels = {'SEGRADES': 'Student Grade'}
)

grade_dist.update_layout(
    font_family = "Raleway, sans-serif",
    showlegend = False,
    title = dict(text=f'<b>Grade Distribution of High Schoolers Across 2016-2019</b><br><sup>As provided by the Parent and Family Involvement In Education Surveys</sup>'),
)

grade_dist


In [12]:
grade_dist.write_image('./figures/grade_dist.png', scale = 6)

In [13]:
# We can also create a visualization that sees how the grades evolved between the two years
df_2016_grouped: pd.DataFrame = df_2016.loc[(df_2016['SEGRADES'] != 5) & (df_2016['SEADPLCXX'] != -1)].groupby('SEGRADES').size().reset_index(name='2016')
df_2019_grouped: pd.DataFrame = df_2019.loc[(df_2019['SEGRADES'] != 5) & (df_2019['SEADPLCXX'] != -1)].groupby('SEGRADES').size().reset_index(name='2019')

# Merge the grouped data
df_grade_comparison: pd.DataFrame = pd.merge(df_2016_grouped, df_2019_grouped, on='SEGRADES', how='inner')

# Map numerical grades to textual grades
df_grade_comparison['SEGRADES'] = df_grade_comparison['SEGRADES'].map(num_to_grades)

comparison_chart: px.bar = px.bar(
    data_frame=df_grade_comparison,
    x='SEGRADES',
    y=['2016', '2019'],
    barmode='group',
    labels={'value': 'Count', 'variable': 'Year', 'SEGRADES': 'Student Grade'},
    title='Comparison of Student Grades Between 2016 and 2019',
    color_discrete_map = {'2016': '#9bf6ff', '2019': '#bdb2ff'},
    text_auto = True,
    template = 'presentation'
)

comparison_chart.update_layout(
   font_family = "Raleway, sans-serif",
   showlegend = True,
   title = dict(text=f'<b>Grade Comparison Among High Schoolers: 2016 to 2019</b><br><sup>As provided by the Parent and Family Involvement In Education Surveys</sup>'),
)

comparison_chart

In [14]:
comparison_chart.write_image('./figures/grade_comparison.png', scale = 2)

In [15]:
df_grade_comparison

Unnamed: 0,SEGRADES,2016,2019
0,Mostly A's,2357,3191
1,Mostly B's,1869,2104
2,Mostly C's,769,732
3,Mostly D's or lower,176,152


In [16]:
# Export image
grade_dist.write_image('./figures/grade_dist.png', scale = 10)

# Model Implementation

In [17]:
def display_confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray, title: str, labels: list[int]):
    """plot confusion matrix with plotly
    adapted from this stack overflow post: https://stackoverflow.com/a/79074318

    Args:
        y_true (np.ndarray): true y values
        y_pred (np.ndarray): predicted y values from the model
        title (str): title of the chart
        labels (list[int]): labels for the axes
    """
    cm = confusion_matrix(y_true, y_pred)
    
    dims = [str(l) for l in labels]

    fig = px.imshow(cm, 
                    x=dims, 
                    y=dims, 
                    color_continuous_scale='Purples', 
                    aspect="auto")

    fig.update_traces(text=cm, texttemplate="%{text}")

    fig.update_layout(
        title=f'<b>{title}</b>',
        xaxis_title='Predicted',
        yaxis_title='True',
        dragmode='select',
        hovermode='closest',
        template='presentation',
        font=dict(size=15),  
        yaxis=dict(tickmode='linear', automargin=True),
        xaxis=dict(tickangle=15)
    )
    fig.show()

    return fig

In [92]:
# Split our datasets 
X = df.drop(['SEGRADES', 'ALLGRADEX'], axis=1)
y = df['SEGRADES']

X_trn: np.ndarray
X_tst: np.ndarray
y_trn: np.ndarray
y_tst: np.ndarray

X_trn, X_tst, y_trn, y_tst = train_test_split(X, y, test_size = .3, shuffle = True, stratify = y)

In [93]:
# Impute them

# For model purposes, because the values are relatively limited and not that common, we can impute them using MICE with an MLPRegressor (because the data is probably not linear)
# mice_imputer: IterativeImputer = IterativeImputer(estimator = RandomForestRegressor(), random_state = 42, max_iter = 10, missing_values = -1, tol=1e-1)

# X_trn = pd.DataFrame(mice_imputer.fit_transform(X_trn), columns = X.columns)
# X_tst = pd.DataFrame(mice_imputer.transform(X_tst), columns = X.columns)

simple_imputer: SimpleImputer = SimpleImputer(missing_values = -1, strategy = 'most_frequent')
X_trn = pd.DataFrame(simple_imputer.fit_transform(X_trn), columns = X.columns)
X_tst = pd.DataFrame(simple_imputer.transform(X_tst), columns = X.columns)

In [94]:
X_trn

Unnamed: 0,SEADPLCXX,SEABSNT,FSSPHW,FHHOME,FHWKHRS,FHPLACE,FOSPORT,FODINNERX,FOLIBRAYX
0,2,1,1,4,10,1,2,7,2
1,1,1,3,3,4,1,2,4,2
2,2,2,2,3,4,1,2,5,2
3,2,2,1,1,2,1,2,7,1
4,1,1,4,4,14,1,2,7,1
...,...,...,...,...,...,...,...,...,...
7940,1,1,4,4,20,2,2,7,2
7941,2,1,4,1,2,1,1,6,2
7942,1,1,1,2,9,1,1,0,1
7943,1,1,4,4,15,1,1,4,2


In [None]:
categorical_indices = [0,1,2,3,4,6,7,8]

sm = SMOTENC(categorical_features=categorical_indices, random_state=42)
X_trn, y_trn = sm.fit_resample(X_trn, y_trn)

In [96]:
X_trn.isna().sum()

SEADPLCXX    0
SEABSNT      0
FSSPHW       0
FHHOME       0
FHWKHRS      0
FHPLACE      0
FOSPORT      0
FODINNERX    0
FOLIBRAYX    0
dtype: int64

In [97]:
# We can also scale our data; however, given that many of these values are encoded categorical variables, we need to account for that
# We can also one-hot encode the categorical features
numeric_transformer: StandardScaler = StandardScaler()
categorical_transformer: OneHotEncoder = OneHotEncoder(handle_unknown = 'ignore')

categorical_features: list[str] = ['SEADPLCXX', 'FSSPHW', 'FHHOME', 'FHPLACE', 'FOSPORT', 'FOLIBRAYX', 'SEABSNT']
numeric_features: list[str] = ['FODINNERX', 'FHWKHRS']

# Ensure these are strings so the OneHotEncoder doesn't make them floats when encoded
X_trn[categorical_features] = X_trn[categorical_features].round().astype('category')
X_tst[categorical_features] = X_tst[categorical_features].round().astype('category')

# A ColumnTransformer can apply various different transformations to different columns
preprocessor: ColumnTransformer = ColumnTransformer(
    transformers = [
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

X_trn = preprocessor.fit_transform(X_trn)

X_tst = preprocessor.transform(X_tst)

In [98]:
# Get encoded column names
num_cols = preprocessor.transformers_[0][2]
cat_encoder = preprocessor.named_transformers_['cat']
cat_cols = cat_encoder.get_feature_names_out(categorical_features)

# Combine names
all_cols = list(num_cols) + list(cat_cols)

X_trn = pd.DataFrame(X_trn, columns=all_cols)

# Peek at the result
X_trn

Unnamed: 0,FODINNERX,FHWKHRS,SEADPLCXX_1,SEADPLCXX_2,FSSPHW_1,FSSPHW_2,FSSPHW_3,FSSPHW_4,FHHOME_1,FHHOME_2,...,FHPLACE_2,FHPLACE_3,FOSPORT_1,FOSPORT_2,FOLIBRAYX_1,FOLIBRAYX_2,SEABSNT_1,SEABSNT_2,SEABSNT_3,SEABSNT_4
0,1.278135,0.337007,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
1,-0.142512,-0.623998,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
2,0.331037,-0.623998,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
3,1.278135,-0.944333,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
4,1.278135,0.977677,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7940,1.278135,1.938682,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
7941,0.804586,-0.944333,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
7942,-2.036707,0.176839,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
7943,-0.142512,1.137844,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0


## Multinomial Logistic Regression

In [99]:
# Now we can train our model!
logistic_regression: LogisticRegressionCV = LogisticRegressionCV(random_state = 42, max_iter = 1000, solver = 'saga', class_weight = 'balanced')

logistic_regression.fit(X_trn, y_trn)

y_hat: np.ndarray = logistic_regression.predict(X_tst)

print(f'The accuracy of the Logistic Regression Model is {accuracy_score(y_tst, y_hat)}')
print(f'The balanced accuracy of the Logistic Regression Model is {balanced_accuracy_score(y_tst, y_hat)}')
print(classification_report(y_tst, y_hat))

The accuracy of the Logistic Regression Model is 0.47400881057268723
The balanced accuracy of the Logistic Regression Model is 0.45776509902588525
              precision    recall  f1-score   support

           1       0.69      0.66      0.67      1665
           2       0.43      0.25      0.31      1192
           3       0.25      0.39      0.30       450
           4       0.12      0.54      0.20        98

    accuracy                           0.47      3405
   macro avg       0.37      0.46      0.37      3405
weighted avg       0.53      0.47      0.49      3405




X does not have valid feature names, but LogisticRegressionCV was fitted with feature names



In [89]:
cm_lr = confusion_matrix(y_tst, y_hat)
lr_cm_fig = display_confusion_matrix(y_tst, y_hat, title = 'Confusion Matrix: Logistic Regression', labels = num_to_grades.values())

In [90]:
lr_cm_fig.write_image('./figures/lr_cm.png', scale = 2)

## K-Nearest Neighbors

In [100]:
knn: KNeighborsClassifier = KNeighborsClassifier(n_neighbors = 9, metric= 'minkowski')

knn.fit(X_trn, y_trn)

y_hat = knn.predict(X_tst)

print(f'The accuracy of the KNN Model is {accuracy_score(y_tst, y_hat)}')
print(f'The balanced accuracy of the KNN is {balanced_accuracy_score(y_tst, y_hat)}')
print(classification_report(y_tst, y_hat))

The accuracy of the KNN Model is 0.520704845814978
The balanced accuracy of the KNN is 0.3218326724546763
              precision    recall  f1-score   support

           1       0.58      0.77      0.66      1665
           2       0.43      0.36      0.39      1192
           3       0.32      0.14      0.19       450
           4       0.22      0.02      0.04        98

    accuracy                           0.52      3405
   macro avg       0.39      0.32      0.32      3405
weighted avg       0.48      0.52      0.49      3405




X does not have valid feature names, but KNeighborsClassifier was fitted with feature names



In [101]:
cm_knn = confusion_matrix(y_tst, y_hat)
knn_cm_fig = display_confusion_matrix(y_tst, y_hat, title = 'Confusion Matrix: KNN', labels = num_to_grades.values())

In [102]:
knn_cm_fig.write_image('./figures/knn_cm.png', scale = 2)

## Bonus: Random Forest

In [103]:
rf: RandomForestClassifier = RandomForestClassifier(n_estimators = 200, max_depth = 9, class_weight='balanced', random_state = 42)
rf.fit(X_trn, y_trn)

y_hat = rf.predict(X_tst)

print(f'The accuracy of the Random Forest Model is {accuracy_score(y_tst, y_hat)}')
print(f'The balanced accuracy of the Random Forest is {balanced_accuracy_score(y_tst, y_hat)}')
print(classification_report(y_tst, y_hat))


The accuracy of the Random Forest Model is 0.47958883994126283
The balanced accuracy of the Random Forest is 0.42939736956515484
              precision    recall  f1-score   support

           1       0.69      0.66      0.67      1665
           2       0.43      0.25      0.32      1192
           3       0.23      0.45      0.31       450
           4       0.14      0.36      0.20        98

    accuracy                           0.48      3405
   macro avg       0.37      0.43      0.37      3405
weighted avg       0.52      0.48      0.49      3405




X does not have valid feature names, but RandomForestClassifier was fitted with feature names



In [105]:
print(pd.Series(data = rf.feature_importances_, index = X_trn.columns))

FODINNERX      0.102149
FHWKHRS        0.121140
SEADPLCXX_1    0.110628
SEADPLCXX_2    0.096622
FSSPHW_1       0.031098
FSSPHW_2       0.022829
FSSPHW_3       0.035797
FSSPHW_4       0.023417
FHHOME_1       0.031622
FHHOME_2       0.016884
FHHOME_3       0.030827
FHHOME_4       0.068860
FHHOME_5       0.040195
FHHOME_6       0.008182
FHPLACE_1      0.016205
FHPLACE_2      0.014297
FHPLACE_3      0.007202
FOSPORT_1      0.025567
FOSPORT_2      0.026001
FOLIBRAYX_1    0.019449
FOLIBRAYX_2    0.018340
SEABSNT_1      0.054131
SEABSNT_2      0.022328
SEABSNT_3      0.025251
SEABSNT_4      0.030979
dtype: float64
