In [116]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import sklearn
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import OneSidedSelection

In [85]:
df_full = pd.read_csv("StudentsPerformance.csv")

In [86]:
df_full

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75
...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,99,95
996,male,group C,high school,free/reduced,none,62,55,55
997,female,group C,high school,free/reduced,completed,59,71,65
998,female,group D,some college,standard,completed,68,78,77


In [87]:
df_math = df_full.drop(columns=["reading score", "writing score"])

In [88]:
df_math

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score
0,female,group B,bachelor's degree,standard,none,72
1,female,group C,some college,standard,completed,69
2,female,group B,master's degree,standard,none,90
3,male,group A,associate's degree,free/reduced,none,47
4,male,group C,some college,standard,none,76
...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88
996,male,group C,high school,free/reduced,none,62
997,female,group C,high school,free/reduced,completed,59
998,female,group D,some college,standard,completed,68


In [89]:
codes, uniques = pd.factorize(df_math["gender"])

In [90]:
codes

array([0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1,
       1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,

In [91]:
uniques

Index(['female', 'male'], dtype='object')

In [92]:
df_math["gender factorized"] = np.array(codes)

In [93]:
df_math

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,gender factorized
0,female,group B,bachelor's degree,standard,none,72,0
1,female,group C,some college,standard,completed,69,0
2,female,group B,master's degree,standard,none,90,0
3,male,group A,associate's degree,free/reduced,none,47,1
4,male,group C,some college,standard,none,76,1
...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,0
996,male,group C,high school,free/reduced,none,62,1
997,female,group C,high school,free/reduced,completed,59,0
998,female,group D,some college,standard,completed,68,0


In [94]:
codes_race, uniques_race = pd.factorize(df_math["race/ethnicity"])

In [95]:
codes_race

array([0, 1, 0, 2, 1, 0, 0, 0, 3, 0, 1, 3, 0, 2, 2, 1, 1, 0, 1, 1, 3, 0,
       3, 1, 3, 2, 0, 1, 1, 3, 3, 0, 4, 3, 4, 4, 3, 3, 3, 0, 1, 1, 0, 0,
       4, 0, 2, 1, 3, 1, 4, 4, 1, 3, 1, 1, 4, 3, 3, 1, 4, 2, 2, 1, 3, 0,
       3, 1, 0, 1, 3, 3, 2, 1, 1, 0, 4, 2, 3, 4, 0, 0, 2, 4, 3, 1, 1, 3,
       2, 3, 1, 1, 1, 1, 0, 1, 0, 4, 3, 3, 0, 3, 3, 0, 1, 1, 3, 4, 0, 0,
       3, 1, 2, 3, 4, 1, 0, 3, 3, 1, 1, 0, 1, 3, 4, 0, 0, 3, 3, 2, 3, 1,
       4, 1, 3, 1, 0, 4, 1, 3, 3, 1, 4, 2, 3, 1, 0, 1, 3, 4, 2, 2, 0, 3,
       3, 1, 4, 0, 0, 3, 0, 4, 0, 1, 4, 1, 1, 0, 0, 1, 2, 4, 3, 1, 1, 1,
       0, 1, 0, 3, 1, 1, 4, 3, 1, 1, 4, 3, 0, 1, 4, 3, 0, 3, 1, 3, 1, 4,
       0, 0, 1, 3, 1, 0, 1, 3, 4, 4, 0, 0, 3, 1, 1, 1, 4, 0, 4, 1, 0, 0,
       3, 0, 1, 3, 0, 4, 1, 3, 2, 1, 3, 1, 0, 4, 1, 3, 3, 3, 0, 1, 3, 4,
       3, 4, 3, 1, 4, 0, 0, 1, 2, 3, 0, 3, 3, 4, 1, 1, 0, 1, 1, 1, 1, 4,
       3, 3, 1, 3, 3, 4, 1, 1, 3, 3, 0, 1, 1, 4, 1, 0, 3, 3, 3, 3, 0, 0,
       4, 0, 0, 4, 1, 3, 1, 4, 3, 0, 2, 4, 1, 3, 2,

In [96]:
uniques_race

Index(['group B', 'group C', 'group A', 'group D', 'group E'], dtype='object')

In [97]:
df_math["race/ethnicity fact"] = np.array(codes_race)

In [98]:
df_math

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,gender factorized,race/ethnicity fact
0,female,group B,bachelor's degree,standard,none,72,0,0
1,female,group C,some college,standard,completed,69,0,1
2,female,group B,master's degree,standard,none,90,0,0
3,male,group A,associate's degree,free/reduced,none,47,1,2
4,male,group C,some college,standard,none,76,1,1
...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,0,4
996,male,group C,high school,free/reduced,none,62,1,1
997,female,group C,high school,free/reduced,completed,59,0,1
998,female,group D,some college,standard,completed,68,0,3


In [99]:
codes_parental, uniques_parental = pd.factorize(df_math["parental level of education"])
uniques_parental

Index(['bachelor's degree', 'some college', 'master's degree',
       'associate's degree', 'high school', 'some high school'],
      dtype='object')

In [100]:
df_math["parental level of education fact"] = np.array(codes_parental)
df_math

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,gender factorized,race/ethnicity fact,parental level of education fact
0,female,group B,bachelor's degree,standard,none,72,0,0,0
1,female,group C,some college,standard,completed,69,0,1,1
2,female,group B,master's degree,standard,none,90,0,0,2
3,male,group A,associate's degree,free/reduced,none,47,1,2,3
4,male,group C,some college,standard,none,76,1,1,1
...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,0,4,2
996,male,group C,high school,free/reduced,none,62,1,1,4
997,female,group C,high school,free/reduced,completed,59,0,1,4
998,female,group D,some college,standard,completed,68,0,3,1


In [101]:
codes_lunch, unique_lunch = pd.factorize(df_math['lunch'])
df_math['lunch fact'] = np.array(codes_lunch)
df_math

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,gender factorized,race/ethnicity fact,parental level of education fact,lunch fact
0,female,group B,bachelor's degree,standard,none,72,0,0,0,0
1,female,group C,some college,standard,completed,69,0,1,1,0
2,female,group B,master's degree,standard,none,90,0,0,2,0
3,male,group A,associate's degree,free/reduced,none,47,1,2,3,1
4,male,group C,some college,standard,none,76,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,0,4,2,0
996,male,group C,high school,free/reduced,none,62,1,1,4,1
997,female,group C,high school,free/reduced,completed,59,0,1,4,1
998,female,group D,some college,standard,completed,68,0,3,1,0


In [102]:
unique_lunch

Index(['standard', 'free/reduced'], dtype='object')

In [103]:
codes_prep, uniques_prep = pd.factorize(df_math['test preparation course'])
df_math['prep fact'] = np.array(codes_prep)

In [104]:
df_math

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,gender factorized,race/ethnicity fact,parental level of education fact,lunch fact,prep fact
0,female,group B,bachelor's degree,standard,none,72,0,0,0,0,0
1,female,group C,some college,standard,completed,69,0,1,1,0,1
2,female,group B,master's degree,standard,none,90,0,0,2,0,0
3,male,group A,associate's degree,free/reduced,none,47,1,2,3,1,0
4,male,group C,some college,standard,none,76,1,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,0,4,2,0,1
996,male,group C,high school,free/reduced,none,62,1,1,4,1,0
997,female,group C,high school,free/reduced,completed,59,0,1,4,1,1
998,female,group D,some college,standard,completed,68,0,3,1,0,1


In [105]:
uniques_prep

Index(['none', 'completed'], dtype='object')

In [106]:
#Linear regression
X = pd.DataFrame()

X["gender"] = df_math['gender factorized']
X["race/ethnicity"] = df_math['race/ethnicity fact']
# X['parental'] = df_math["parental level of education fact"]
X['lunch'] = df_math["lunch fact"]
X['preparation'] = df_math["prep fact"]

y = pd.DataFrame()
y['math'] = df_math['math score']

X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

model = LinearRegression()

model.fit(X_train, y_train)

y_predict = model.predict(x_test)
print(r2_score(y_test, y_predict))
print(mean_absolute_error(y_test, y_predict))
print(mean_squared_error(y_test, y_predict))



0.22227313764233203
10.517071638858015
176.23415137322732


In [107]:
#MLP Regressor
X = pd.DataFrame()

X["gender"] = df_math['gender factorized']
# X["race/ethnicity"] = df_math['race/ethnicity fact']
X['parental'] = df_math["parental level of education fact"]
X['lunch'] = df_math["lunch fact"]
X['preparation'] = df_math["prep fact"]

y = pd.DataFrame()
y['math'] = df_math['math score']

X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

model = MLPRegressor(random_state=0)

model.fit(X_train, y_train)

y_predict = model.predict(x_test)
print(r2_score(y_test, y_predict))
print(mean_absolute_error(y_test, y_predict))
print(mean_squared_error(y_test, y_predict))


  y = column_or_1d(y, warn=True)


-1.2095761766184827
18.192151423955796
500.6934969436308




In [108]:
# Decision Tree Regressor
X = pd.DataFrame()

X["gender"] = df_math['gender factorized']
# X["race/ethnicity"] = df_math['race/ethnicity fact']
X['parental'] = df_math["parental level of education fact"]
X['lunch'] = df_math["lunch fact"]
X['preparation'] = df_math["prep fact"]

y = pd.DataFrame()
y['math'] = df_math['math score']

X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

model = DecisionTreeRegressor(random_state=0)

model.fit(X_train, y_train)

y_predict = model.predict(x_test)
print(r2_score(y_test, y_predict))
print(mean_absolute_error(y_test, y_predict))
print(mean_squared_error(y_test, y_predict))


0.14138132966592287
11.128299307666762
194.5643644875744


In [109]:
# Random Forest Regressor
X = pd.DataFrame()

X["gender"] = df_math['gender factorized']
# X["race/ethnicity"] = df_math['race/ethnicity fact']
# X['parental'] = df_math["parental level of education fact"]
X['lunch'] = df_math["lunch fact"]
X['preparation'] = df_math["prep fact"]

y = pd.DataFrame()
y['math'] = df_math['math score']

X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

model = RandomForestRegressor(random_state=0)

model.fit(X_train, y_train)

y_predict = model.predict(x_test)
print(r2_score(y_test, y_predict))
print(mean_absolute_error(y_test, y_predict))
print(mean_squared_error(y_test, y_predict))

0.17953291263983584
10.740574981603215
185.91915474315297


  model.fit(X_train, y_train)


In [110]:
# # SVM
# X = pd.DataFrame()

# X["gender"] = df_math['gender factorized']
# X["race/ethnicity"] = df_math['race/ethnicity fact']
# X['parental'] = df_math["parental level of education fact"]
# X['lunch'] = df_math["lunch fact"]
# X['preparation'] = df_math["prep fact"]

# y = pd.DataFrame()
# y['math'] = df_math['math score']

# X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# model = SVC(random_state=0)

# model.fit(X_train, y_train)

# y_predict = model.predict(x_test)
# print(r2_score(y_test, y_predict))
# print(mean_absolute_error(y_test, y_predict))
# print(mean_squared_error(y_test, y_predict))

In [111]:
df_math

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,gender factorized,race/ethnicity fact,parental level of education fact,lunch fact,prep fact
0,female,group B,bachelor's degree,standard,none,72,0,0,0,0,0
1,female,group C,some college,standard,completed,69,0,1,1,0,1
2,female,group B,master's degree,standard,none,90,0,0,2,0,0
3,male,group A,associate's degree,free/reduced,none,47,1,2,3,1,0
4,male,group C,some college,standard,none,76,1,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,0,4,2,0,1
996,male,group C,high school,free/reduced,none,62,1,1,4,1,0
997,female,group C,high school,free/reduced,completed,59,0,1,4,1,1
998,female,group D,some college,standard,completed,68,0,3,1,0,1


In [112]:
df_math.loc[df_math["math score"] < 60, "pass/fail"] = 0

In [113]:
df_math

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,gender factorized,race/ethnicity fact,parental level of education fact,lunch fact,prep fact,pass/fail
0,female,group B,bachelor's degree,standard,none,72,0,0,0,0,0,
1,female,group C,some college,standard,completed,69,0,1,1,0,1,
2,female,group B,master's degree,standard,none,90,0,0,2,0,0,
3,male,group A,associate's degree,free/reduced,none,47,1,2,3,1,0,0.0
4,male,group C,some college,standard,none,76,1,1,1,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,0,4,2,0,1,
996,male,group C,high school,free/reduced,none,62,1,1,4,1,0,
997,female,group C,high school,free/reduced,completed,59,0,1,4,1,1,0.0
998,female,group D,some college,standard,completed,68,0,3,1,0,1,


In [114]:
df_math.loc[df_math["math score"] >= 60, "pass/fail"] = 1

In [115]:
df_math

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,gender factorized,race/ethnicity fact,parental level of education fact,lunch fact,prep fact,pass/fail
0,female,group B,bachelor's degree,standard,none,72,0,0,0,0,0,1.0
1,female,group C,some college,standard,completed,69,0,1,1,0,1,1.0
2,female,group B,master's degree,standard,none,90,0,0,2,0,0,1.0
3,male,group A,associate's degree,free/reduced,none,47,1,2,3,1,0,0.0
4,male,group C,some college,standard,none,76,1,1,1,0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,0,4,2,0,1,1.0
996,male,group C,high school,free/reduced,none,62,1,1,4,1,0,1.0
997,female,group C,high school,free/reduced,completed,59,0,1,4,1,1,0.0
998,female,group D,some college,standard,completed,68,0,3,1,0,1,1.0


In [121]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
#Logistic regression
X = pd.DataFrame()

X["gender"] = df_math['gender factorized']
# X["race/ethnicity"] = df_math['race/ethnicity fact']
# X['parental'] = df_math["parental level of education fact"]
X['lunch'] = df_math["lunch fact"]
X['preparation'] = df_math["prep fact"]

y = pd.DataFrame()
y['math'] = df_math['pass/fail']

# smt = SMOTE()
# X_train_sm, y_train_sm = smt.fit_resample(X, y)
# X_train, x_test, y_train, y_test = train_test_split(X_train_sm, y_train_sm, test_size=0.2, random_state=0)

oss = OneSidedSelection()
X_train_oss, y_train_oss = oss.fit_resample(X, y)
X_train, x_test, y_train, y_test = train_test_split(X_train_oss, y_train_oss, test_size=0.2, random_state=0)

# X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

model = LogisticRegression()

model.fit(X_train, y_train)

y_predict = model.predict(x_test)
print(classification_report(y_test, y_predict))


              precision    recall  f1-score   support

         0.0       0.62      0.21      0.31        62
         1.0       0.73      0.94      0.82       138

    accuracy                           0.71       200
   macro avg       0.67      0.58      0.57       200
weighted avg       0.69      0.71      0.66       200



  y = column_or_1d(y, warn=True)


In [123]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report
#Naive Bayes regression
X = pd.DataFrame()

X["gender"] = df_math['gender factorized']
# X["race/ethnicity"] = df_math['race/ethnicity fact']
# X['parental'] = df_math["parental level of education fact"]
X['lunch'] = df_math["lunch fact"]
X['preparation'] = df_math["prep fact"]

y = pd.DataFrame()
y['math'] = df_math['pass/fail']

# smt = SMOTE()
# X_train_sm, y_train_sm = smt.fit_resample(X, y)
# X_train, x_test, y_train, y_test = train_test_split(X_train_sm, y_train_sm, test_size=0.2, random_state=0)

oss = OneSidedSelection()
X_train_oss, y_train_oss = oss.fit_resample(X, y)
X_train, x_test, y_train, y_test = train_test_split(X_train_oss, y_train_oss, test_size=0.2, random_state=0)

# X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

model = GaussianNB()

model.fit(X_train, y_train)

y_predict = model.predict(x_test)
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

         0.0       0.62      0.21      0.31        62
         1.0       0.73      0.94      0.82       138

    accuracy                           0.71       200
   macro avg       0.67      0.58      0.57       200
weighted avg       0.69      0.71      0.66       200



  y = column_or_1d(y, warn=True)


In [125]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
# KNN
X = pd.DataFrame()

X["gender"] = df_math['gender factorized']
# X["race/ethnicity"] = df_math['race/ethnicity fact']
# X['parental'] = df_math["parental level of education fact"]
X['lunch'] = df_math["lunch fact"]
X['preparation'] = df_math["prep fact"]

y = pd.DataFrame()
y['math'] = df_math['pass/fail']

# smt = SMOTE()
# X_train_sm, y_train_sm = smt.fit_resample(X, y)
# X_train, x_test, y_train, y_test = train_test_split(X_train_sm, y_train_sm, test_size=0.2, random_state=0)

oss = OneSidedSelection()
X_train_oss, y_train_oss = oss.fit_resample(X, y)
X_train, x_test, y_train, y_test = train_test_split(X_train_oss, y_train_oss, test_size=0.2, random_state=0)

# X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

model = KNeighborsClassifier()

model.fit(X_train, y_train)

y_predict = model.predict(x_test)
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

         0.0       0.62      0.21      0.31        62
         1.0       0.73      0.94      0.82       138

    accuracy                           0.71       200
   macro avg       0.67      0.58      0.57       200
weighted avg       0.69      0.71      0.66       200



  return self._fit(X, y)


In [127]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
# Decision Tree Classifier
X = pd.DataFrame()

X["gender"] = df_math['gender factorized']
# X["race/ethnicity"] = df_math['race/ethnicity fact']
# X['parental'] = df_math["parental level of education fact"]
X['lunch'] = df_math["lunch fact"]
X['preparation'] = df_math["prep fact"]

y = pd.DataFrame()
y['math'] = df_math['pass/fail']

# smt = SMOTE()
# X_train_sm, y_train_sm = smt.fit_resample(X, y)
# X_train, x_test, y_train, y_test = train_test_split(X_train_sm, y_train_sm, test_size=0.2, random_state=0)

oss = OneSidedSelection()
X_train_oss, y_train_oss = oss.fit_resample(X, y)
X_train, x_test, y_train, y_test = train_test_split(X_train_oss, y_train_oss, test_size=0.2, random_state=0)

# X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

model = DecisionTreeClassifier()

model.fit(X_train, y_train)

y_predict = model.predict(x_test)
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

         0.0       0.65      0.21      0.32        62
         1.0       0.73      0.95      0.82       138

    accuracy                           0.72       200
   macro avg       0.69      0.58      0.57       200
weighted avg       0.70      0.72      0.67       200



In [129]:
# SVC
X = pd.DataFrame()

X["gender"] = df_math['gender factorized']
# X["race/ethnicity"] = df_math['race/ethnicity fact']
# X['parental'] = df_math["parental level of education fact"]
X['lunch'] = df_math["lunch fact"]
X['preparation'] = df_math["prep fact"]

y = pd.DataFrame()
y['math'] = df_math['pass/fail']

# smt = SMOTE()
# X_train_sm, y_train_sm = smt.fit_resample(X, y)
# X_train, x_test, y_train, y_test = train_test_split(X_train_sm, y_train_sm, test_size=0.2, random_state=0)

oss = OneSidedSelection()
X_train_oss, y_train_oss = oss.fit_resample(X, y)
X_train, x_test, y_train, y_test = train_test_split(X_train_oss, y_train_oss, test_size=0.2, random_state=0)

# X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

model = SVC()

model.fit(X_train, y_train)

y_predict = model.predict(x_test)
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

         0.0       0.65      0.21      0.32        62
         1.0       0.73      0.95      0.82       138

    accuracy                           0.72       200
   macro avg       0.69      0.58      0.57       200
weighted avg       0.70      0.72      0.67       200



  y = column_or_1d(y, warn=True)
