# XGBoost Regression Example

In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Preprocess and normalize data

In [None]:
def load_data(path="data/Admission_Predict_Ver1.1.csv"):
    """Load the admissions CSV and min-max scale its feature columns.

    Args:
        path: Location of the CSV file. Defaults to the dataset bundled
            with this notebook.

    Returns:
        A DataFrame holding every feature column rescaled with
        ``MinMaxScaler``, followed by the untouched ``"Chance of Admit "``
        label as the last column. The ``"Serial No."`` column is dropped.

    Note:
        The scaler is fitted on every row in the file, so a train/test
        split made afterwards shares scaling statistics across both
        partitions.
    """
    # The column name in the CSV really does end with a trailing space.
    label_col = "Chance of Admit "

    raw = pd.read_csv(path)
    labels = raw[label_col]
    # The serial number is only a row identifier; the label must not be scaled.
    features = raw.drop(columns=["Serial No.", label_col])

    scaler = MinMaxScaler()
    scaled = pd.DataFrame(
        scaler.fit_transform(features),
        columns=features.columns,
        index=features.index,
    )

    # Re-attach the label last so callers can slice features with columns[:-1].
    scaled[label_col] = labels
    return scaled


# Build the preprocessed frame once; later cells read from `df`.
df = load_data()
# Preview the first rows (head() shows 5 by default).
df.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,0.94,0.928571,0.75,0.875,0.875,0.913462,1.0,0.92
1,0.68,0.535714,0.75,0.75,0.875,0.663462,1.0,0.76
2,0.52,0.428571,0.5,0.5,0.625,0.384615,1.0,0.72
3,0.64,0.642857,0.5,0.625,0.375,0.599359,1.0,0.8
4,0.48,0.392857,0.25,0.25,0.5,0.451923,0.0,0.65


# Splitting into training and testing data

In [None]:
# Hold out 20% of the rows for testing. The label is the last column, so
# every column before it is a feature. random_state pins the shuffle so the
# split, and everything computed from it, is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    df[df.columns[:-1]],
    df["Chance of Admit "],
    test_size=0.2,
    random_state=42,
)

# Build the XGBoost model and set its hyperparameters

In [None]:
# Gradient-boosted regressor: many shallow-ish trees with a small learning
# rate. `use_label_encoder` is intentionally not passed: it is a deprecated
# classifier-only option and only produces an "unused parameter" warning on
# a regressor.
xgb_model = xgb.XGBRegressor(
    objective="reg:squarederror",  # plain squared-error regression loss
    random_state=42,  # fixed seed for reproducible training
    n_estimators=1000,  # number of boosting rounds (trees)
    learning_rate=0.01,  # shrinkage applied to each tree's contribution
    max_depth=8,  # maximum depth of each tree
    verbosity=1,  # print warnings only
)

See the [XGBRegressor API reference](https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.XGBRegressor) for the full list of hyperparameters.

# Training XGBoost

In [None]:
# Fit the regressor on the training partition only.
xgb_model.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out rows
predictions = xgb_model.predict(X_test)
# Evaluate the model. For a regressor, .score() reports the coefficient of
# determination (R^2), not a classification accuracy, so label it as such.
score = xgb_model.score(X_test, y_test)
print(f"Model R^2 score: {score}")

Model Accuracy: 0.7632886045033669
