<a href="https://colab.research.google.com/github/Yogi-Puvvala/Machine_Learning/blob/main/AdaBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **AdaBoost (Classifier)**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
from xgboost import XGBClassifier

In [2]:
df = pd.read_csv("drive/MyDrive/Colab_Projects/train.csv")

In [3]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
df.isna().sum()

Unnamed: 0,0
PassengerId,0
Survived,0
Pclass,0
Name,0
Sex,0
Age,177
SibSp,0
Parch,0
Ticket,0
Fare,0


In [5]:
x = df["Age"].mean()
x1 = float(f"{x:.2f}")
df["Age"] = df["Age"].fillna(x1)

In [6]:
df.drop("Name", axis = 1, inplace = True)

In [7]:
df["Cabin"] = df["Cabin"].fillna(df["Cabin"].mode()[0])

In [8]:
df["Embarked"] = df["Embarked"].fillna(df["Embarked"].mode()[0])

In [9]:
df.isna().sum()

Unnamed: 0,0
PassengerId,0
Survived,0
Pclass,0
Sex,0
Age,0
SibSp,0
Parch,0
Ticket,0
Fare,0
Cabin,0


In [10]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,male,22.0,1,0,A/5 21171,7.25,B96 B98,S
1,2,1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,female,26.0,0,0,STON/O2. 3101282,7.925,B96 B98,S
3,4,1,1,female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,male,35.0,0,0,373450,8.05,B96 B98,S


In [11]:
X = df.drop("Survived", axis = 1)
y = df["Survived"]

In [12]:
numerical_cols = [col for col in X.columns if (df[col].dtype == "float64" or df[col].dtype == "int64")]
categorical_cols = [col for col in X.columns if df[col].dtype == "O"]

In [13]:
preprocessor = ColumnTransformer([
    ("num", "passthrough", numerical_cols),
    ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), categorical_cols)
])

In [14]:
adac = Pipeline([
    ("preprocess", preprocessor),
    ("model", AdaBoostClassifier())
])

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
adac.fit(X_train, y_train)

In [17]:
print("Training Score:", adac.score(X_train, y_train))
print("Testing Score:", adac.score(X_test, y_test))

Training Score: 0.8258426966292135
Testing Score: 0.8044692737430168


In [18]:
print("Classification Report:", classification_report(y_test, adac.predict(X_test)))
print("Accuracy:", accuracy_score(y_test, adac.predict(X_test)))

Classification Report:               precision    recall  f1-score   support

           0       0.80      0.90      0.84       105
           1       0.82      0.68      0.74        74

    accuracy                           0.80       179
   macro avg       0.81      0.79      0.79       179
weighted avg       0.81      0.80      0.80       179

Accuracy: 0.8044692737430168


# **AdaBoot (Regressor)**

In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import AdaBoostRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score, r2_score
from xgboost import XGBRegressor

In [20]:
df = pd.read_csv("drive/MyDrive/Colab_Projects/StudentsPerformance.csv")

In [21]:
df.shape

(1000, 8)

In [22]:
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [23]:
df.isna().sum()

Unnamed: 0,0
gender,0
race/ethnicity,0
parental level of education,0
lunch,0
test preparation course,0
math score,0
reading score,0
writing score,0


In [24]:
df["race/ethnicity"].value_counts()

Unnamed: 0_level_0,count
race/ethnicity,Unnamed: 1_level_1
group C,319
group D,262
group B,190
group E,140
group A,89


In [25]:
df["parental level of education"].value_counts()

Unnamed: 0_level_0,count
parental level of education,Unnamed: 1_level_1
some college,226
associate's degree,222
high school,196
some high school,179
bachelor's degree,118
master's degree,59


In [26]:
df["lunch"].value_counts()

Unnamed: 0_level_0,count
lunch,Unnamed: 1_level_1
standard,645
free/reduced,355


In [27]:
df["test preparation course"].value_counts()

Unnamed: 0_level_0,count
test preparation course,Unnamed: 1_level_1
none,642
completed,358


In [28]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   gender                       1000 non-null   object
 1   race/ethnicity               1000 non-null   object
 2   parental level of education  1000 non-null   object
 3   lunch                        1000 non-null   object
 4   test preparation course      1000 non-null   object
 5   math score                   1000 non-null   int64 
 6   reading score                1000 non-null   int64 
 7   writing score                1000 non-null   int64 
dtypes: int64(3), object(5)
memory usage: 62.6+ KB


In [29]:
X = df.drop("math score", axis = 1)
y = df["math score"]

In [30]:
numerical_cols = [col for col in X.columns if df[col].dtype == "int64"]
nominal_cols = ["gender"]
ordinal_cols = [col for col in X.columns if df[col].dtype == "O" and col not in nominal_cols]

order = [["group E", "group D", "group C", "group B", "group A"], ["some college", "associate's degree", "high school", "some high school", "bachelor's degree", "master's degree"], ['free/reduced', 'standard'], ['none', 'completed']]

In [31]:
preprocessor = ColumnTransformer([
    ("num", "passthrough", numerical_cols),
    ("nom", OneHotEncoder(), nominal_cols),
    ("ord", OrdinalEncoder(), ordinal_cols)
])

In [32]:
adar = Pipeline([
    ("preprocess", preprocessor),
    ("model", AdaBoostRegressor())
])

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=43)

In [34]:
adar.fit(X_train, y_train)

In [35]:
print("Training Score:", adar.score(X_train, y_train))
print("Testing Score:", adar.score(X_test, y_test))

Training Score: 0.852077070623426
Testing Score: 0.8289989832772044


In [36]:
print("classification report:", r2_score(y_test, adar.predict(X_test)))

classification report: 0.8289989832772044
