<a href="https://colab.research.google.com/github/gabrielnichio/ad-click-classification/blob/main/ad_click_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [52]:
import pandas as pd
import plotly.express as px

In [53]:
# Read the ad-click CSV into the working DataFrame.
DATASET_PATH = "/content/ad_click_dataset.csv"
data = pd.read_csv(filepath_or_buffer=DATASET_PATH)

# Dropping null values

In [54]:
# Remove every row that contains a missing value, renumber the index from 0,
# then discard the `id` column.
# NOTE(review): nulls are dropped while `id` (and `full_name`, removed in the
# next cell) are still present, so rows missing only those fields are lost too
# — confirm that is intended.
data = (
    data
    .dropna()
    .reset_index(drop=True)
    .drop(columns=['id'])
)

# Dropping useless columns

In [55]:
# Discard the `full_name` column from the working frame.
data = data.drop(columns=['full_name'])

In [56]:
# Recode the numeric click flag as 'yes'/'no' labels, then display the frame.
click_labels = {1: 'yes', 0: 'no'}
data = data.assign(click=data['click'].replace(click_labels))
data

Unnamed: 0,age,gender,device_type,ad_position,browsing_history,time_of_day,click
0,56.0,Female,Tablet,Bottom,News,Morning,yes
1,43.0,Male,Tablet,Bottom,Education,Afternoon,yes
2,37.0,Male,Mobile,Top,News,Evening,no
3,49.0,Male,Mobile,Top,News,Morning,yes
4,59.0,Female,Desktop,Bottom,Social Media,Morning,no
...,...,...,...,...,...,...,...
811,28.0,Female,Desktop,Bottom,News,Evening,yes
812,41.0,Non-Binary,Mobile,Side,Education,Night,yes
813,64.0,Non-Binary,Desktop,Top,Entertainment,Morning,no
814,52.0,Female,Desktop,Bottom,Shopping,Afternoon,yes


# Data Analysis

In [57]:
# Print the column dtypes, non-null counts and memory usage of the cleaned frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 816 entries, 0 to 815
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   age               816 non-null    float64
 1   gender            816 non-null    object 
 2   device_type       816 non-null    object 
 3   ad_position       816 non-null    object 
 4   browsing_history  816 non-null    object 
 5   time_of_day       816 non-null    object 
 6   click             816 non-null    object 
dtypes: float64(1), object(6)
memory usage: 44.8+ KB


In [58]:
# Count of rows per click label, with the counts printed on the bars.
px.histogram(
    data_frame=data,
    x='click',
    text_auto=True,
)

In [59]:
# Row counts per gender, with side-by-side bars for each click label.
px.histogram(
    data_frame=data,
    x='gender',
    color='click',
    barmode='group',
    text_auto=True,
)

In [60]:
# Row counts per time of day, with side-by-side bars for each click label.
px.histogram(
    data_frame=data,
    x='time_of_day',
    color='click',
    barmode='group',
    text_auto=True,
)

In [61]:
# Row counts per browsing-history category, with side-by-side bars for each click label.
px.histogram(
    data_frame=data,
    x='browsing_history',
    color='click',
    barmode='group',
    text_auto=True,
)

In [62]:
# Box plot of age, one box per click label.
px.box(
    data_frame=data,
    x='age',
    color='click',
)

# Separating features and target


In [63]:
# Split the frame into the target series (`click`) and the remaining feature columns.
target_column = "click"
y = data[target_column]
x = data.drop(columns=[target_column])

In [66]:
# NOTE(review): this cell's execution count (66) is later than the One Hot
# Encoding cell's (65), and the saved output is a NumPy array, i.e. it shows
# `x` after encoding. On a fresh top-to-bottom run it would display the raw
# feature DataFrame instead; consider moving this cell below the encoding step.
x

array([[ 1.,  0.,  0., ...,  1.,  0., 56.],
       [ 0.,  1.,  0., ...,  0.,  0., 43.],
       [ 0.,  1.,  0., ...,  0.,  0., 37.],
       ...,
       [ 0.,  0.,  1., ...,  1.,  0., 64.],
       [ 1.,  0.,  0., ...,  0.,  0., 52.],
       [ 0.,  1.,  0., ...,  1.,  0., 44.]])

# One-Hot Encoding

In [64]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import make_column_transformer

In [65]:
# Build the encoder input from `data` rather than from `x`: this cell rebinds
# `x` to the encoded NumPy array, so reading `x.columns` or selecting columns
# by name from `x` would fail if the cell were run a second time.
features = data.drop(columns="click")
column_names = features.columns

categorical_columns = ["gender", "device_type", "ad_position", "browsing_history", "time_of_day"]

# One-hot encode the categorical columns; any other column (here `age`) is
# passed through unchanged and placed after the encoded ones.
one_hot = make_column_transformer(
    (
        # drop='if_binary' keeps a single indicator column for two-category features.
        OneHotEncoder(drop='if_binary', sparse_output=False),
        categorical_columns,
    ),
    remainder='passthrough',
    sparse_threshold=0,  # always return a dense array
)

x = one_hot.fit_transform(features)

# Preview the encoded matrix labelled with the transformer's generated column names.
pd.DataFrame(x, columns=one_hot.get_feature_names_out(column_names))

Unnamed: 0,onehotencoder__gender_Female,onehotencoder__gender_Male,onehotencoder__gender_Non-Binary,onehotencoder__device_type_Desktop,onehotencoder__device_type_Mobile,onehotencoder__device_type_Tablet,onehotencoder__ad_position_Bottom,onehotencoder__ad_position_Side,onehotencoder__ad_position_Top,onehotencoder__browsing_history_Education,onehotencoder__browsing_history_Entertainment,onehotencoder__browsing_history_News,onehotencoder__browsing_history_Shopping,onehotencoder__browsing_history_Social Media,onehotencoder__time_of_day_Afternoon,onehotencoder__time_of_day_Evening,onehotencoder__time_of_day_Morning,onehotencoder__time_of_day_Night,remainder__age
0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,56.0
1,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,43.0
2,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,37.0
3,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,49.0
4,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,59.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
811,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,28.0
812,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,41.0
813,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,64.0
814,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,52.0


# LabelEncoder