# **Logistic Regression**

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline

In [2]:
df = pd.read_csv('Social_Network_Ads.csv')
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [3]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns='Purchased'), df['Purchased'])

In [4]:
X_train.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary
69,15595324,Female,31,68000
222,15694879,Male,37,144000
36,15690188,Female,33,28000
258,15569641,Female,58,95000
330,15693264,Male,38,51000


In [5]:
numerical_columns = [0,2,3]
categorical_columns = [1]

In [6]:
handle_numerical = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='mean'))
])

In [7]:
handle_catgorical = Pipeline(steps=[
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('encode', OneHotEncoder())
])

In [8]:
preprocessing = ColumnTransformer(transformers=[
    ('numerical', handle_numerical, numerical_columns),
    ('categorical', handle_catgorical, categorical_columns)
], remainder='passthrough')

In [9]:
model = LogisticRegression()

In [10]:
pipe = make_pipeline(preprocessing, model)

In [11]:
pipe.fit(X_train, y_train)

In [12]:
y_pred = pipe.predict(X_test)

In [13]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8
