In [None]:
Day 15: Classification with Logistic Regression
Introduction to classification problems
Binary classification using logistic regression.
Performance metrics: Accuracy, Precision, Recall, F1-Score

In [None]:
import io

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Load seaborn's "titanic" example dataset and preview its first two rows.
data = sns.load_dataset('titanic')
data.head(2)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False


In [None]:
# Keep only the target (survived) and the two numeric features (age, fare).
data = data.loc[:, ['survived', 'age', 'fare']]

In [None]:
# clean the data
data.isna().sum()

Unnamed: 0,0
survived,0
age,177
fare,0


In [None]:
# Remove every row that has at least one missing value (the missing ages),
# then confirm that no nulls remain in any column.
data = data.dropna(axis=0, how='any')
data.isnull().sum()

Unnamed: 0,0
survived,0
age,0
fare,0


In [None]:
# Separate the feature matrix (age, fare) from the target column (survived).
X = data.loc[:, ['age', 'fare']]
y = data.loc[:, 'survived']

In [None]:
# Train the model
from sklearn.linear_model   import LogisticRegression

In [None]:
model = LogisticRegression()

In [None]:
# train the model
model.fit(X,y)

In [None]:
# Predict the values
# NOTE(review): predictions are made on the same rows the model was fitted on (X),
# so every metric computed from y_pred is a training-set score, not an estimate
# of performance on unseen passengers.
y_pred = model.predict(X)
y_pred[0:5]

array([0, 1, 0, 0, 0])

In [None]:
# Performance metrics: Accuracy, Precision, Recall, F1-Score.

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, f1_score, precision_score

In [None]:
# Confusion matrix

In [None]:
# Rows are the true classes and columns the predicted classes (scikit-learn's
# convention), both ordered by label: 0 = did not survive, 1 = survived.
cm = confusion_matrix(y, y_pred)
cm

array([[380,  44],
       [201,  89]])

In [None]:
# Precision = TP / (TP + FP): share of predicted survivors who actually survived.
pres = precision_score(y_true=y, y_pred=y_pred)

In [None]:
print(f"Precision Score: {pres}")

Precision Score: 0.6691729323308271


In [None]:
# Recall = TP / (TP + FN): share of actual survivors the model identified.
rec = recall_score(y_true=y, y_pred=y_pred)
print(f"Recall Score : {rec}")

Recall Score : 0.30689655172413793


In [None]:
# F1 = harmonic mean of precision and recall for the positive class (survived = 1).
f1_sco = f1_score(y_true=y, y_pred=y_pred)
print(f"F1 Score :{f1_sco}")

F1 Score :0.42080378250591016


In [None]:
# Multiple (Multi-class) Classification

In [None]:
data_peng = sns.load_dataset('penguins')

In [None]:
data_peng.shape

(344, 7)

In [None]:
data_peng.head(2)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female


In [None]:
# Drop the categorical columns that are not used as features.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell safe to re-run: the in-place version raised KeyError on a second
# execution because the columns had already been removed.
data_peng = data_peng.drop(columns=['island', 'sex'], errors='ignore')

In [None]:
data_peng.isna().sum()

Unnamed: 0,0
species,0
bill_length_mm,2
bill_depth_mm,2
flipper_length_mm,2
body_mass_g,2


In [None]:
data_peng = data_peng.dropna()

In [None]:
data_peng.isna().sum()

Unnamed: 0,0
species,0
bill_length_mm,0
bill_depth_mm,0
flipper_length_mm,0
body_mass_g,0


In [None]:
# Feature matrix: every remaining column except the target label.
X = data_peng.drop(columns='species')
X.head(2)

Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g
0,39.1,18.7,181.0,3750.0
1,39.5,17.4,186.0,3800.0


In [None]:
y = data_peng['species']
y.head(2)

Unnamed: 0,species
0,Adelie
1,Adelie


In [None]:
# Standardise the features before the logistic regression. The raw columns are
# on very different scales (bill depth ~18 mm vs body mass ~3800 g), which is
# why the unscaled model hit the iteration limit even with max_iter=1000.
# The pipeline exposes the same fit/predict interface as the bare estimator.
lr = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
lr

In [None]:
# fit the model
lr.fit(X,y)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
y_pred = lr.predict(X)

In [None]:
y_pred[0:5]

array(['Adelie', 'Adelie', 'Adelie', 'Adelie', 'Adelie'], dtype=object)

In [None]:
# Project: Mobile Phone Price Prediction
import ipywidgets as widgets
from IPython.display import display

In [None]:
# File picker limited to a single .csv file; displayed so a file can be chosen.
upload_widget = widgets.FileUpload(accept='.csv', multiple=False)
display(upload_widget)

FileUpload(value={}, accept='.csv', description='Upload')

In [None]:
# function to read csv file
def load_csv_from_widget(upload_widget):
    """Read the file held by an ipywidgets ``FileUpload`` into a DataFrame.

    Handles both layouts of ``FileUpload.value``: the ipywidgets 7 dict
    (``{filename: {'content': bytes, ...}}``) and the ipywidgets 8 tuple of
    per-file records, where ``content`` is a ``memoryview``.

    Parameters
    ----------
    upload_widget : object
        Anything exposing a ``.value`` attribute in one of the layouts above.

    Returns
    -------
    pandas.DataFrame or None
        The parsed CSV, or ``None`` (after printing a notice) when no file
        has been uploaded yet.
    """
    uploads = upload_widget.value
    if len(uploads) == 0:
        print("No file uploaded.")
        return None

    # ipywidgets 7 keys uploads by filename; ipywidgets 8 returns a tuple.
    if isinstance(uploads, dict):
        uploaded_file = next(iter(uploads.values()))
    else:
        uploaded_file = uploads[0]

    # bytes() normalises a memoryview payload so both versions take one path.
    csv_buffer = io.BytesIO(bytes(uploaded_file['content']))
    return pd.read_csv(csv_buffer)


In [None]:
import io

In [None]:
# Parse the uploaded CSV into a DataFrame and display it.
# load_csv_from_widget returns None when no file has been selected in the
# upload widget, in which case the later df.copy() call will fail.
df = load_csv_from_widget(upload_widget)
df

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,794,1,0.5,1,0,1,2,0.8,106,6,...,1222,1890,668,13,4,19,1,1,0,0
1996,1965,1,2.6,1,0,0,39,0.2,187,4,...,915,1965,2032,11,10,16,1,1,1,2
1997,1911,0,0.9,1,1,1,36,0.7,108,8,...,868,1632,3057,9,1,5,1,1,0,3
1998,1512,0,0.9,0,4,1,46,0.1,145,5,...,336,670,869,18,10,19,1,1,1,0


In [None]:
data = df.copy()

In [None]:
data.columns

Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi', 'price_range'],
      dtype='object')

In [None]:
data['price_range'].unique()

array([1, 2, 3, 0])

In [None]:
# Logistic Regression
# check na
data.isna().sum()

Unnamed: 0,0
battery_power,0
blue,0
clock_speed,0
dual_sim,0
fc,0
four_g,0
int_memory,0
m_dep,0
mobile_wt,0
n_cores,0


In [None]:
# Split data in to X and y

In [None]:
# Feature matrix: every column except the price_range target.
X = data.drop(columns='price_range')

In [None]:
y = data['price_range']

In [None]:
X.shape

(2000, 20)

In [None]:
# binary (0, 1)
# multiclass (0, 1, ...)
data['price_range'].unique()

array([1, 2, 3, 0])

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Standardise the features before the logistic regression. The columns span
# very different ranges (e.g. clock_speed ~2 vs ram ~2500), and fitting the
# unscaled data stopped at the solver's iteration limit without converging.
# The pipeline exposes the same fit/predict interface as the bare estimator.
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))

In [None]:
# Fit (train) the model

In [None]:
model.fit(X,y)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Predict the values
y_pred = model.predict(X)

In [None]:
y_pred[0:5]

array([1, 2, 2, 3, 1])

In [None]:
# Accuracy_score, f1_score
from sklearn.metrics import accuracy_score , f1_score

In [None]:
# Accuracy: fraction of phones whose predicted price_range equals the true one.
acs = accuracy_score(y_true=y, y_pred=y_pred)
acs

0.6355

In [None]:
# F1 score
# price_range has four classes (0-3), so f1_score needs an explicit averaging
# strategy: the default average='binary' raises ValueError on multiclass
# targets. 'macro' is the unweighted mean of the per-class F1 scores.
f_score = f1_score(y, y_pred, average='macro')
f_score

ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

In [None]:
 actual   predicted
0 1
1 1
1 1
2 2
3 3
1 1
0 0
1 1
9/ 10
90%

In [None]:
micro macro
Multiple Classification
0
1
2
3...
True [1] (positive, negative)
False [0] (positive, negative)