# IMPORTING LIBRARIES

In [1]:
import pickle

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# LOADING THE DATASET

In [2]:
# Read the village infrastructure table from a CSV file located in the
# notebook's working directory.
dataset = pd.read_csv(filepath_or_buffer="Village_infras.csv")

# Show the first five rows as this cell's output.
dataset.head(n=5)

Unnamed: 0,library,primary_school,secondary_school,college,atm,supermarket,panchayat_office,post_office,hospital,bus_stop,common_service_center,weighted_development_score,developed
0,1,1,1,1,1,1,1,1,1,1,1,100.0,1
1,0,0,0,1,1,1,0,0,1,1,1,52.941176,0
2,1,1,0,0,1,1,0,0,1,0,0,47.058824,0
3,1,1,1,1,1,1,1,1,1,1,1,100.0,1
4,0,1,1,0,1,1,1,1,0,1,1,76.470588,1


# SELECTING FEATURES AND TARGET VARIABLE

In [3]:
# Columns that must not be used as model inputs: the label itself and the
# continuous score (presumably the label is derived from that score, which
# would leak the answer -- confirm against how the dataset was built).
excluded_columns = ["weighted_development_score", "developed"]

# Feature matrix: every remaining column of the dataset.
X = dataset.drop(excluded_columns, axis="columns")

# Target vector: the `developed` column.
y = dataset["developed"]

# ADDING NOISE AND SPLITTING INTO TRAINING AND TESTING DATA

In [4]:
# Randomly flip a small share of the feature values, then split the data.
# The noise is applied to the full feature matrix *before* the split, so both
# the training and the test partitions contain flipped entries.
noise_percentage = 0.05  # probability that any single feature cell is flipped
noise_seed = 42  # fixed seed so Restart & Run All reproduces the same noisy data

X_noisy = X.copy()

# Use a seeded generator instead of the unseeded global NumPy state; otherwise
# every kernel restart yields different noise, and therefore different test
# rows and different metrics further down the notebook.
rng = np.random.default_rng(noise_seed)
random_mask = rng.random(X_noisy.shape) < noise_percentage

# Flip 0s to 1s and 1s to 0s where the mask is True: |x - 1| swaps 0 and 1,
# while |x - 0| leaves the value untouched.
# Assumes every column of X holds only 0/1 values -- confirm against the data.
X_noisy = np.abs(X_noisy - random_mask.astype(int))

# Split noisy data into train and test sets: 20% held out for testing, with
# stratification on y so both partitions keep the same class proportions.
X_train_noisy, X_test_noisy, y_train_noisy, y_test_noisy = train_test_split(
    X_noisy, y, test_size=0.2, random_state=42, stratify=y
)

In [5]:
# Preview the first five rows of the noisy test features.
X_test_noisy.head(n=5)

Unnamed: 0,library,primary_school,secondary_school,college,atm,supermarket,panchayat_office,post_office,hospital,bus_stop,common_service_center
988,1,1,0,0,0,1,1,0,0,0,0
6357,0,1,1,1,1,0,1,1,1,0,0
1947,1,0,1,1,1,0,0,0,0,0,0
12696,0,1,1,0,0,1,1,1,0,1,1
12694,0,0,1,1,0,1,0,0,1,0,1


# DEFINING AND FITTING THE LOGISTIC REGRESSION MODEL, THEN PREDICTING

In [7]:
# Logistic regression classifier with a fixed seed and an iteration cap of 100
# for the solver.
lr_model = LogisticRegression(
    random_state=42,
    max_iter=100,
)

In [9]:
# Train on the noisy training partition, then predict labels for the noisy
# test partition. `fit` returns the fitted estimator, so the calls can be chained.
lr_preds = lr_model.fit(X_train_noisy, y_train_noisy).predict(X_test_noisy)

# Display the predicted labels as this cell's output.
lr_preds

array([0, 1, 0, ..., 0, 0, 0], dtype=int64)

# EVALUATING MODEL

In [11]:
# Score the test-set predictions with each metric, in the same order the
# results are reported below.
accuracy, precision, recall, f1 = (
    metric(y_test_noisy, lr_preds)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# One metric per output line.
print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    sep="\n",
)

Accuracy: 0.955
Precision: 0.9326241134751773
Recall: 0.9392857142857143
F1 Score: 0.9359430604982206


# SAVING THE MODEL

In [12]:
# Serialize the fitted classifier to disk with pickle. The file is written to
# the notebook's working directory and is overwritten if it already exists.
# Only unpickle this file from a trusted location: loading a pickle can
# execute arbitrary code.
with open('lr_trained_model.pkl', 'wb') as model_file:
    pickle.dump(lr_model, model_file)

print("Trained model saved to lr_trained_model.pkl")

Trained model saved to lr_trained_model.pkl
