In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
 
# ---------------------------------------------------------------------------
# Logistic-regression baseline for the 'DIABETE4' target.
# Pipeline: load CSV -> drop id column -> drop incomplete rows -> one-hot
# encode predictors -> 80/20 split -> standardize -> fit -> accuracy ->
# export coefficients to "LR-B.csv".
# ---------------------------------------------------------------------------

# Load the dataset from a CSV file (the source is CSV, not an Excel workbook).
df = pd.read_csv('Data2.csv')

# Drop the 'SEQNO' column so it is not used as a predictor
# (presumably a record sequence number -- confirm against the data dictionary).
df = df.drop(columns=['SEQNO'])

# Complete-case analysis: discard every row that has at least one NaN.
# (Imputation is the alternative if this removes too many rows.)
df = df.dropna()

# Separate the target BEFORE dummy encoding. Encoding the whole frame would
# replace a non-numeric 'DIABETE4' with dummy columns and make the lookups
# below fail with KeyError; encoding only the predictors is safe either way
# and yields the same X / y when the target is numeric.
y = df['DIABETE4']
X = pd.get_dummies(df.drop(columns=['DIABETE4']), drop_first=True)

# Keep `df` bound to the encoded data (features + target) in case a later
# cell reads it; the target is now the last column.
df = X.join(y)

# Splitting the dataset into training and testing sets (80% / 20%, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: the scaler is fitted on the training split only and then
# applied to the test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the Logistic Regression model. max_iter is raised to 10000 to give the
# solver ample iterations to converge.
model = LogisticRegression(random_state=42, max_iter=10000)
model.fit(X_train_scaled, y_train)

# Making predictions on the test set
y_pred = model.predict(X_test_scaled)

# Calculating the accuracy.
# NOTE(review): accuracy alone can look good on an imbalanced target; compare
# it against the majority-class rate of y_test before drawing conclusions.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy of the model:", accuracy)

# Retrieve and display the model coefficients as "feature importances".
# - Coefficients refer to the standardized features, so their magnitudes are
#   comparable across columns.
# - The table is sorted by SIGNED value: strongly negative coefficients land
#   at the bottom even though their magnitude may be large.
# - NOTE(review): coef_[0] is the complete coefficient vector only for a
#   two-class target; with more classes it is the first class's row only.
feature_importance = pd.DataFrame(model.coef_[0],
                                  index=X.columns,
                                  columns=['importance']).sort_values('importance', ascending=False)
print(feature_importance)
feature_importance.to_csv("LR-B.csv")

Accuracy of the model: 0.8663101604278075
                             importance
GENHLTH_Fair                   0.957641
MARITAL_Never Married          0.755422
MEDCOST1_Yes                   0.667113
CHECKUP1_Within past year      0.627869
_AGE_G_45-54                   0.626160
...                                 ...
CHECKUP1_3-5 Years            -0.661948
ALCCALCCAT_3-5 drinks a day   -0.679564
_BMI5CAT_Overweight           -0.690769
_PHYS14D_Zero days            -0.777696
_INCOMG1_$35,000 - $50,000    -0.843288

[84 rows x 1 columns]
