In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Define the data
data = {
    'customerID': ['001', '002', '003', '004', '005'],
    'gender': ['Male', 'Female', 'Male', 'Male', 'Female'],
    'SeniorCitizen': [0, 1, 0, 0, 1],
    'Partner': ['Yes', 'No', 'Yes', 'No', 'Yes'],
    'Dependents': ['Yes', 'No', 'No', 'No', 'Yes'],
    'tenure': [10, 5, 3, 8, 2],
    'PhoneService': ['Yes', 'Yes', 'No', 'Yes', 'Yes'],
    'MultipleLines': ['No', 'No', 'No phone service', 'Yes', 'Yes'],
    'InternetService': ['DSL', 'Fiber optic', 'Fiber optic', 'DSL', 'DSL'],
    'OnlineSecurity': ['No', 'No', 'No', 'Yes', 'No'],
    'OnlineBackup': ['Yes', 'No', 'No', 'Yes', 'No'],
    'DeviceProtection': ['No', 'No', 'No', 'Yes', 'No'],
    'TechSupport': ['No', 'No', 'No', 'No', 'No'],
    'StreamingTV': ['No', 'Yes', 'Yes', 'No', 'No'],
    'StreamingMovies': ['No', 'No', 'Yes', 'Yes', 'No'],
    'Contract': ['Month-to-month', 'One year', 'Month-to-month', 'Two year', 'Month-to-month'],
    'PaperlessBilling': ['Yes', 'No', 'Yes', 'No', 'Yes'],
    'PaymentMethod': ['Electronic check', 'Mailed check', 'Mailed check', 'Bank transfer (automatic)', 'Electronic check'],
    'MonthlyCharges': [50.0, 70.0, 80.0, 60.0, 55.0],
    'TotalCharges': [500.0, 350.0, 240.0, 480.0, 110.0],
    'Churn': ['No', 'Yes', 'No', 'No', 'Yes']
}

# Create a DataFrame
df = pd.DataFrame(data)

# Save DataFrame to CSV file
df.to_csv('Telco-Customer-Churn.csv', index=False)

# Load the dataset
url = 'Telco-Customer-Churn.csv'
df = pd.read_csv(url)

# Data preprocessing
# Convert categorical variables to numerical using one-hot encoding
df = pd.get_dummies(df, columns=['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod'])

# Convert 'Churn' column to binary numeric values
df['Churn'] = df['Churn'].apply(lambda x: 1 if x == 'Yes' else 0)

# Split data into features and target variable
X = df.drop(columns=['customerID', 'Churn'])
y = df['Churn']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)  # Added stratify=y

# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model training
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Model evaluation
y_pred = model.predict(X_test)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("ROC AUC Score:", roc_auc)


Accuracy: 0.5
Precision: 0.0
Recall: 0.0
F1 Score: 0.0
ROC AUC Score: 0.5


  _warn_prf(average, modifier, msg_start, len(result))
