<a href="https://colab.research.google.com/github/aadya-chopra/Internship-Kernel/blob/main/task4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import pandas as pd
import numpy as np
import os
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [20]:
# Load dataset
def load_data(file_path):
    df = pd.read_csv(WA_Fn-UseC_-Telco-Customer-Churn.csv)
    print("Dataset Loaded Successfully!")
    print(df.head())  # Display first few rows
    return df

In [9]:

# Define model versioning function
def get_model_version(model_dir="models/"):
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    existing_models = [f for f in os.listdir(model_dir) if f.startswith("churn_model_v")]
    return len(existing_models) + 1

In [13]:

# Preprocess dataset
def preprocess_data(df, target_column):
    df = df.dropna()  # Handling missing values
    label_encoders = {}
      # Encoding categorical features
    for col in df.select_dtypes(include=['object']).columns:
        if col != target_column:
            le = LabelEncoder()
            df[col] = le.fit_transform(df[col])
            label_encoders[col] = le
              # Splitting features and target
    X = df.drop(columns=[target_column])
    y = df[target_column]

    # Scaling numerical features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    return X, y, scaler, label_encoders

In [14]:
# Train model
def train_model(X_train, y_train):
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    return model

In [15]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print("Accuracy:", acc)
    print("Classification Report:\n", classification_report(y_test))
    return acc

In [16]:
# Save model
def save_model(model, scaler, label_encoders, model_version, model_dir="models/"):
    model_path = f"{model_dir}/churn_model_v{model_version}.pkl"
    joblib.dump({"model": model, "scaler": scaler, "encoders": label_encoders}, model_path)
    print(f"Model saved as {model_path}")

In [18]:
# Main pipeline
def main():
    file_path = "/WA_Fn-UseC_-Telco-Customer-Churn.csv"  # Update this with actual file path
    df = load_data(file_path)
    X, y, scaler, label_encoders = preprocess_data(df, target_column="Churn")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = train_model(X_train, y_train)
    accuracy = evaluate_model(model, X_test, y_test)

    # Model versioning
    model_version = get_model_version()
    save_model(model, scaler, label_encoders, model_version)

if __name__ == "__main__":
    main()

NameError: name 'WA_Fn' is not defined

In [7]:












# Evaluate model






NameError: name 'WA_Fn' is not defined