In [1]:
import pandas as pd
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [3]:
# ---------- STEP 1: Load and prepare data ----------
def load_data(filepath, target_column='Class', test_size=0.2, random_state=2):
    """Read a CSV file and split it into stratified train/test partitions.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file, passed straight to ``pd.read_csv``.
    target_column : str, default 'Class'
        Name of the single column holding the labels; every other column
        is treated as a feature.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int, default 2
        Forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    Whatever ``train_test_split`` returns for ``(features, labels)``; the
    caller unpacks it as ``X_train, X_test, Y_train, Y_test``.

    Raises
    ------
    KeyError
        If ``target_column`` is not a column of the loaded file.
    """
    df = pd.read_csv(filepath)
    if target_column not in df.columns:
        raise KeyError(
            f"Target column {target_column!r} not found in {filepath!r}"
        )

    # `columns=` already selects the axis, so no separate `axis` argument.
    features = df.drop(columns=target_column)
    labels = df[target_column]

    # Stratify on the labels so both partitions keep the same class ratio.
    return train_test_split(
        features,
        labels,
        test_size=test_size,
        stratify=labels,
        random_state=random_state,
    )


In [5]:
# ---------- STEP 2: Train model ----------
def train_model(X_train, Y_train):
    """Fit a default-configured ``LogisticRegression`` on the training data.

    Parameters
    ----------
    X_train : training features, passed to ``fit`` unchanged.
    Y_train : training labels, passed to ``fit`` unchanged.

    Returns
    -------
    The fitted ``LogisticRegression`` estimator.
    """
    # `fit` hands back the estimator itself, so construction and fitting chain.
    return LogisticRegression().fit(X_train, Y_train)

In [7]:
# ---------- STEP 3: Save model ----------
def save_model(model, filename='credit_card_fraud_model.pkl'):
    """Pickle ``model`` to ``filename`` and print a confirmation line.

    Parameters
    ----------
    model : any picklable object (the trained estimator in this notebook).
    filename : destination path; an existing file is overwritten because
        the file is opened in ``'wb'`` mode.

    Returns
    -------
    None
    """
    with open(filename, 'wb') as sink:
        # Pickler(...).dump is the object-oriented spelling of pickle.dump.
        pickle.Pickler(sink).dump(model)

    confirmation = f"Model saved to {filename}"
    print(confirmation)

In [9]:
# ---------- STEP 4: Score new data ----------
def score_model(input_data, model_path='credit_card_fraud_model.pkl', return_proba=False):
    """Load a pickled model from disk and score ``input_data`` with it.

    Parameters
    ----------
    input_data : data handed unchanged to the model's prediction method.
    model_path : path of the pickle file to load.
    return_proba : when true, call ``predict_proba``; otherwise ``predict``.

    Returns
    -------
    Whatever the selected prediction method returns.
    """
    # pickle.load can execute arbitrary code embedded in the file, so
    # model_path should only ever point at a trusted file.
    with open(model_path, 'rb') as handle:
        fitted = pickle.load(handle)

    predictor = fitted.predict_proba if return_proba else fitted.predict
    return predictor(input_data)

In [13]:
# ---------- STEP 5: Evaluate model ----------
def evaluate_model(model, X_test, Y_test):
    """Compute, print and return the model's accuracy on held-out data.

    Parameters
    ----------
    model : object exposing ``predict(X)``.
    X_test : features passed to ``model.predict``.
    Y_test : true labels compared against the predictions.

    Returns
    -------
    The value produced by ``accuracy_score(Y_test, predictions)``.
    """
    predictions = model.predict(X_test)
    accuracy = accuracy_score(Y_test, predictions)
    # NOTE(review): if the labels are heavily imbalanced, accuracy alone can
    # look high for a trivial classifier -- consider precision/recall too.
    print("Model Accuracy on Test Data:", accuracy)
    return accuracy

In [None]:
"""
# ---------- RUN PIPELINE ----------

if __name__ == '__main__':
    # Step 1: Load data
    X_train, X_test, Y_train, Y_test = load_data('creditcard.csv')  # <-- change to your file

    # Step 2: Train
    model = train_model(X_train, Y_train)

    # Step 3: Evaluate
    evaluate_model(model, X_test, Y_test)

    # Step 4: Save
    save_model(model)
"""