<a href="https://colab.research.google.com/github/sujithchowdaryunnam/predictive-analysis/blob/credit-card-fraud-detection/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Step 1: Load Dataset
data = pd.read_csv('creditcard_2023.csv')

# Step 2: Clean column names
data.columns = data.columns.str.strip()

# Step 3: Check columns
print("🧾 Columns in Dataset:\n", data.columns)

# Step 4: Define Target and Features
if 'Class' not in data.columns:
    raise ValueError("❌ 'Class' column (target) not found in dataset.")

X = data.drop(columns=['Class'])
y = data['Class']

# Optional: Remove ID/Time columns if any
non_useful_columns = [col for col in X.columns if 'id' in col.lower() or 'time' in col.lower()]
X = X.drop(columns=non_useful_columns)

# Keep only numeric features
X = X.select_dtypes(include='number')

# Step 5: Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 6: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# Step 7: Train Logistic Regression
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 8: Evaluate the Model
y_pred = model.predict(X_test)
print("\n📊 Model Evaluation:")
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Step 9: Dynamic User Input
print("\n🔎 Predict Transaction from User Input")
user_input = []
feature_names = X.columns

for feature in feature_names:
    while True:
        try:
            value = float(input(f"Enter value for '{feature}': "))
            user_input.append(value)
            break
        except ValueError:
            print("⚠️ Please enter a valid number.")

# Step 10: Predict from input
input_scaled = scaler.transform([user_input])
prediction = model.predict(input_scaled)[0]

# Step 11: Output result
print("\n🧠 Prediction Result:")
if prediction == 1:
    print("⚠️ The transaction is predicted as: FRAUDULENT")
else:
    print("✅ The transaction is predicted as: LEGITIMATE")


🧾 Columns in Dataset:
 Index(['id', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'Class'],
      dtype='object')

📊 Model Evaluation:
[[55591  1272]
 [ 2715 54148]]
              precision    recall  f1-score   support

           0       0.95      0.98      0.97     56863
           1       0.98      0.95      0.96     56863

    accuracy                           0.96    113726
   macro avg       0.97      0.96      0.96    113726
weighted avg       0.97      0.96      0.96    113726


🔎 Predict Transaction from User Input
Enter value for 'V1': 0.99999
Enter value for 'V2': 1.09090
Enter value for 'V3': -1.09099
Enter value for 'V4': -0.09098
Enter value for 'V5': 1.23456
Enter value for 'V6': 0.90405
Enter value for 'V7': 0.00405
Enter value for 'V8': 1.00405
Enter value for 'V9': 0.09090
Enter value for 'V10': -

In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

# Load the data
data = pd.read_csv('student_data.csv')

# Strip whitespace from column names
data.columns = data.columns.str.strip()

# Print columns
print("Cleaned columns:", list(data.columns))

# Drop non-numeric or ID columns not useful for prediction
data = data.drop(columns=['Student ID', 'Student Name', 'Date of Birth'])

# Convert categorical columns using one-hot encoding
data = pd.get_dummies(data, drop_first=True)

# Define target column
target_column = 'Fees'  # Or use 'Discount on Fees' if preferred

# Features and target
X = data.drop(columns=[target_column])
y = data[target_column]

# Save feature names for user input
feature_names = X.columns

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
print("\nModel Performance:")
print("R2 Score:", r2_score(y_test, y_pred))
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

# User input for prediction
print("\n--- Predict Student Fees ---")
user_input = []

for feature in feature_names:
    val = float(input(f"Enter value for '{feature}': "))
    user_input.append(val)

# Predict
input_df = pd.DataFrame([user_input], columns=feature_names)
predicted = model.predict(input_df)

print(f"\n🎯 Predicted Student Fees: ₹{predicted[0]:,.2f}")


Cleaned columns: ['Student ID', 'Student Name', 'Date of Birth', 'Field of Study', 'Year of Admission', 'Expected Year of Graduation', 'Current Semester', 'Specialization', 'Fees', 'Discount on Fees']

Model Performance:
R2 Score: 0.23455421459269243
Mean Squared Error: 1439525220.739343

--- Predict Student Fees ---
Enter value for 'Year of Admission': 2022
Enter value for 'Expected Year of Graduation': 2026
Enter value for 'Current Semester': 7
Enter value for 'Discount on Fees': 100000
Enter value for 'Field of Study_Civil Engineering': 0
Enter value for 'Field of Study_Computer Science': 1
Enter value for 'Field of Study_Electrical Engineering': 0
Enter value for 'Field of Study_Mechanical Engineering': 0
Enter value for 'Specialization_Data Science': 1
Enter value for 'Specialization_Machine Learning': 0
Enter value for 'Specialization_Network Security': 0
Enter value for 'Specialization_Web Development': 0

🎯 Predicted Student Fees: ₹337,149.27
