In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib
import os
from sklearn.preprocessing import LabelEncoder

# Load dataset.
# The original absolute path remains the default so existing behaviour is
# unchanged; set the FRAUD_DATASET_PATH environment variable to run this
# notebook on another machine without editing the cell.
DATA_PATH = os.environ.get('FRAUD_DATASET_PATH', r"C:\Users\alsha\Downloads\fraud_dataset.csv")
data = pd.read_csv(DATA_PATH)

# Encode categorical columns if any
categorical_columns = ['location', 'time_of_day']  # Replace with actual categorical columns in your dataset

# Fit one LabelEncoder per categorical column and keep every fitted encoder.
# Reusing a single encoder across columns refits it each time, so only the
# mapping of the last column would survive and new data could not be encoded
# with the same category -> integer codes that the model was trained on.
label_encoders = {}
for col in categorical_columns:
    label_encoder = LabelEncoder()
    data[col] = label_encoder.fit_transform(data[col])
    label_encoders[col] = label_encoder

# Separate features and labels
# NOTE(review): every column except 'is_fraud' is used as a feature, which
# includes the 'transaction_id' identifier. An identifier normally carries no
# generalisable signal and can inflate test scores -- confirm this is intended
# before trusting the evaluation report.
X = data.drop(columns=['is_fraud'])
y = data['is_fraud']

# Train-test split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate model on the held-out 20%
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# Ensure the 'models' directory exists (no error if it is already there)
os.makedirs('models', exist_ok=True)

# Save the fitted encoders next to the model so inference code can apply the
# exact same category codes.
joblib.dump(label_encoders, 'models/label_encoders.pkl')

# Save the model (kept as the last expression so the cell output is unchanged)
joblib.dump(model, 'models/fraud_model.pkl')


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        96
           1       1.00      1.00      1.00       104

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



['models/fraud_model.pkl']

In [4]:
# Show the column names of the loaded dataset
column_names = data.columns
print(column_names)


Index(['transaction_id', 'amount', 'user_age', 'location', 'login_attempts',
       'time_of_day', 'is_fraud'],
      dtype='object')


In [11]:
import pandas as pd
import joblib
import os

# Make sure the data directory exists
os.makedirs('data', exist_ok=True)

# Build a few sample transactions in the same layout as the training data
# (including transaction_id).
# NOTE(review): 'location' and 'time_of_day' are given here as integer codes.
# They must match the codes produced by the LabelEncoder at training time;
# that mapping is not loaded in this cell -- verify the codes are correct.
sample_transactions = pd.DataFrame([
    {'transaction_id': 2001, 'amount': 1500, 'user_age': 29, 'location': 0, 'login_attempts': 2, 'time_of_day': 1},
    {'transaction_id': 2002, 'amount': 250, 'user_age': 40, 'location': 1, 'login_attempts': 1, 'time_of_day': 0},
    {'transaction_id': 2003, 'amount': 9000, 'user_age': 22, 'location': 2, 'login_attempts': 4, 'time_of_day': 2},
])

# Save the sample transactions to a CSV file
sample_transactions.to_csv('data/new_transactions.csv', index=False)

# Load the trained model
model = joblib.load('models/fraud_model.pkl')

# Load the new transactions back from disk
new_data = pd.read_csv('data/new_transactions.csv')

# Align the input with the exact feature columns (and order) the model was
# fitted on, so extra or reordered columns in the CSV cannot break or skew the
# prediction. Falls back to all columns if the model carries no feature names.
expected_features = list(getattr(model, 'feature_names_in_', new_data.columns))
missing_features = [name for name in expected_features if name not in new_data.columns]
if missing_features:
    raise ValueError(f"New data is missing feature columns required by the model: {missing_features}")

predictions = model.predict(new_data[expected_features])
new_data['fraud_prediction'] = predictions

# Save the results
new_data.to_csv('data/predictions.csv', index=False)

# Show how many transactions were predicted for each class
print(new_data[['fraud_prediction']].value_counts())


fraud_prediction
0                   2
1                   1
Name: count, dtype: int64
