# Pandas Apply Examples

In [3]:

import pandas as pd

# Read the finance transactions CSV into a DataFrame.
# The path has no directory component, so it is resolved against the
# notebook's current working directory.
file_path = "finance_dataset_adjusted.csv"
df = pd.read_csv(filepath_or_buffer=file_path)





In [4]:
# Example 1: Convert every `Amount` value to an integer.
# The built-in `int` is handed to apply() directly, so it is called once per
# element; any fractional part is dropped (truncation, not rounding).
df['Amount'] = df['Amount'].apply(int)

In [5]:
# Example 2: Label each transaction by size.
# Amounts strictly greater than 5000 are tagged 'High'; everything else
# (including exactly 5000) is tagged 'Low'.
df['Amount_Flag'] = df['Amount'].apply(
    lambda amount: 'High' if amount > 5000 else 'Low'
)

    

In [6]:

# Example 3: Derive the calendar year of each transaction from `Date`.
# Each entry is parsed on its own with pd.to_datetime and only the `.year`
# attribute of the parsed value is kept.
df['Year'] = df['Date'].apply(
    lambda raw_date: pd.to_datetime(raw_date).year
)


In [7]:

# Example 4: Express `Amount` as a percentage of `Account_Balance`.
# axis=1 passes the lambda one row at a time, which is what makes both
# columns of the same transaction available in a single call.
df['Amount_Percentage'] = df.apply(
    lambda record: record['Amount'] / record['Account_Balance'] * 100,
    axis=1,
)


In [8]:

# Example 5: Build a "Merchant (Category)" label from the two text columns.
# Row-wise apply (axis=1) is used because the label needs both fields of the
# same transaction.
df['Merchant_Category'] = df.apply(
    lambda row: f"{row['Merchant']} ({row['Category']})",
    axis=1,
)



In [9]:
# Preview the first five rows of the DataFrame, derived columns included
df.head(5)

Unnamed: 0,Transaction_ID,Account_Number,Transaction_Type,Amount,Date,Merchant,Account_Balance,Category,Fraud_Flag,Amount_Flag,Year,Amount_Percentage,Merchant_Category
0,8878612,1600350,Transfer,3510,2023-04-30,Armstrong Inc,91071.08,Utilities,False,Low,2023,3.854132,Armstrong Inc (Utilities)
1,2733712,6056870,Deposit,2522,2023-06-22,Johnson and Sons,75544.71,Groceries,False,Low,2023,3.33842,Johnson and Sons (Groceries)
2,5836221,3737324,Transfer,9443,2022-09-19,Lopez-Luna,59332.32,Health,False,High,2022,15.91544,Lopez-Luna (Health)
3,9755716,1106801,Deposit,9390,2023-03-28,Berry LLC,90189.54,Travel,False,High,2023,10.411407,Berry LLC (Travel)
4,9204424,5001180,Withdrawal,1608,2023-07-25,Smith-Fletcher,38271.1,Groceries,True,Low,2023,4.201604,Smith-Fletcher (Groceries)


### Line-by-line processing

In [10]:

import pandas as pd

# Load the CSV again into a fresh DataFrame.
# Rebinding `df` here means the loop below works on the file's original
# contents rather than on any previously modified frame.
file_path = "finance_dataset_adjusted.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [11]:
# Reset every accumulator used by the row-by-row loop.

# Running totals
total_amount = 0
total_fraud = 0

# Per-type transaction counters
transfer_count = deposit_count = withdrawal_count = 0

# Collections filled during the loop
fraud_transactions = list()   # Transaction_ID of every flagged row
category_spend = dict()       # Category -> summed Amount

In [12]:

# Loop through the DataFrame row by row, updating the running metrics and
# printing a report for every transaction.
#
# `rows_seen` is a 1-based position counter supplied by enumerate(). The
# running average divides by it instead of by `index + 1`, because `index` is
# the row's index *label*: it only equals the row position for a default
# 0..n-1 index, and is not even guaranteed to be numeric.
for rows_seen, (index, row) in enumerate(df.iterrows(), start=1):

    # 1. Extract the necessary columns
    transaction_id = row['Transaction_ID']
    transaction_type = row['Transaction_Type']
    amount = row['Amount']
    fraud_flag = row['Fraud_Flag']
    category = row['Category']

    # 2. Add the transaction amount to the total amount
    total_amount += amount

    # 3. Count different types of transactions (other types are not counted)
    if transaction_type == "Transfer":
        transfer_count += 1
    elif transaction_type == "Deposit":
        deposit_count += 1
    elif transaction_type == "Withdrawal":
        withdrawal_count += 1

    # 4. Check if the transaction is fraudulent
    if fraud_flag:
        total_fraud += 1
        fraud_transactions.append(transaction_id)

    # 5. Calculate category-wise spending (a category seen for the first
    #    time starts from 0)
    category_spend[category] = category_spend.get(category, 0) + amount

    # 6. Print detailed transaction summary
    print(f"Transaction ID: {transaction_id}")
    print(f"Type: {transaction_type}, Amount: {amount}")
    print(f"Category: {category}, Fraud: {fraud_flag}")

    # 7. Highlight suspicious transactions (large amounts)
    if amount > 5000:
        print(f"Warning: Large transaction detected! ID: {transaction_id}, Amount: {amount}")

    # 8. Calculate the average transaction amount over the rows seen so far
    avg_transaction = total_amount / rows_seen
    print(f"Average Transaction Amount so far: {avg_transaction:.2f}")

    # 9. Display current category-wise breakdown
    print("Category Spending Breakdown:")
    for cat, spend in category_spend.items():
        print(f"{cat}: {spend}")

    # 10. Print a separator for readability
    print("="*40)


Transaction ID: 8878612
Type: Transfer, Amount: 3510.07
Category: Utilities, Fraud: False
Average Transaction Amount so far: 3510.07
Category Spending Breakdown:
Utilities: 3510.07
Transaction ID: 2733712
Type: Deposit, Amount: 2522.09
Category: Groceries, Fraud: False
Average Transaction Amount so far: 3016.08
Category Spending Breakdown:
Utilities: 3510.07
Groceries: 2522.09
Transaction ID: 5836221
Type: Transfer, Amount: 9443.94
Category: Health, Fraud: False
Average Transaction Amount so far: 5158.70
Category Spending Breakdown:
Utilities: 3510.07
Groceries: 2522.09
Health: 9443.94
Transaction ID: 9755716
Type: Deposit, Amount: 9390.66
Category: Travel, Fraud: False
Average Transaction Amount so far: 6216.69
Category Spending Breakdown:
Utilities: 3510.07
Groceries: 2522.09
Health: 9443.94
Travel: 9390.66
Transaction ID: 9204424
Type: Withdrawal, Amount: 1608.07
Category: Groceries, Fraud: True
Average Transaction Amount so far: 5294.97
Category Spending Breakdown:
Utilities: 3510.

In [13]:

# Report the totals gathered by the row-by-row loop.
# The lines are assembled first and then emitted in order, one per line.
summary_lines = [
    f"Total Amount Processed: {total_amount}",
    f"Fraudulent Transactions Count: {total_fraud}",
    f"Fraudulent Transactions IDs: {fraud_transactions}",
    f"Transaction Type Breakdown: Transfers: {transfer_count}, Deposits: {deposit_count}, Withdrawals: {withdrawal_count}",
]
for summary_line in summary_lines:
    print(summary_line)


Total Amount Processed: 5101042.790000008
Fraudulent Transactions Count: 489
Fraudulent Transactions IDs: [9204424, 8522573, 7209487, 8504784, 8312246, 6768908, 1417867, 5617284, 9948924, 6305458, 7339396, 3905378, 5814334, 2408156, 1277558, 3892058, 6047111, 5232115, 6504630, 7238541, 8456681, 2696336, 3683865, 1403686, 9384970, 1283856, 7802383, 5923005, 4714326, 4680094, 4633342, 4170810, 9713485, 9882556, 7247643, 8578604, 7031251, 2064028, 6615953, 6393228, 1756459, 9034266, 1292818, 6357843, 8962960, 9627136, 2225751, 6853543, 4854627, 8281295, 3723646, 7140943, 8405713, 9032139, 7855932, 9792401, 4649469, 7714670, 9504849, 6695426, 8457549, 5310188, 2884104, 3756591, 2948708, 7434371, 1769978, 3908665, 6220835, 1186190, 4772177, 1274200, 3303006, 5634357, 3986914, 4784649, 5808293, 7968034, 2752147, 4588202, 9386408, 2911841, 2693381, 6497551, 5847408, 8230073, 5179309, 9452157, 4849199, 5276515, 1177705, 5082963, 7570969, 4016869, 7647413, 9871067, 1192677, 5313019, 1737096, 55

# Comparison of a for loop and `DataFrame.apply()`

In [15]:
# Loop version: compute Amount_Percentage one row at a time.
# Each iteration writes a single cell through .loc, addressed by the row's
# index label.
for index, row in df.iterrows():
    ratio = row['Amount'] / row['Account_Balance']
    df.loc[index, 'Amount_Percentage'] = ratio * 100

# Using `DataFrame.apply()`

In [16]:
# apply() version: compute Amount_Percentage for every row in one call.
# axis=1 feeds the lambda one row at a time; the returned values are
# assigned to the column in a single step.
df['Amount_Percentage'] = df.apply(
    lambda record: record['Amount'] / record['Account_Balance'] * 100,
    axis=1,
)
