In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [None]:
# Load the raw transactions table; the path is relative to the kernel's
# working directory. Every later cell reads from this `df`.
df = pd.read_csv("transactions.csv")

In [None]:
# Preview the first rows (head() shows 5 by default) to sanity-check the load.
df.head()

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# Summary statistics; with default arguments pandas restricts this to the
# numeric columns when the frame contains any.
df.describe()

In [None]:
# Tally the missing values in each column and show them as a one-column table
# (the column keeps pandas' default label, 0).
missing_per_column = df.isna().sum()
df_null = missing_per_column.to_frame()
df_null

In [None]:
# Count fully duplicated rows once and reuse the value. The previous bare
# expression was not the cell's last statement, so it was never displayed and
# only repeated the full-frame scan.
duplicate_row_count = df.duplicated().sum()
print("The Total duplicated values in this Dataset are :", duplicate_row_count)

In [None]:
# Number of distinct values per column (missing values are not counted by
# default); useful for spotting categorical vs. identifier-like columns.
df.nunique()

In [None]:
# Histogram of transaction amounts in 50 bins, with bars coloured by the
# `is_fraud` flag.
fig = px.histogram(
    data_frame=df,
    x="amount",
    color="is_fraud",
    nbins=50,
    title="Distribution of amounts for fraud",
)
fig.show()

In [None]:
# Scatter of amount against account age, coloured by the `is_fraud` flag.
# Points are drawn half-transparent so dense regions remain readable.
fig = px.scatter(
    data_frame=df,
    x="account_age_days",
    y="amount",
    color="is_fraud",
    opacity=0.5,
    title="Amount vs. Account Age",
)
fig.show()


In [None]:
# Transaction counts per merchant category, split by the `is_fraud` flag.
# update_xaxes returns the figure, so the ordering (largest total first) is
# applied in the same expression.
fig = px.histogram(
    data_frame=df,
    x="merchant_category",
    color="is_fraud",
    title="Fraud by trade category",
).update_xaxes(categoryorder="total descending")
fig.show()

In [None]:
# Parse the timestamp column so the `.dt` accessor is available below.
df["transaction_time"] = pd.to_datetime(df["transaction_time"])

# Mean of `is_fraud` per calendar day; the grouping key keeps the column name
# "transaction_time", which is what the x-axis refers to.
daily_fraud_rate = (
    df.groupby(df["transaction_time"].dt.date)["is_fraud"]
    .mean()
    .reset_index()
)

fig = px.line(
    daily_fraud_rate,
    x="transaction_time",
    y="is_fraud",
    title="Fraud rate over time",
)
fig.show()

In [None]:
# Fraud rate (mean of `is_fraud`) per channel, highest first.
fraud_by_channel = (
    df.groupby('channel')['is_fraud']
    .mean()
    .sort_values(ascending=False)
    .reset_index(name='fraud_rate')
)
fig_channel = px.bar(
    fraud_by_channel,
    x='channel',
    y='fraud_rate',
    title='Fraud Rate by Channel',
    color='fraud_rate',  # shade each bar by its fraud rate
    color_continuous_scale=px.colors.sequential.Viridis,
)
fig_channel.update_layout(xaxis_title="Channel", yaxis_title="Fraud Rate")
fig_channel.show()

# Fraud rate (mean of `is_fraud`) per merchant category, highest first.
fraud_by_category = (
    df.groupby('merchant_category')['is_fraud']
    .mean()
    .sort_values(ascending=False)
    .reset_index(name='fraud_rate')
)
fig_category = px.bar(
    fraud_by_category,
    x='merchant_category',
    y='fraud_rate',
    # The closing quote was missing here, which made the whole cell a SyntaxError.
    title='Fraud Rate by Merchant Category',
    color='fraud_rate',  # shade each bar by its fraud rate
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig_category.update_layout(xaxis_title="Merchant Category", yaxis_title="Fraud Rate")
fig_category.show()

In [None]:
# Pairwise correlations between the numeric columns, drawn as an annotated
# heatmap on an explicitly created 10x8 axes.
corr = df.corr(numeric_only=True)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
ax.set_title("Heat map of correlations")
plt.show()