In [None]:
# Import python packages
import pandas as pd
import numpy as np
import streamlit as st
import altair as alt
from snowflake.cortex import Summarize
from snowflake.snowpark.functions import col


In [None]:
# Grab the Snowpark session that is already active for this notebook and bind
# it to `session` so Python cells can query Snowflake through Snowpark.
from snowflake.snowpark.context import get_active_session
session = get_active_session()

In [None]:
-- Load every row of the raw chargeback table.
-- NOTE(review): the next Python cell reads a cell named `cell2`; presumably that is this cell - confirm the cell name.
SELECT * FROM CORTEX_POC.DISPUTE.CHARGEBACKS;

# Exploratory Data Analysis (EDA)
Machine learning models thrive on clean and well-organized data. To ensure our models perform at their best, we'll investigate our dataset to address any missing values and visualize the distributions of each column.

In [None]:
# Materialise the result of the SQL cell named `cell2` as a pandas DataFrame.
cgbk_df = cell2.to_pandas()

# Bar chart: AMOUNT on the y-axis against CHARGEBACK_DATE on the x-axis.
st.subheader("Amount charged on each CHARGEBACK date 🥇")
st.bar_chart(
    data=cgbk_df,
    x="CHARGEBACK_DATE",
    y="AMOUNT",
)


# Basic Summary Statistics 

In [None]:
# Summary statistics (pandas `describe`) for the chargeback frame loaded above.
cgbk_df.describe()

# Checking nulls with Pandas

In [None]:
# Number of missing (null) values in each column of the chargeback frame.
cgbk_df.isnull().sum()

Let's aggregate chargebacks by month and chargeback reason to look for trends.

In [None]:
-- Chargeback volume per calendar month and chargeback reason.
-- month_year is CHARGEBACK_DATE rendered as 'YYYY-MM'.
create or replace view chargeback_trends as
select
      to_char(CHARGEBACK_DATE, 'YYYY-MM') as month_year
    , Chargeback_Reason
    , count(*)                            as chargeback_count
from CHARGEBACKS
group by 1, 2
order by 1;

In [None]:
-- Return the month-by-reason chargeback counts from the view defined above.
SELECT * FROM CHARGEBACK_TRENDS;

In [None]:
# Result of the SQL cell named `cell11` as a pandas DataFrame.
# The chart reads MONTH_YEAR / CHARGEBACK_COUNT / CHARGEBACK_REASON, i.e. the
# data is aggregated per month, so the frame is named accordingly (it was
# previously called `weekly_trends_df`, which did not match its contents).
monthly_trends_df = cell11.to_pandas()

# Chart the data: one bar per month, coloured by chargeback reason.
st.subheader("Monthly Trends :calendar:")
st.bar_chart(monthly_trends_df, x='MONTH_YEAR', y='CHARGEBACK_COUNT', color='CHARGEBACK_REASON')

# Feature Engineering
Preprocess the data so that it contains the useful features and a label derived from the Outcome column. Make sure that every attribute needed for prediction is available:

In [None]:
-- Training set for the first classifier: four feature columns plus the label.
-- outcome_label is 'customer' when the chargeback was resolved in favor of the
-- member and 'merchant' when it was resolved in favor of the merchant.
CREATE OR REPLACE VIEW chargeback_processed AS
SELECT
    Merchant_Category,
    MEMBER_DOCUMENTATION,
    Chargeback_Reason,
    -- Add any additional relevant features
    resolution_time,
    CASE 
        WHEN Outcome = 'resolved in favor of member' THEN 'customer'
        WHEN Outcome = 'resolved in favor of merchant' THEN 'merchant'
    END AS outcome_label
FROM chargebacks
-- Exclude rows without a recognised outcome so no NULL label reaches training.
-- (The original only stated this in a comment; no filter was applied.)
WHERE Outcome IN ('resolved in favor of member', 'resolved in favor of merchant');

In [None]:
-- Inspect the training view (features plus outcome_label) defined above.
SELECT * FROM CHARGEBACK_PROCESSED;

Now that we have our view prepared, we can create our binary classification model.

In [None]:
-- Train a Snowflake ML classification model named dispute_binary on the
-- CHARGEBACK_PROCESSED view, with outcome_label as the target column.
CREATE OR REPLACE SNOWFLAKE.ML.CLASSIFICATION dispute_binary(
    INPUT_DATA => SYSTEM$REFERENCE('view', 'CHARGEBACK_PROCESSED'),
    TARGET_COLNAME => 'outcome_label'
);

In [None]:
-- Rows to be scored: the same four feature columns that chargeback_processed
-- exposes (no label), read from chargeback_prediction.
CREATE OR REPLACE VIEW chargeback_predicted AS
SELECT
      Merchant_Category
    , MEMBER_DOCUMENTATION
    , Chargeback_Reason
    , resolution_time
FROM chargeback_prediction;

In [None]:
-- Inspect the rows that will be passed to the model for scoring.
select * from chargeback_predicted;

In [None]:
-- Score each row of chargeback_predicted with dispute_binary.
-- {*} passes all columns of the current row to the model as its input.
SELECT dispute_binary!PREDICT(
    INPUT_DATA => {*})
    as predictions from chargeback_predicted;

In [None]:
-- Persist every input row of chargeback_predicted together with the
-- prediction produced for it by dispute_binary.
CREATE OR REPLACE TABLE chargeback_predicted_data AS
SELECT
    *,
    dispute_binary!PREDICT(INPUT_DATA => {*}) AS predictions
FROM chargeback_predicted;

In [None]:
-- Inspect the stored predictions (input columns plus the raw predictions value).
SELECT * FROM CHARGEBACK_PREDICTED_DATA;

In [None]:
-- Flatten the predictions value into plain columns: the predicted class and
-- the per-class probabilities rounded to 4 decimals.
-- The class is cast to STRING so consumers receive plain text instead of a
-- semi-structured value (which is typically handed to pandas as JSON text,
-- i.e. with surrounding double quotes).
-- NOTE(review): the model is trained with 'customer' meaning "resolved in
-- favor of member", so this column appears to hold the predicted winner rather
-- than the liable party - confirm the LIABLE_FOR_DISPUTE naming.
CREATE OR REPLACE VIEW CHARGEBACK_PREDICTED_OUTCOME AS
SELECT
    MERCHANT_CATEGORY,
    MEMBER_DOCUMENTATION,
    CHARGEBACK_REASON,
    RESOLUTION_TIME,
    predictions:class::STRING AS LIABLE_FOR_DISPUTE,
    ROUND(predictions['probability']['customer'], 4) AS customer_probability,
    ROUND(predictions['probability']['merchant'], 4) AS merchant_probability
FROM CHARGEBACK_PREDICTED_DATA;

In [None]:
-- Preview the flattened predictions (10 rows only).
-- NOTE(review): if the chart cell that follows reads this cell's result, its
-- charts cover only these 10 rows, and without an ORDER BY the choice of rows
-- is not deterministic - confirm that is intended.
SELECT * FROM CHARGEBACK_PREDICTED_OUTCOME LIMIT 10;

In [None]:

# Result of the SQL cell named `cell26` as a pandas DataFrame. The charts below
# read LIABLE_FOR_DISPUTE, MERCHANT_CATEGORY, RESOLUTION_TIME,
# CHARGEBACK_REASON and MEMBER_DOCUMENTATION from it.
df = cell26.to_pandas()

# Streamlit application
st.title("Chargeback Predictive Analysis Visualizations")

# Use Case 1: Distribution of Dispute Liability (pie chart of class counts)
st.header("1: Distribution of Dispute Liability")
liability_counts = df['LIABLE_FOR_DISPUTE'].value_counts().reset_index()
liability_counts.columns = ['Liable For Dispute', 'Count']
liability_pie_chart = {
    "data": {
        "values": liability_counts.to_dict(orient='records')
    },
    "mark": "arc",
    "encoding": {
        "theta": {"field": "Count", "type": "quantitative"},
        "color": {"field": "Liable For Dispute", "type": "nominal", "scale": {"scheme": "category10"}}
    }
}
st.vega_lite_chart(liability_pie_chart, use_container_width=True, theme="streamlit")

# Use Case 2: box plot of resolution time per merchant category
st.header("2: Resolution Time by Merchant Category")
box_plot = {
    "data": {
        "values": df
    },
    "mark": "boxplot",
    "encoding": {
        "x": {"field": "MERCHANT_CATEGORY", "type": "nominal"},
        "y": {"field": "RESOLUTION_TIME", "type": "quantitative"},
        "color": {"field": "MERCHANT_CATEGORY", "type": "nominal"}
    },
    "title": "Box Plot of Resolution Time by Merchant Category"
}
st.vega_lite_chart(box_plot, use_container_width=True)

# Use Case 3: resolution time per chargeback reason, coloured by predicted class
st.header("3: Outcomes by Chargeback Reason over Resolution Time")
grouped_bar_chart = {
    "data": {
        "values": df
    },
    "mark": "bar",
    "encoding": {
        "x": {"field": "CHARGEBACK_REASON", "type": "nominal", "axis": {"labelAngle": -45}},
        "y": {"field": "RESOLUTION_TIME", "type": "quantitative", "axis": {"title": "RESOLUTION TIME"}},
        "color": {"field": "LIABLE_FOR_DISPUTE", "type": "nominal", "title": "Liable for Dispute"},
        # The original used a conditional opacity testing
        # `datum.Outcome == 'CUSTOMER_PROBABILITY'`. None of the columns this
        # cell reads is called `Outcome` (and the literal is a column name, not
        # a value), so the test could never be true and every bar was drawn
        # with the fallback opacity. Keep that rendering, drop the dead test.
        "opacity": {"value": 0.7}
    }
}
st.vega_lite_chart(grouped_bar_chart, use_container_width=True)

# Use Case 4: stacked bars counting predicted classes per documentation value.
# `df` is already a DataFrame, so no extra pd.DataFrame(df) copy is needed.
outcomes_count = (
    df.groupby(['MEMBER_DOCUMENTATION', 'LIABLE_FOR_DISPUTE'])
    .size()
    .reset_index(name='Count')
)
stacked_bar_chart = {
    "data": {
        "values": outcomes_count.to_dict(orient='records')
    },
    "mark": "bar",
    "encoding": {
        "x": {"field": "MEMBER_DOCUMENTATION", "type": "nominal", "axis": {"labelAngle": -45}},
        "y": {"field": "Count", "type": "quantitative"},
        "color": {"field": "LIABLE_FOR_DISPUTE", "type": "nominal", "scale": {"scheme": "category10"}}
    },
    "title": "Proportion of Outcomes Based on Member Documentation"
}
# Section heading rather than a second page title: the page title was already
# emitted at the top of this cell and sections 1-3 use st.header.
st.header("Documentation Impact on Chargeback Outcomes")
st.vega_lite_chart(stacked_bar_chart, use_container_width=True)

In [None]:
-- Inspect the trained dispute_binary model: evaluation metrics, global
-- evaluation metrics, confusion matrix and feature importance.
-- NOTE(review): with several statements in one SQL cell the notebook may only
-- display the last result - confirm, and split into separate cells if all
-- four outputs are needed.
CALL dispute_binary!SHOW_EVALUATION_METRICS();
CALL dispute_binary!SHOW_GLOBAL_EVALUATION_METRICS();
CALL dispute_binary!SHOW_CONFUSION_MATRIX();
CALL dispute_binary!SHOW_FEATURE_IMPORTANCE();

In [None]:
-- Training set for the second classifier: three categorical features (no
-- resolution_time) plus the label.
-- outcome_label is 'customer' when the chargeback was resolved in favor of the
-- member and 'merchant' when it was resolved in favor of the merchant.
CREATE OR REPLACE VIEW chargeback_processed2 AS
SELECT
    Merchant_Category,
    MEMBER_DOCUMENTATION,
    Chargeback_Reason,
    CASE 
        WHEN Outcome = 'resolved in favor of member' THEN 'customer'
        WHEN Outcome = 'resolved in favor of merchant' THEN 'merchant'
    END AS outcome_label
FROM chargebacks
-- Keep only rows with a recognised outcome so no NULL label reaches training.
WHERE Outcome IN ('resolved in favor of member', 'resolved in favor of merchant');

In [None]:
-- Train a second classification model, dispute_binary2, on the
-- CHARGEBACK_PROCESSED2 view, with outcome_label as the target column.
CREATE OR REPLACE SNOWFLAKE.ML.CLASSIFICATION dispute_binary2(
    INPUT_DATA => SYSTEM$REFERENCE('view', 'CHARGEBACK_PROCESSED2'),
    TARGET_COLNAME => 'outcome_label'
);

In [None]:
-- Rows to be scored by the second model: the three feature columns that
-- chargeback_processed2 exposes (no label), read from chargeback_prediction.
CREATE OR REPLACE VIEW chargeback_predicted2 AS
SELECT
      Merchant_Category
    , MEMBER_DOCUMENTATION
    , Chargeback_Reason
FROM chargeback_prediction;

In [None]:
-- Score each row of chargeback_predicted2 with dispute_binary2.
-- {*} passes all columns of the current row to the model as its input.
SELECT dispute_binary2!PREDICT(
    INPUT_DATA => {*})
    as predictions from chargeback_predicted2;

In [None]:
-- Persist every input row of chargeback_predicted2 together with the
-- prediction produced for it by dispute_binary2.
CREATE OR REPLACE TABLE chargeback_predicted_data2 AS
SELECT
    *,
    dispute_binary2!PREDICT(INPUT_DATA => {*}) AS predictions
FROM chargeback_predicted2;

In [None]:
-- Flatten the predictions value of the second model into plain columns: the
-- predicted class and the per-class probabilities rounded to 4 decimals.
-- The class is cast to STRING so consumers receive plain text instead of a
-- semi-structured value (which is typically handed to pandas as JSON text,
-- i.e. with surrounding double quotes).
CREATE OR REPLACE VIEW CHARGEBACK_PREDICTED_OUTCOME2 AS
SELECT
    MERCHANT_CATEGORY,
    MEMBER_DOCUMENTATION,
    CHARGEBACK_REASON,
    predictions:class::STRING AS LIABLE_FOR_DISPUTE,
    ROUND(predictions['probability']['customer'], 4) AS customer_probability,
    ROUND(predictions['probability']['merchant'], 4) AS merchant_probability
FROM CHARGEBACK_PREDICTED_DATA2;

In [None]:
-- Return all flattened predictions of the second model.
SELECT * FROM CHARGEBACK_PREDICTED_OUTCOME2;

In [None]:

# Result of the SQL cell named `cell36` as a pandas DataFrame. The charts below
# read LIABLE_FOR_DISPUTE and MEMBER_DOCUMENTATION from it.
df = cell36.to_pandas()

# Streamlit application
st.title("Chargeback Predictive Analysis Visualizations")

# Use Case 1: Distribution of Dispute Liability (pie chart of class counts)
st.header("1: Distribution of Dispute Liability")
liability_counts = df['LIABLE_FOR_DISPUTE'].value_counts().reset_index()
liability_counts.columns = ['Liable For Dispute', 'Count']
liability_pie_chart = {
    "data": {
        "values": liability_counts.to_dict(orient='records')
    },
    "mark": "arc",
    "encoding": {
        "theta": {"field": "Count", "type": "quantitative"},
        "color": {"field": "Liable For Dispute", "type": "nominal", "scale": {"scheme": "category10"}}
    }
}
st.vega_lite_chart(liability_pie_chart, use_container_width=True, theme="streamlit")

# Use Case 2: stacked bars counting predicted classes per documentation value.
# `df` is already a DataFrame, so no extra pd.DataFrame(df) copy is needed.
outcomes_count = (
    df.groupby(['MEMBER_DOCUMENTATION', 'LIABLE_FOR_DISPUTE'])
    .size()
    .reset_index(name='Count')
)
stacked_bar_chart = {
    "data": {
        "values": outcomes_count.to_dict(orient='records')
    },
    "mark": "bar",
    "encoding": {
        "x": {"field": "MEMBER_DOCUMENTATION", "type": "nominal", "axis": {"labelAngle": -45}},
        "y": {"field": "Count", "type": "quantitative"},
        "color": {"field": "LIABLE_FOR_DISPUTE", "type": "nominal", "scale": {"scheme": "category10"}}
    },
    "title": "Proportion of Outcomes Based on Member Documentation"
}
# Numbered section heading, matching section 1 above; the page title was
# already emitted at the top of this cell, so st.title is not repeated here.
st.header("2: Documentation Impact on Chargeback Outcomes")
st.vega_lite_chart(stacked_bar_chart, use_container_width=True, theme="streamlit")

In [None]:
-- Inspect the trained dispute_binary2 model: evaluation metrics, global
-- evaluation metrics, confusion matrix and feature importance.
-- NOTE(review): with several statements in one SQL cell the notebook may only
-- display the last result - confirm, and split into separate cells if all
-- four outputs are needed.
CALL dispute_binary2!SHOW_EVALUATION_METRICS();
CALL dispute_binary2!SHOW_GLOBAL_EVALUATION_METRICS();
CALL dispute_binary2!SHOW_CONFUSION_MATRIX();
CALL dispute_binary2!SHOW_FEATURE_IMPORTANCE();

In [None]:
-- Training set for the third classifier: CHARGEBACK_DATE plus three
-- categorical features, and the label.
-- outcome_label is 'customer' when the chargeback was resolved in favor of the
-- member and 'merchant' when it was resolved in favor of the merchant.
CREATE OR REPLACE VIEW chargeback_processed3 AS
SELECT
    CHARGEBACK_DATE,
    Merchant_Category,
    MEMBER_DOCUMENTATION,
    Chargeback_Reason,
    CASE 
        WHEN Outcome = 'resolved in favor of member' THEN 'customer'
        WHEN Outcome = 'resolved in favor of merchant' THEN 'merchant'
    END AS outcome_label
FROM chargebacks
-- Keep only rows with a recognised outcome so no NULL label reaches training.
WHERE Outcome IN ('resolved in favor of member', 'resolved in favor of merchant');

In [None]:
-- Train a third classification model, dispute_binary3, on the
-- CHARGEBACK_PROCESSED3 view, with outcome_label as the target column.
CREATE OR REPLACE SNOWFLAKE.ML.CLASSIFICATION dispute_binary3(
    INPUT_DATA => SYSTEM$REFERENCE('view', 'CHARGEBACK_PROCESSED3'),
    TARGET_COLNAME => 'outcome_label'
);

In [None]:
-- Rows to be scored by the third model: the four feature columns that
-- chargeback_processed3 exposes (no label), read from chargeback_prediction.
CREATE OR REPLACE VIEW chargeback_predicted3 AS
SELECT
      CHARGEBACK_DATE
    , Merchant_Category
    , MEMBER_DOCUMENTATION
    , Chargeback_Reason
FROM chargeback_prediction;

In [None]:
-- Persist every input row of chargeback_predicted3 together with its
-- prediction. The rows carry CHARGEBACK_DATE, which only dispute_binary3 was
-- trained on, so that model must do the scoring (the original called
-- dispute_binary2 here, a copy-paste slip from the previous experiment).
CREATE OR REPLACE TABLE chargeback_predicted_data3 AS
SELECT
    *,
    dispute_binary3!PREDICT(INPUT_DATA => {*}) AS predictions
FROM chargeback_predicted3;

In [None]:
-- Flatten the predictions value of the third experiment into plain columns:
-- the predicted class and the per-class probabilities rounded to 4 decimals.
-- The class is cast to STRING so consumers receive plain text instead of a
-- semi-structured value (which is typically handed to pandas as JSON text,
-- i.e. with surrounding double quotes).
CREATE OR REPLACE VIEW CHARGEBACK_PREDICTED_OUTCOME3 AS
SELECT
    CHARGEBACK_DATE,
    MERCHANT_CATEGORY,
    MEMBER_DOCUMENTATION,
    CHARGEBACK_REASON,
    predictions:class::STRING AS LIABLE_FOR_DISPUTE,
    ROUND(predictions['probability']['customer'], 4) AS customer_probability,
    ROUND(predictions['probability']['merchant'], 4) AS merchant_probability
FROM CHARGEBACK_PREDICTED_DATA3;

In [None]:
-- Return all flattened predictions of the third experiment.
SELECT * FROM CHARGEBACK_PREDICTED_OUTCOME3;

In [None]:
-- Show the feature importance reported by the dispute_binary3 model.
CALL dispute_binary3!SHOW_FEATURE_IMPORTANCE();

In [None]:
-- Training set for the dispute model, built from DISPUTE_DATA.
-- The label names the party that lost the dispute: 'merchant' when it was
-- resolved in favor of the customer, 'customer' when it was resolved in favor
-- of the merchant.
-- NOTE(review): DISPUTE_ID is exposed as an input column; if it is a row
-- identifier it carries no predictive signal - confirm it should be a feature.
CREATE OR REPLACE VIEW DISPUTE_PROCESSED AS
SELECT
    DISPUTE_ID,
    DISPUTE_TYPE,
    CUSTOMER_DOCUMENTATION, 
    CASE 
        WHEN Outcome = 'resolved in favor of customer' THEN 'merchant'
        WHEN Outcome = 'resolved in favor of merchant' THEN 'customer'
    END AS liable_for_dispute
FROM DISPUTE_DATA
-- Keep only rows with a recognised outcome so no NULL label reaches training.
WHERE Outcome IN ('resolved in favor of customer', 'resolved in favor of merchant');

In [None]:
-- Inspect the dispute training view (features plus liable_for_dispute).
SELECT * FROM DISPUTE_PROCESSED;

In [None]:
-- Train a classification model named dispute_binary4 on the DISPUTE_PROCESSED
-- view, with liable_for_dispute as the target column.
CREATE OR REPLACE SNOWFLAKE.ML.CLASSIFICATION dispute_binary4(
    INPUT_DATA => SYSTEM$REFERENCE('view', 'DISPUTE_PROCESSED'),
    TARGET_COLNAME => 'liable_for_dispute'
);

In [None]:
-- Show the feature importance reported by the dispute_binary4 model.
CALL dispute_binary4!SHOW_FEATURE_IMPORTANCE();

In [None]:
-- NOTE(review): this switches the session role to SYSADMIN in the middle of
-- the notebook; presumably later statements then run under that role -
-- confirm the switch is intended.
USE ROLE sysadmin;

-- Rows to be scored by the dispute model: the three input columns that
-- DISPUTE_PROCESSED exposes (no label), read from DISPUTE_PREDICTION.
CREATE OR REPLACE VIEW DISPUTE_TO_PREDICT AS
SELECT
      DISPUTE_ID
    , DISPUTE_TYPE
    , CUSTOMER_DOCUMENTATION
FROM DISPUTE_PREDICTION;

In [None]:
-- Inspect the dispute rows that will be passed to the model for scoring.
SELECT * FROM DISPUTE_TO_PREDICT;

In [None]:
-- Persist every row of DISPUTE_TO_PREDICT together with its prediction.
-- These rows carry DISPUTE_ID / DISPUTE_TYPE / CUSTOMER_DOCUMENTATION, the
-- inputs dispute_binary4 was trained on, so that model must do the scoring.
-- The original called dispute_binary2, which was trained on the chargeback
-- columns (Merchant_Category, MEMBER_DOCUMENTATION, Chargeback_Reason).
CREATE OR REPLACE TABLE DISPUTE_PREDICTED_DATA AS
SELECT
    *,
    dispute_binary4!PREDICT(INPUT_DATA => {*}) AS predictions
FROM DISPUTE_TO_PREDICT;

In [None]:
-- Join each scored dispute (D1) back to its source row in DISPUTE_PREDICTION
-- (D2) on DISPUTE_ID, and flatten the predictions value into the predicted
-- class plus per-class probabilities rounded to 4 decimals.
-- `predictions` is qualified with D1 so the reference stays unambiguous in
-- the join, and the class is cast to STRING so consumers receive plain text
-- instead of a semi-structured value.
CREATE OR REPLACE VIEW DISPUTE_OUTCOME AS
SELECT 
    D1.DISPUTE_ID,
    D2.CHARGEBACK_DATE,
    D1.DISPUTE_TYPE,
    D2.PRODUCT_TYPE,
    D1.CUSTOMER_DOCUMENTATION,
    D2.CUSTOMER_STATEMENT,
    D1.predictions:class::STRING AS LIABLE_FOR_DISPUTE,
    ROUND(D1.predictions['probability']['customer'], 4) AS customer_probability,
    ROUND(D1.predictions['probability']['merchant'], 4) AS merchant_probability,
    D2.STATUS
FROM DISPUTE_PREDICTED_DATA D1
JOIN DISPUTE_PREDICTION D2
ON D1.DISPUTE_ID=D2.DISPUTE_ID;

In [None]:
-- Return every scored dispute with its predicted class, probabilities and status.
SELECT * FROM DISPUTE_OUTCOME

In [None]:
# Result of the SQL cell named `cell49` as a pandas DataFrame.
# NOTE(review): presumably this is the DISPUTE_OUTCOME result - confirm the cell name.
my_df = cell49.to_pandas()

# The original chart used CHARGEBACK_DATE as the bar height (a date is not a
# measure) and coloured by DISPUTE_TYPE, while the heading promised the dispute
# status and an amount. DISPUTE_OUTCOME exposes no amount column, so chart what
# the data can support: the number of disputes per product type, split by
# status.
status_counts = (
    my_df.groupby(['PRODUCT_TYPE', 'STATUS'])
    .size()
    .reset_index(name='DISPUTE_COUNT')
)

# Chart the data
st.subheader("STATUS OF DISPUTE FOR EACH PRODUCT_TYPE ❄️")
st.bar_chart(status_counts, x='PRODUCT_TYPE', y='DISPUTE_COUNT', color='STATUS')

In [None]:
-- All columns of DISPUTE_OUTCOME for disputes whose STATUS is 'Rejected',
-- plus the Cortex sentiment result computed from CUSTOMER_STATEMENT.
create or replace view DISPUTE_SENTIMENT as
select
    *,
    SNOWFLAKE.CORTEX.SENTIMENT(CUSTOMER_STATEMENT) as SENTIMENT_RESULT
from DISPUTE_OUTCOME
where STATUS = 'Rejected';

In [None]:
-- Return the rejected disputes together with their sentiment result.
select * from dispute_sentiment ;

In [None]:
# Let the user pick one dispute from the result of the SQL cell named `cell52`
# and show a Cortex-generated summary of that dispute's customer statement.
df = cell52.to_pandas()
results_df = df  # already a DataFrame; no extra pd.DataFrame(...) copy needed

st.header("Need Summary?")

if results_df.empty:
    # With no rows the selectbox yields None and the lookups below would raise.
    st.info("No disputes are available to summarize.")
else:
    selected_indices = st.selectbox("What summary do you want?", results_df.index)

    selected_dispute = results_df.loc[selected_indices]
    st.text("Selected Customer Review:")
    st.dataframe(selected_dispute)

    # pandas hands back NumPy scalars; convert to a plain Python value before
    # using it in a Snowpark predicate (assumes Snowpark may reject NumPy
    # scalars as literals - harmless if it does not).
    dispute_id = selected_dispute.DISPUTE_ID
    if hasattr(dispute_id, "item"):
        dispute_id = dispute_id.item()

    # Filter BEFORE projecting: the original selected CUSTOMER_STATEMENT first
    # and then filtered on DISPUTE_ID, a column the projection had dropped.
    statement_rows = (
        session.table("dispute_sentiment")
        .filter(col('DISPUTE_ID') == dispute_id)
        .select('CUSTOMER_STATEMENT')
        .collect()
    )

    st.title("Summary")
    # Guard against no matching row or a NULL statement instead of failing
    # with an IndexError / passing None to Summarize.
    text_to_summarize = statement_rows[0].CUSTOMER_STATEMENT if statement_rows else None
    if not text_to_summarize:
        st.warning("The selected dispute has no customer statement to summarize.")
    else:
        text_summarise = Summarize(text_to_summarize)
        st.markdown(text_summarise)