In [16]:
import os
from datetime import timedelta

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from pyod.models.ecod import ECOD
from pyod.models.knn import KNN
from scipy.stats import chi2_contingency


#### Importieren der Datei

In [3]:
# Location of the PSP transaction workbook. The default is the path used when
# this notebook was written; set the PSP_DATA_PATH environment variable to
# point at the file on another machine instead of editing this cell.
PSP_DATA_PATH = os.environ.get(
    'PSP_DATA_PATH',
    'C:/Users/Ibrom/Studium/DLMDWME01/PSP_Jan_Feb_2019.xlsx',
)

# Load worksheet 'Sheet1' of the Excel file into a pandas DataFrame
df_psp = pd.read_excel(PSP_DATA_PATH, sheet_name='Sheet1')

# Rename the first column to 'Id'
df_psp = df_psp.rename(columns={df_psp.columns[0]: 'Id'})

df_psp.head()

Unnamed: 0,Id,tmsp,country,amount,success,PSP,3D_secured,card
0,0,2019-01-01 00:01:11,Germany,89,0,UK_Card,0,Visa
1,1,2019-01-01 00:01:17,Germany,89,1,UK_Card,0,Visa
2,2,2019-01-01 00:02:49,Germany,238,0,UK_Card,1,Diners
3,3,2019-01-01 00:03:13,Germany,238,1,UK_Card,1,Diners
4,4,2019-01-01 00:04:33,Austria,124,0,Simplecard,0,Diners


In [None]:
# Shift every timestamp forward by 57 months so that the Jan-Feb 2019 data
# falls into Oct-Nov 2023.
# NOTE: the offset is added to the column's current values, so every further
# run of this cell moves the dates by another 57 months.
df_psp['tmsp'] = df_psp['tmsp'] + pd.DateOffset(months=57)

### Korrelationsanalyse

In [21]:

# Contingency table: card brand ('card', rows) against payment service
# provider ('PSP', columns)
cross_tab = pd.crosstab(index=df_psp['card'], columns=df_psp['PSP'])
print(cross_tab)

# Chi-square test on the contingency table; only the statistic and the
# p-value are reported below
chi2, p, dof, expected = chi2_contingency(cross_tab)
print(f"\nChi-Quadrat Statistik: {chi2}", f"p-Wert: {p}", sep="\n")

PSP     Goldcard  Moneycard  Simplecard  UK_Card
card                                            
Diners       670       1667        2356     5075
Master      1763       4765        7245    15229
Visa         775       1865        2845     6155

Chi-Quadrat Statistik: 16.04005658272315
p-Wert: 0.013540575127843202


In [20]:

# Contingency table: transaction outcome ('success', rows) against payment
# service provider ('PSP', columns)
cross_tab = pd.crosstab(index=df_psp['success'], columns=df_psp['PSP'])
print(cross_tab)

# Chi-square test on the contingency table; only the statistic and the
# p-value are reported below
chi2, p, dof, expected = chi2_contingency(cross_tab)
print(f"\nChi-Quadrat Statistik: {chi2}", f"p-Wert: {p}", sep="\n")

PSP      Goldcard  Moneycard  Simplecard  UK_Card
success                                          
0            1905       6482       10478    21317
1            1303       1815        1968     5142

Chi-Quadrat Statistik: 998.7783848081615
p-Wert: 3.312365253382584e-216


In [29]:
# One-hot encode 'PSP' so that every provider gets its own indicator column
# (PSP_Goldcard, PSP_Moneycard, PSP_Simplecard, PSP_UK_Card).
df_encoded_psp = pd.get_dummies(df_psp, columns=['PSP'])

# Pairwise correlation (DataFrame.corr default method) between the transaction
# amount and the provider indicators. The indicators stem from one categorical
# column and are mutually exclusive, so only the 'amount' row/column relates
# two different variables.
correlation_matrix_psp = df_encoded_psp[
    ['amount', 'PSP_Goldcard', 'PSP_Moneycard', 'PSP_Simplecard', 'PSP_UK_Card']
].corr()
print(correlation_matrix_psp)

                  amount  PSP_Goldcard  PSP_Moneycard  PSP_Simplecard  \
amount          1.000000      0.002707      -0.002318       -0.006180   
PSP_Goldcard    0.002707      1.000000      -0.115715       -0.149268   
PSP_Moneycard  -0.002318     -0.115715       1.000000       -0.254145   
PSP_Simplecard -0.006180     -0.149268      -0.254145        1.000000   
PSP_UK_Card     0.005734     -0.274007      -0.466528       -0.601802   

                PSP_UK_Card  
amount             0.005734  
PSP_Goldcard      -0.274007  
PSP_Moneycard     -0.466528  
PSP_Simplecard    -0.601802  
PSP_UK_Card        1.000000  


In [42]:
# One-hot encode 'card' so that every card brand gets its own indicator column
df_encoded_card = pd.get_dummies(df_psp, columns=['card'])

# Pairwise correlation between the transaction amount and the card indicators
correlation_matrix_card = df_encoded_card.loc[
    :, ['amount', 'card_Diners', 'card_Master', 'card_Visa']
].corr()
print(correlation_matrix_card)

               amount  card_Diners  card_Master  card_Visa
amount       1.000000     0.000303    -0.002547   0.002703
card_Diners  0.000303     1.000000    -0.570612  -0.268623
card_Master -0.002547    -0.570612     1.000000  -0.637756
card_Visa    0.002703    -0.268623    -0.637756   1.000000


In [41]:
# One-hot encode the card brand and the success flag in a single call. The
# resulting columns (and their order) match encoding 'card' first and
# 'success' second in two separate calls.
df_encoded_card_success = pd.get_dummies(df_psp, columns=['card', 'success'])

# Pairwise correlation between the outcome indicators and the card indicators.
# success_0 and success_1 are complementary indicators of the same flag, hence
# their mutual correlation of -1.
correlation_matrix_card_success = df_encoded_card_success[
    ['success_0', 'success_1', 'card_Diners', 'card_Master', 'card_Visa']
].corr()
print(correlation_matrix_card_success)

             success_0  success_1  card_Diners  card_Master  card_Visa
success_0     1.000000  -1.000000    -0.024474     0.026086  -0.007642
success_1    -1.000000   1.000000     0.024474    -0.026086   0.007642
card_Diners  -0.024474   0.024474     1.000000    -0.570612  -0.268623
card_Master   0.026086  -0.026086    -0.570612     1.000000  -0.637756
card_Visa    -0.007642   0.007642    -0.268623    -0.637756   1.000000


In [46]:
# One-hot encode the payment service provider and the success flag in a single
# call. The resulting columns (and their order) match encoding 'PSP' first and
# 'success' second in two separate calls.
df_encoded_psp_success = pd.get_dummies(df_psp, columns=['PSP', 'success'])

# Pairwise correlation between the outcome indicators and the provider
# indicators. success_0 and success_1 are complementary indicators of the same
# flag, hence their mutual correlation of -1.
correlation_matrix_psp_success = df_encoded_psp_success[
    ['success_0', 'success_1', 'PSP_Goldcard', 'PSP_Moneycard', 'PSP_Simplecard', 'PSP_UK_Card']
].corr()
print(correlation_matrix_psp_success)

                success_0  success_1  PSP_Goldcard  PSP_Moneycard  \
success_0        1.000000  -1.000000     -0.131774      -0.017502   
success_1       -1.000000   1.000000      0.131774       0.017502   
PSP_Goldcard    -0.131774   0.131774      1.000000      -0.115715   
PSP_Moneycard   -0.017502   0.017502     -0.115715       1.000000   
PSP_Simplecard   0.063746  -0.063746     -0.149268      -0.254145   
PSP_UK_Card      0.022366  -0.022366     -0.274007      -0.466528   

                PSP_Simplecard  PSP_UK_Card  
success_0             0.063746     0.022366  
success_1            -0.063746    -0.022366  
PSP_Goldcard         -0.149268    -0.274007  
PSP_Moneycard        -0.254145    -0.466528  
PSP_Simplecard        1.000000    -0.601802  
PSP_UK_Card          -0.601802     1.000000  


In [None]:
# NOTE(review): this cell contains no executable code. It looks like a
# checklist of variable pairs involving 'tmsp' to analyse — confirm which
# of them are still open.
#tmsp - country
#tmsp - amount
#tmsp - success
#tmsp - PSP
#tmsp - 3D_secured
#tmsp - card

In [None]:
# NOTE(review): this cell contains no executable code. It looks like a
# checklist of variable pairs involving 'country' to analyse — confirm which
# of them are still open.
#country - amount
#country - success
#country - PSP
#country - 3D_secured
#country - card

In [None]:
# NOTE(review): this cell contains no executable code. It looks like a
# checklist of variable pairs involving 'amount' to analyse — confirm which
# of them are still open.
#amount - success
#amount - PSP
#amount - 3D_secured
#amount - card

In [None]:
# NOTE(review): this cell contains no executable code. It looks like a
# checklist of variable pairs involving 'success' to analyse — confirm which
# of them are still open.
#success - PSP
#success - 3D_secured
#success - card

In [None]:
# NOTE(review): this cell contains no executable code. It looks like a
# checklist of variable pairs involving 'PSP' to analyse — confirm which
# of them are still open.
#PSP - 3D_secured
#PSP - card

In [None]:
# NOTE(review): this cell contains no executable code. It looks like the last
# entry of a checklist of variable pairs to analyse — confirm whether it is
# still open.
#3D_secured - card