In [None]:
import pandas as pd
import numpy as np
from sklearn import metrics
import sklearn.linear_model as lm
from sklearn.model_selection import train_test_split

In [None]:
# Load the scored data set. The first CSV column is a saved row index (read
# naively it shows up as a data column named "Unnamed: 0"), so it is used as
# the DataFrame index instead of being carried along as a feature column.
data = pd.read_csv('modellMitScore.csv', index_col=0)
data.head(200)

Unnamed: 0,barvermoegen,aktienvermoegen,saeule3a,lebensversicherung,pensionskasse,totalVermoegen,lohn,lohnJahr,bonus,zulagen,totalLohn,schaetzungObjekt,anzahlung,tragbarkeit,score
0,154557.91760,28291.630990,80000,0,109368.55690,372218.10550,5600,67200,0.000000,840,68040.0000,5.079846e+05,100,50,75
1,338303.87560,7606.009156,40000,45000,207181.98360,638091.86830,9100,109200,0.000000,0,109200.0000,7.126644e+05,100,70,85
2,395285.75170,910.833454,40000,0,80878.81724,517075.40240,5600,67200,0.000000,840,68040.0000,1.593039e+06,90,10,50
3,308723.73680,9147.939781,40000,0,200101.56730,557973.24390,13000,156000,15759.389290,0,171759.3893,1.088717e+06,100,50,75
4,278315.53960,10403.239510,130000,0,0.00000,418718.77910,9100,109200,0.000000,0,109200.0000,5.413073e+05,100,80,90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,38914.76640,9517.760814,20000,0,16761.54682,85194.07403,5600,67200,0.000000,0,67200.0000,3.886354e+04,100,100,100
196,83161.05179,4941.530877,40000,0,66288.18028,194390.76290,13000,156000,11398.495100,0,167398.4951,1.974631e+06,60,20,40
197,94860.43043,3700.516002,130000,0,0.00000,228560.94640,9100,109200,0.000000,0,109200.0000,1.828932e+06,0,10,5
198,55936.83198,7575.993352,130000,0,0.00000,193512.82530,9100,109200,0.000000,0,109200.0000,1.022611e+06,0,40,20


In [None]:
# Append three synthetic columns to `data`. The global NumPy seed is fixed so
# the draws are repeatable; the draw order (beta, rand, choice) is kept as-is
# because every draw advances the same global random stream.
np.random.seed(42)
n_rows = len(data)

# age: Beta(2, 4) sample stretched from [0, 1] onto the 25-90 range.
data['age'] = 25 + (90 - 25) * np.random.beta(2, 4, size=n_rows)
# usCitizen: True when a uniform draw exceeds 0.9.
data['usCitizen'] = np.random.rand(n_rows) > 0.9
# propertyType: one of the two labels, picked uniformly at random per row.
data['propertyType'] = np.random.choice(["investment", "owner-occupied"], size=n_rows)

### Beschreibung der Tabelle
| Spalte | Beschreibung |
| :------ | ------: |
| barvermoegen | Barvermögen in CHF |
| aktienvermoegen | Vermögen in Aktien in CHF |
| saeule3a | Kontostand Säule 3a in CHF |
| lebensversicherung | Kontostand Lebensversicherung in CHF |
| pensionskasse | Kontostand Pensionskasse in CHF |
| totalVermoegen | Summe aller Vermögenswerte in CHF (barvermoegen + aktienvermoegen + saeule3a + lebensversicherung + pensionskasse) |
| lohn | Monatslohn in CHF |
| lohnJahr | Jahreslohn in CHF |
| bonus | Jahresbonus in CHF |
| zulagen | Monatszulagen in CHF |
| totalLohn | Totaler Jahreslohn in CHF (lohnJahr + bonus + zulagen) |
| schaetzungObjekt | Objektkosten in CHF |
| anzahlung | Score der Anzahlung in % |
| tragbarkeit | Score der Tragbarkeit in % |
| score | Totaler Score in % |
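| age | Alter der Person in Jahren (synthetisch erzeugt) |
| usCitizen | US-Bürger: ja/nein (synthetisch erzeugt) |
| propertyType | Objekttyp: "investment" oder "owner-occupied" (synthetisch erzeugt) |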

In [None]:
# Model inputs: the two totals, the object estimate, the citizenship flag and age.
col = ["totalVermoegen", "totalLohn", "schaetzungObjekt", "usCitizen", "age"]
X = data[col]

# Binary target: the percentage score divided by 100 and rounded. For scores
# between 0 and 100 this yields 0.0 or 1.0. np.round rounds exact halves to the
# nearest even value, so a score of exactly 50 lands in class 0.
y = np.round(data["score"] / 100)

# Hold out 30% of the rows for evaluation. random_state pins the shuffle so the
# split no longer depends on the state of NumPy's global random generator (and
# therefore on which cells were run before this one).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# Build a logistic-regression classifier with scikit-learn's default settings,
# then train it on the training split. fit() hands back the fitted estimator,
# which is what `model` refers to afterwards.
# NOTE(review): the features are passed in unscaled — consider standardising
# them if the solver reports convergence warnings.
classifier = lm.LogisticRegression()
model = classifier.fit(X_train, y_train)

In [None]:
# Predict class labels for the held-out rows.
y_predict_score = model.predict(X_test)

# Rows of the confusion matrix are the true classes, columns the predicted ones.
confm = metrics.confusion_matrix(y_test, y_predict_score)
print("confusion matrix:", "\n", confm, "\n")

# Accuracy = correctly classified samples (the diagonal) over all samples.
# Taking the trace instead of indexing [0][0] and [1][1] keeps this valid for
# any number of classes, including a matrix that collapses to 1x1 when only
# one class occurs in the test split.
score = np.trace(confm) / np.sum(confm)
print("score:", score)

confusion matrix: 
 [[159  11]
 [  4 126]] 

score: 0.95


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=03c1f381-4bef-44c8-9167-00386ebceba9' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>