In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

In [2]:
# Toy dataset: one row per character, with two categorical attributes
# (Alignment, Power_Source) that are one-hot encoded further down.
data = dict(
    Name=['Spider-Man', 'Iron Man', 'Thanos', 'Loki', 'Black Widow', 'Venom'],
    Alignment=['Good', 'Good', 'Bad', 'Neutral', 'Good', 'Neutral'],
    Power_Source=['Radiation', 'Technology', 'Eternal', 'Magic', 'Training', 'Symbiote'],
)

# Column-oriented dict -> DataFrame (keys become the column names, in order).
df = pd.DataFrame.from_dict(data)

# Show the untouched input as plain text.
print("Original Datensatz:")
print(df)

Original Datensatz:
          Name Alignment Power_Source
0   Spider-Man      Good    Radiation
1     Iron Man      Good   Technology
2       Thanos       Bad      Eternal
3         Loki   Neutral        Magic
4  Black Widow      Good     Training
5        Venom   Neutral     Symbiote


In [3]:
# Set up the one-hot encoder.
# - handle_unknown='ignore': matters if a category that was not present
#   during fitting (e.g. 'Mutant') shows up later at transform time.
# - sparse_output=False: the transform result is a dense NumPy array.
encoder = OneHotEncoder(
    handle_unknown='ignore',
    sparse_output=False,
)

In [4]:
# Names of the categorical columns that will be one-hot encoded.
features_to_encode = [
    "Alignment",
    "Power_Source",
]

In [5]:
# Fit the encoder on the selected categorical columns and transform them
# in one step; the result has one 0/1 column per (feature, category) pair.
encoded_data = encoder.fit_transform(df.loc[:, features_to_encode])
encoded_data

array([[0., 1., 0., 0., 0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 1., 0.],
       [1., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 1., 0., 0.]])

In [6]:
# Ask the fitted encoder for the generated column names
# (pattern: <original column>_<category>).
encoded_columns = encoder.get_feature_names_out(input_features=features_to_encode)
encoded_columns

array(['Alignment_Bad', 'Alignment_Good', 'Alignment_Neutral',
       'Power_Source_Eternal', 'Power_Source_Magic',
       'Power_Source_Radiation', 'Power_Source_Symbiote',
       'Power_Source_Technology', 'Power_Source_Training'], dtype=object)

In [7]:
# Wrap the encoded array in a DataFrame with the generated column names.
# The source frame's index is reused on purpose: pandas aligns on index
# labels when frames are combined column-wise, so a fresh 0..n-1 index would
# silently misalign rows (and introduce NaNs) whenever `df` does not carry the
# default RangeIndex, e.g. after filtering or sorting.
df_encoded = pd.DataFrame(encoded_data, columns=encoded_columns, index=df.index)
df_encoded

Unnamed: 0,Alignment_Bad,Alignment_Good,Alignment_Neutral,Power_Source_Eternal,Power_Source_Magic,Power_Source_Radiation,Power_Source_Symbiote,Power_Source_Technology,Power_Source_Training
0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


In [8]:
# Put the character name back in front of the encoded columns so that each
# row can still be identified.
df_final = pd.concat(
    [df["Name"], df_encoded],
    axis="columns",
)
df_final

Unnamed: 0,Name,Alignment_Bad,Alignment_Good,Alignment_Neutral,Power_Source_Eternal,Power_Source_Magic,Power_Source_Radiation,Power_Source_Symbiote,Power_Source_Technology,Power_Source_Training
0,Spider-Man,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,Iron Man,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,Thanos,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,Loki,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
4,Black Widow,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5,Venom,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
