**Libraries/Imports**

In [68]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report


**Data Preprocessing**

In [64]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Reading in raw Pokemon Database.csv
raw = pd.read_csv('Pokemon Database.csv')

# Cleaning string values
for index, pokemon in raw.iterrows():
    for column in raw.columns:
        if isinstance(pokemon[column], str):
            raw.at[index, column] = pokemon[column][1:-1]

# Converting Alternate Form Name to Correct Names
raw["Alternate Form Name"] = raw["Alternate Form Name"].replace({
    "Hisui": "Hisuian",
    "Alola": "Alolan",
    "Galar": "Galarian"
})    

# Updating Pokemon Names and Handling Missing Values
for index, pokemon in raw.iterrows():
    if pd.isna(pokemon['Legendary Type']):
        raw.at[index, "Legendary Type"] = "Regular"
    if pd.isna(pokemon["Secondary Type"]):
        raw.at[index, "Secondary Type"] = pokemon["Primary Type"]     
    alternate_form = pokemon['Alternate Form Name']
    if not pd.isna(alternate_form) and isinstance(alternate_form, str):
        if alternate_form in ["Mega X", "Mega Y"]:
            raw.at[index, "Pokemon Name"] = f"Mega {raw.at[index, 'Pokemon Name']} {alternate_form[-1]}"
        elif pokemon["Pokemon Name"] in ["Unown", "Hoopa"]:
            raw.at[index, "Pokemon Name"] = f"{raw.at[index, 'Pokemon Name']} {alternate_form}"
        else:
            raw.at[index, "Pokemon Name"] = f"{alternate_form} {raw.at[index, 'Pokemon Name']}"

# Selecting Relevant Columns
relevant = raw[['Pokemon Id', 'Pokedex Number', 'Pokemon Name',
       'Alternate Form Name', 'Original Pokemon ID', 'Legendary Type',
       'Pokemon Height', 'Pokemon Weight', 'Primary Type', 'Secondary Type', 
       'Male Ratio', 'Female Ratio', 'Base Happiness', 'Health Stat', 'Attack Stat',
       'Defense Stat', 'Special Attack Stat', 'Special Defense Stat',
       'Speed Stat', 'Base Stat Total', 'Health EV', 'Attack EV', 'Defense EV',
       'Special Attack EV', 'Special Defense EV', 'Speed EV', 'EV Yield Total',
       'Catch Rate', 'Experience Growth', 'Experience Growth Total', 'Egg Cycle Count']]

# Remove Gigantamax Forms
relevant = relevant.loc[relevant['Alternate Form Name'] != 'Gigantamax']
relevant = relevant.reset_index()

# Define Features for Transformation
features = ['Legendary Type', 'Pokemon Height', 'Pokemon Weight', 'Primary Type', 'Secondary Type',
            'Male Ratio', 'Female Ratio', 'Base Happiness', 'Health Stat', 'Attack Stat', 'Defense Stat', 
            'Special Attack Stat', 'Special Defense Stat', 'Speed Stat', 'Base Stat Total', 'Health EV', 
            'Attack EV', 'Defense EV', 'Special Attack EV', 'Special Defense EV', 'Speed EV', 
            'EV Yield Total', 'Catch Rate', 'Experience Growth', 'Experience Growth Total', 'Egg Cycle Count'] 

# Define Numerical Features
numerical_features = [col for col in features if col not in ['Legendary Type', 'Experience Growth', 'Primary Type', 'Secondary Type']]

# Apply Label Encoding to Typings (For Classification)
label_encoder_primary = LabelEncoder()
label_encoder_secondary = LabelEncoder()

relevant['Primary Typing Label'] = label_encoder_primary.fit_transform(relevant['Primary Type'])
relevant['Secondary Typing Label'] = label_encoder_secondary.fit_transform(relevant['Secondary Type'])

# Define One-Hot Encoding for Categorical Features, Scaling Values
categorical_features = ['Legendary Type', 'Experience Growth']
transformer = ColumnTransformer([
    ('num', StandardScaler(), numerical_features),  
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)  
])

# Apply Transformations
transformed = transformer.fit_transform(relevant[features])
encoded_feature_names = transformer.get_feature_names_out()

# Convert to DataFrame
processed = pd.DataFrame(transformed, columns=encoded_feature_names)

# Add Primary & Secondary Typing Labels
processed['Primary Typing Label'] = relevant['Primary Typing Label']
processed['Secondary Typing Label'] = relevant['Secondary Typing Label']

# Save Processed Data
print(processed.shape)
processed.to_csv('processed_data.csv', index=False)

(1350, 34)


**SVM Model**

In [69]:
# Defining Features and Labels Matricies
X = processed.drop(columns=['Primary Typing Label', 'Secondary Typing Label'])
y = processed[['Primary Typing Label']]

# Split into training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Declare Classifier and Train
classifier = SVC(kernel='rbf', decision_function_shape='ovr')  # 'ovo' also works
classifier.fit(X_train, y_train)

# Predict
y_pred = classifier.predict(X_test)

# Classification Report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.50      0.38      0.43        26
           1       0.00      0.00      0.00        16
           2       0.36      0.56      0.43         9
           3       0.36      0.31      0.33        13
           4       0.89      0.67      0.76        12
           5       0.25      0.22      0.24         9
           6       0.43      0.23      0.30        13
           8       0.14      0.14      0.14         7
           9       0.21      0.35      0.27        17
          10       0.10      0.12      0.11         8
          11       0.00      0.00      0.00         9
          12       0.37      0.91      0.53        32
          13       0.00      0.00      0.00        11
          14       0.59      0.45      0.51        22
          15       0.88      0.29      0.44        24
          16       0.60      0.55      0.57        11
          17       0.23      0.32      0.27        31

    accuracy              

  y = column_or_1d(y, warn=True)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
