In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [2]:
herodata = pd.read_csv('../data/fix.csv')

In [3]:
herodata.shape

(128, 1)

In [None]:
herodata.info()

In [None]:
# split_columns = herodata['Hero Name;Win Rate (%);Popularity (%);Ban Rate (%);Hero ID;Roles;Specialties;Recommended Lane;Release year;Tier;Meta;Difficulty;Durability Rating;Ability Effects Rating;Offense Rating;Scaling Rating;Cooldown Rating;Item Dependency Rating;Mobility Rating;Crowd Control Rating;Base Stats Growth Rating;Ultimate Impact Rating;Column1;Gameplay;Sinergi'].str.split(';', expand=True)

In [None]:
# column_names = [
#     'Hero Name', 'Win Rate (%)', 'Popularity (%)', 'Ban Rate (%)', 'Hero ID', 'Roles', 
#     'Specialties', 'Recommended Lane', 'Release year', 'Tier', 'Meta', 'Difficulty', 
#     'Durability Rating', 'Ability Effects Rating', 'Offense Rating', 'Scaling Rating', 
#     'Cooldown Rating', 'Item Dependency Rating', 'Mobility Rating', 'Crowd Control Rating', 
#     'Base Stats Growth Rating', 'Ultimate Impact Rating', 'Column1', 'Gameplay', 'Sinergi'
# ]

In [None]:
# split_columns.columns = column_names

In [None]:
# herodata = pd.concat([herodata, split_columns], axis=1)

In [None]:
# herodata.drop(columns=['Hero Name;Win Rate (%);Popularity (%);Ban Rate (%);Hero ID;Roles;Specialties;Recommended Lane;Release year;Tier;Meta;Difficulty;Durability Rating;Ability Effects Rating;Offense Rating;Scaling Rating;Cooldown Rating;Item Dependency Rating;Mobility Rating;Crowd Control Rating;Base Stats Growth Rating;Ultimate Impact Rating;Column1;Gameplay;Sinergi'], inplace=True)

In [None]:
herodata.isnull().sum()

In [None]:
# replace missing values with 0
herodata['Second Lane'].fillna('Tidak Ada', inplace=True)

In [None]:
herodata[['Scaling Rating', 'Recommended Lane', 'Second Lane',
       'Cooldown Rating', 'Item Dependency Rating', 'Mobility Rating',
       'Crowd Control Rating', 'Base Stats Growth Rating']].head()

In [None]:
herodata[['Hero Name' ,'Ultimate Impact Rating']].head()

In [None]:
herodata['Recommended Lane'].unique()

In [None]:
herodata['Ultimate Impact Rating'].unique()

In [None]:
herodata.columns

In [None]:
herodataEncoded = herodata[['Scaling Rating', 'Recommended Lane', 'Second Lane',
       'Cooldown Rating', 'Item Dependency Rating', 'Mobility Rating',
       'Crowd Control Rating', 'Base Stats Growth Rating']]

In [None]:
labelEncoder = LabelEncoder()

label_mappings = {}

for col in herodataEncoded.columns:
    herodataEncoded[col] = labelEncoder.fit_transform(herodataEncoded[col])
    label_mappings[col] = dict(zip(labelEncoder.classes_, labelEncoder.transform(labelEncoder.classes_)))

In [None]:
encoder = OneHotEncoder(sparse_output=False)
oneCol = encoder.fit_transform(herodata[['Ultimate Impact Rating']])

In [None]:
encoded_columns = encoder.get_feature_names_out(['Ultimate Impact Rating'])

In [None]:
oneCol = pd.DataFrame(oneCol, columns=encoded_columns)

In [None]:
sortedCol =  herodata[['Hero Name', 'Win Rate (%)', 'Popularity (%)', 'Ban Rate (%)']]

In [None]:
fitur = pd.concat([sortedCol, herodataEncoded, oneCol], axis=1)

In [None]:
fitur.columns

In [None]:
fitur[['Scaling Rating', 'Recommended Lane', 'Second Lane',
       'Cooldown Rating', 'Item Dependency Rating', 'Mobility Rating',
       'Crowd Control Rating', 'Base Stats Growth Rating']].head()

In [None]:
oneCol.head()

In [None]:
fitur.head()

In [None]:
fitur['Recommended Lane'].unique()

In [None]:
fitur.info()

In [None]:
fitur[['Win Rate (%)', 'Popularity (%)', 'Ban Rate (%)']] = fitur[['Win Rate (%)', 'Popularity (%)', 'Ban Rate (%)']].astype(float) / 100

In [None]:
fitur.head(10)

In [None]:
fitur.describe()

In [None]:
fitur.columns

In [None]:
def calculateStrength(row):
    
    # Bobot untuk setiap kategori
    weights = {
        'Win Rate': 0.3,
        'Popularity': 0.1,
        'Ban Rate': 0.1,
        'Scaling': 0.05,
        'Cooldown': 0.05,
        'Item Dependency': 0.05,
        'Mobility': 0.05,
        'Crowd Control': 0.05,
        'Base Stats Growth': 0.05,
        'Ultimate Impact All Phases': 0.05,
        'Ultimate Impact Early Game': 0.03,
        'Ultimate Impact Late Game': 0.03,
        'Ultimate Impact Mid Game': 0.03,
        'Ultimate Impact Support': 0.03
    }
    
    # Normalisasi dan perhitungan
    strength = (
        row['Win Rate (%)'] / 100 * weights['Win Rate'] +
        row['Popularity (%)'] * weights['Popularity'] +
        row['Ban Rate (%)'] / 100 * weights['Ban Rate'] +
        row['Scaling Rating'] * weights['Scaling'] +
        row['Cooldown Rating'] * weights['Cooldown'] +
        row['Item Dependency Rating'] * weights['Item Dependency'] +
        row['Mobility Rating'] * weights['Mobility'] +
        row['Crowd Control Rating'] * weights['Crowd Control'] +
        row['Base Stats Growth Rating'] * weights['Base Stats Growth'] +
        row['Ultimate Impact Rating_All Game Phases'] * weights['Ultimate Impact All Phases'] +
        row['Ultimate Impact Rating_Early Game'] * weights['Ultimate Impact Early Game'] +
        row['Ultimate Impact Rating_Late Game'] * weights['Ultimate Impact Late Game'] +
        row['Ultimate Impact Rating_Mid Game'] * weights['Ultimate Impact Mid Game'] +
        row['Ultimate Impact Rating_Support'] * weights['Ultimate Impact Support']
    ) * 100  
    
    return round(strength, 2)

In [None]:
fitur['Strength Rating (%)'] = fitur.apply(calculateStrength, axis=1)
fitur = fitur.sort_values('Strength Rating (%)', ascending=False)

In [None]:
fitur.insert(1, 'Role', herodata['Roles'])

In [None]:
# fitur.to_csv('../data/list Hero.csv', index=False)

In [None]:
fitur.columns

In [None]:
def normalize_role(role):
    if role.startswith('Fighter'):
        return 'Fighter'
    elif role.startswith('Tank'):
        return 'Tank'
    elif role.startswith('Assassin'):
        return 'Assassin'
    elif role.startswith('Mage'):
        return 'Mage'
    elif role.startswith('Marksman'):
        return 'Marksman'
    elif role.startswith('Support'):
        return 'Support'
    return role  

fitur['Role'] = fitur['Role'].apply(normalize_role)

In [None]:
def normalize_role(role):
    if role.startswith('Fighter'):
        return 'Exp Lane'
    elif role.startswith('Tank'):
        return 'Roam'
    elif role.startswith('Assassin'):
        return 'Jungler'
    elif role.startswith('Mage'):
        return 'Mid Laner'
    elif role.startswith('Marksman'):
        return 'Gold Laner'
    elif role.startswith('Support'):
        return 'Roam'
    return role  

fitur['Role'] = fitur['Role'].apply(normalize_role)

In [None]:
def getHeroData(hero_name):
    heroData = fitur[fitur['Hero Name'].str.lower() == hero_name.lower()]
    if not heroData.empty:
        return heroData.iloc[0]
    else:
        return None

In [None]:
def matchingHeroLane(dataHero, labelMap, df):
    # print('Ini DataHero', dataHero)
    # print('Ini DF', df)
    # Reverse the mappings in labelMap
    reverse_mappings = {
        col: {v: k for k, v in mapping.items()}
        for col, mapping in labelMap.items()
    }

    # Define the mapping of index to lane
    index_to_lane = {
        0: 'Jungler',
        1: 'Mid Laner',
        2: 'Gold Laner',
        3: 'Exp Lane',
        4: 'Roam'
    }

    unmatchedLane = 0  # Default to 0 if no unmatched lanes are found

    if isinstance(dataHero, dict):
        # Convert dataHero to a pandas Series if it's a dictionary
        dataHero = pd.Series(dataHero)

    if dataHero is not None:
        # Decode 'Recommended Lane' and 'Second Lane' using reverse_mappings
        dataHero['Recommended Lane'] = reverse_mappings['Recommended Lane'].get(
            dataHero['Recommended Lane'], dataHero['Recommended Lane']
        )

        dataHero['Second Lane'] = reverse_mappings['Second Lane'].get(
            dataHero['Second Lane'], dataHero['Second Lane']
        )

        # Check if the hero's lanes match the lane corresponding to the index
        for index, lane in index_to_lane.items():
            print('Lane iterasi saat ini:', lane)
            print('Reccomended Lane saat ini:', dataHero['Recommended Lane'])
            print('Second Lane saat ini:', dataHero['Second Lane'])
            if dataHero['Recommended Lane'] != lane and dataHero['Second Lane'] != lane:
                # print(f"Unmatched lane: {lane} for hero {dataHero['Hero Name']} rec lane {dataHero['Role']} sec lane {dataHero['Second Lane']}")
                unmatchedLane += 1
            else:
                unmatchedLane += 0
    else:
        print("dataHero is empty")

    return unmatchedLane  # Return the number of unmatched lanes

In [None]:
def calculateTeamStrength(team, labelMap, df):
    totalStrength = 0
    hero_data = {}
    for hero in team:
        data = getHeroData(hero)
        
        unmatchedLane = matchingHeroLane(data, labelMap, df)

        if data is not None:
            totalStrength += data['Strength Rating (%)']
            hero_data[hero] = data
        else:
            print(f"Hero {hero} tidak ditemukan dalam dataset.")
    return totalStrength, hero_data, unmatchedLane

In [None]:
def calculateWinPercentage(team1, team2, df, labelMap):
    # Hitung kekuatan dan data dari masing-masing tim
    team1Strength, team1_data, unmatchedLane1 = calculateTeamStrength(team1, labelMap, df)
    team2Strength, team2_data, unmatchedLane2 = calculateTeamStrength(team2, labelMap, df)

    print(f"Lane yang tidak cocok untuk tim 1: {unmatchedLane1}")
    print(f"Lane yang tidak cocok untuk tim 2: {unmatchedLane2}")

    # Hitung total kekuatan gabungan
    totalStrength = team1Strength + team2Strength
    if totalStrength == 0:
        return 50.0, 50.0, team1_data, team2_data  # Hindari pembagian dengan nol

    # Hitung persentase dasar berdasarkan kekuatan
    team1Base = (team1Strength / totalStrength) * 100
    team2Base = (team2Strength / totalStrength) * 100

    # Terapkan penalti dari lane yang tidak cocok (anggap 5% per mismatch)
    penalty_per_unmatched = 5
    team1Penalty = unmatchedLane1 * penalty_per_unmatched
    team2Penalty = unmatchedLane2 * penalty_per_unmatched

    team1WinPercentage = max(team1Base - team1Penalty, 0)
    team2WinPercentage = max(team2Base - team2Penalty, 0)

    # Normalisasi ulang agar total tetap 100%
    total = team1WinPercentage + team2WinPercentage
    if total > 0:
        team1WinPercentage = (team1WinPercentage / total) * 100
        team2WinPercentage = (team2WinPercentage / total) * 100
    else:
        team1WinPercentage = 50.0
        team2WinPercentage = 50.0

    return team1WinPercentage, team2WinPercentage, team1_data, team2_data

In [None]:
def createTeamBasedOnRoles(hero_data, excluded_heroes=[]):
    # Role baru yang digunakan untuk tim
    roles = ['Exp Lane', 'Roam', 'Mid Laner', 'Gold Laner', 'Jungler']
    team = []
    
    for role in roles:
        # Filter hero yang memiliki role yang sesuai dan tidak termasuk dalam excluded_heroes
        heroes_with_role = hero_data[hero_data['Role'].str.contains(role) & (~hero_data['Hero Name'].isin(excluded_heroes))]
        
        # Jika ada hero yang tersisa, pilih satu secara acak
        if not heroes_with_role.empty:
            selected_hero = heroes_with_role.sample(1)
            team.append(selected_hero['Hero Name'].values[0])
            excluded_heroes.append(selected_hero['Hero Name'].values[0])  

    return team

In [None]:
print(fitur['Role'].value_counts())

In [None]:
def generate_match_data(team1, team2, team1_data, team2_data, labelMap, df):
    team1WinPercentage, team2WinPercentage, team1_data_processed, team2_data_processed = calculateWinPercentage(team1, team2, df, labelMap)
    
    # Data untuk setiap hero di Tim 1
    team1_data_processed = {
        f'team1_Hero_{i+1}_Name': team1_data[hero]['Hero Name'] for i, hero in enumerate(team1)
    }
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Role': team1_data[hero]['Role'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Win_Rate': team1_data[hero]['Win Rate (%)'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Popularity': team1_data[hero]['Popularity (%)'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Ban_Rate': team1_data[hero]['Ban Rate (%)'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Scaling_Rating': team1_data[hero]['Scaling Rating'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Cooldown_Rating': team1_data[hero]['Cooldown Rating'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Item_Dependency_Rating': team1_data[hero]['Item Dependency Rating'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Mobility_Rating': team1_data[hero]['Mobility Rating'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Crowd_Control_Rating': team1_data[hero]['Crowd Control Rating'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Base_Stats_Growth_Rating': team1_data[hero]['Base Stats Growth Rating'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Ultimate_Impact_Rating_All_Game_Phases': team1_data[hero]['Ultimate Impact Rating_All Game Phases'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Ultimate_Impact_Rating_Early_Game': team1_data[hero]['Ultimate Impact Rating_Early Game'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Ultimate_Impact_Rating_Late_Game': team1_data[hero]['Ultimate Impact Rating_Late Game'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Ultimate_Impact_Rating_Mid_Game': team1_data[hero]['Ultimate Impact Rating_Mid Game'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Ultimate_Impact_Rating_Support': team1_data[hero]['Ultimate Impact Rating_Support'] for i, hero in enumerate(team1)
    })
    team1_data_processed.update({
        f'team1_Hero_{i+1}_Strength_Rating': team1_data[hero]['Strength Rating (%)'] for i, hero in enumerate(team1)
    })
    
    # Data untuk setiap hero di Tim 2
    team2_data_processed = {
        f'team2_Hero_{i+1}_Name': team2_data[hero]['Hero Name'] for i, hero in enumerate(team2)
    }
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Role': team2_data[hero]['Role'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Win_Rate': team2_data[hero]['Win Rate (%)'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Popularity': team2_data[hero]['Popularity (%)'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Ban_Rate': team2_data[hero]['Ban Rate (%)'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Scaling_Rating': team2_data[hero]['Scaling Rating'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Cooldown_Rating': team2_data[hero]['Cooldown Rating'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Item_Dependency_Rating': team2_data[hero]['Item Dependency Rating'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Mobility_Rating': team2_data[hero]['Mobility Rating'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Crowd_Control_Rating': team2_data[hero]['Crowd Control Rating'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Base_Stats_Growth_Rating': team2_data[hero]['Base Stats Growth Rating'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Ultimate_Impact_Rating_All_Game_Phases': team2_data[hero]['Ultimate Impact Rating_All Game Phases'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Ultimate_Impact_Rating_Early_Game': team2_data[hero]['Ultimate Impact Rating_Early Game'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Ultimate_Impact_Rating_Late_Game': team2_data[hero]['Ultimate Impact Rating_Late Game'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Ultimate_Impact_Rating_Mid_Game': team2_data[hero]['Ultimate Impact Rating_Mid Game'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Ultimate_Impact_Rating_Support': team2_data[hero]['Ultimate Impact Rating_Support'] for i, hero in enumerate(team2)
    })
    team2_data_processed.update({
        f'team2_Hero_{i+1}_Strength_Rating': team2_data[hero]['Strength Rating (%)'] for i, hero in enumerate(team2)
    })
    
    # Data tim 1 dan tim 2
    match_data = {**team1_data_processed, **team2_data_processed, 'Persentase_Kemenangan_Tim_1': team1WinPercentage, 'Persentase_Kemenangan_Tim_2': team2WinPercentage}
    
    return match_data

In [None]:
matches = []
for _ in range(20000):  # Increase the number of matches to generate
    excluded_heroes_team1 = []
    excluded_heroes_team2 = []
    
    # Membuat tim 1 dengan memilih hero berdasarkan role
    team1 = createTeamBasedOnRoles(fitur, excluded_heroes_team1)
    excluded_heroes_team1.extend(team1)  # Menambahkan hero yang sudah dipilih ke excluded list tim 1
    
    # Membuat tim 2 dengan memilih hero berdasarkan role, pastikan hero tidak duplikat dengan tim 1
    team2 = createTeamBasedOnRoles(fitur, excluded_heroes_team1 + excluded_heroes_team2)
    excluded_heroes_team2.extend(team2)  # Menambahkan hero yang sudah dipilih ke excluded list tim 2
    
    if len(team1) == 5 and len(team2) == 5:
        # Mendapatkan data hero untuk setiap hero yang telah dipilih dalam tim 1 dan tim 2
        team1_data = {hero: fitur[fitur['Hero Name'] == hero].iloc[0].to_dict() for hero in team1}
        team2_data = {hero: fitur[fitur['Hero Name'] == hero].iloc[0].to_dict() for hero in team2}
        
        match_data = generate_match_data(team1, team2, team1_data, team2_data, label_mappings, fitur)
        matches.append(match_data)
    else:
        print(f"Skipping match generation: Team 1 or Team 2 didn't get 5 heroes.")

In [None]:
match = pd.DataFrame(matches)

In [None]:
pd.set_option('display.max_columns', None) 
pd.set_option('display.width', None)

In [None]:
# Menampilkan nama-nama kolom
print(match.columns.tolist())

In [None]:
match.head(10)

In [None]:
def labelResult(row):
    if row['Persentase_Kemenangan_Tim_1'] > row['Persentase_Kemenangan_Tim_2']:
        return 'Menang'
    elif row['Persentase_Kemenangan_Tim_1'] < row['Persentase_Kemenangan_Tim_2']:
        return 'Kalah'
    else:
        return 'Seri'

In [None]:
match['result'] = match.apply(labelResult, axis=1)

In [None]:
match['result'].value_counts()

In [None]:
pd.set_option('display.max_columns', None)

In [None]:
match.to_csv('../data/match history.csv', index=False)

In [None]:
team1 = ['Lolita', 'Harith', 'Vexana', 'Gatotkaca','Hayabusa']
team2 = ['Hylos', 'Ling', 'Edith', 'Moskov','Xavier']

In [None]:
fitur.head(5)

In [None]:
team1WinPercentage, team2WinPercentage, team1, team2 = calculateWinPercentage(team1, team2, fitur, label_mappings)

In [None]:
def checkMatch(row) :
    if row['Persentase_Kemenangan_Tim_1'] > row['Persentase_Kemenangan_Tim_2'] and row['result'] == 'Menang' :
        return 'Valid'
    if row['Persentase_Kemenangan_Tim_2'] > row['Persentase_Kemenangan_Tim_1'] and row['result'] == 'Kalah' :
        return 'Valid'
    if row['Persentase_Kemenangan_Tim_1'] == row['Persentase_Kemenangan_Tim_2'] and row['result'] == 'Seri' :
        return 'Draw'
    else :
        return 'Tidak Valid'    

In [None]:
match['validasi'] = match.apply(checkMatch, axis=1)
match[match['validasi'] == 'Tidak Valid'].head()

In [None]:
# print(f"Tim 1: {team1}")
# print(f"Tim 2: {team2}")
print(f"Persentase kemenangan Tim 1: {team1WinPercentage:.2f}%")
print(f"Persentase kemenangan Tim 2: {team2WinPercentage:.2f}%")

In [None]:
numerical_features = fitur.select_dtypes(include=['number'])
correlation_matrix = numerical_features.corr()

plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.title('Matriks Korelasi Antar Fitur Numerik')
plt.show()

In [None]:
filtered_corr = correlation_matrix[correlation_matrix > 0.5]

In [None]:
filtered_corr.index

In [None]:
print(match.columns.tolist())

In [None]:
features = match.select_dtypes(include=['number'])
target = match['result']

In [None]:
features.columns

In [None]:
features.head()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  
X_test_scaled = scaler.transform(X_test)

In [None]:
X_train_scaled.shape

In [None]:
X_train_scaled

In [None]:
pca = PCA(n_components=0.95) 
X_train_pca = pca.fit_transform(X_train_scaled)

In [None]:
X_train_pca.shape

In [None]:
print("Rasio varians yang dijelaskan oleh setiap komponen PCA:")
print(pca.explained_variance_ratio_)

In [None]:
print("Kontribusi fitur asli terhadap komponen utama:")
print(pca.components_)

In [None]:
X_test_pca = pca.transform(X_test_scaled)

In [None]:
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
model.fit(X_train_pca, y_train)

In [None]:
y_pred = model.predict(X_test_pca)

In [None]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Akurasi Model: {accuracy}")

In [None]:
print("Classification Report:")
print(classification_report(y_test, y_pred))

In [None]:
plt.figure(figsize=(8, 6))

sns.scatterplot(x=X_test_pca[:, 0], y=X_test_pca[:, 1], hue=y_test, palette='viridis', s=100, alpha=0.7)

plt.title("Visualisasi Data Uji Setelah PCA (2 Komponen Utama)")
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.legend(title='Label', loc='upper right')
plt.show()

In [None]:
with open('../models/model.pkl', 'wb') as f:
    pickle.dump(model, f)
    
with open('../models/pca.pkl', 'wb') as f:
    pickle.dump(pca, f)