In [5]:
import pandas as pd
import numpy as np
from scipy.linalg import lstsq
from scipy.optimize import nnls
import warnings

# Memuat file-file yang diperlukan
standardized_data = pd.read_csv('standardized_data.csv')  # File Z-Score
archetype_values = pd.read_csv('archetype_values.csv')  # File Nilai Arketipe
membership_matrix = pd.read_csv('membership_matrix.csv')  # File Matriks Keanggotaan

print("Data yang dimuat:")
print(f"Standardized data shape: {standardized_data.shape}")
print(f"Archetype values shape: {archetype_values.shape}")
print(f"Membership matrix shape: {membership_matrix.shape}")

# 1. Persiapkan Matriks X (Z-Score data)
# X berukuran (n_samples, n_features)
X = standardized_data[['jumlah_game_z', 'total_waktu_z', 'total_achievements_z']].values
n_samples, n_features = X.shape
print(f"\nMatriks X shape: {X.shape}")

# 2. Persiapkan Matriks B (Keanggotaan)
# B berukuran (n_samples, n_archetypes)
B = membership_matrix[['Arketipe 1', 'Arketipe 2', 'Arketipe 3', 'Arketipe 4']].values
n_archetypes = B.shape[1]
print(f"Matriks B shape: {B.shape}")

# Validasi: pastikan jumlah baris B sama dengan jumlah baris X
assert B.shape[0] == X.shape[0], "Jumlah sampel dalam B dan X harus sama"

# 3. Hitung Nilai Arketipe (Z) menggunakan weighted average
# Z berukuran (n_archetypes, n_features)
Z = np.zeros((n_archetypes, n_features))

for i in range(n_archetypes):
    # Hitung bobot untuk setiap arketipe
    weights = B[:, i]
    # Hindari pembagian dengan nol
    weight_sum = np.sum(weights)
    if weight_sum > 0:
        # Weighted average untuk setiap fitur
        Z[i, :] = np.sum(X * weights.reshape(-1, 1), axis=0) / weight_sum
    else:
        print(f"Warning: Arketipe {i+1} memiliki total bobot 0")

print(f"\nMatriks Z (Archetypal values) shape: {Z.shape}")
print("Nilai Arketipe Z:")
print(Z)

# 4. Perhitungan Matriks A (Koefisien Archetypal)
# Dalam archetypal analysis: X ≈ A * Z
# Dimana A berukuran (n_samples, n_archetypes)
# Kita perlu menyelesaikan: X = A * Z untuk mendapatkan A

# Metode 1: Menggunakan least squares biasa
print("\n=== Metode 1: Least Squares Biasa ===")
A_lstsq = np.zeros((n_samples, n_archetypes))

for i in range(n_samples):
    # Untuk setiap sampel, selesaikan: X[i] = A[i] * Z
    # Atau: Z.T * A[i].T = X[i].T
    A_lstsq[i, :], residuals, rank, s = lstsq(Z.T, X[i, :])

print(f"Matriks A (least squares) shape: {A_lstsq.shape}")

# Metode 2: Menggunakan Non-Negative Least Squares (NNLS)
# Ini lebih sesuai untuk archetypal analysis karena koefisien harus non-negatif
print("\n=== Metode 2: Non-Negative Least Squares ===")
A_nnls = np.zeros((n_samples, n_archetypes))

for i in range(n_samples):
    # NNLS memastikan koefisien non-negatif
    A_nnls[i, :], residual = nnls(Z.T, X[i, :])

print(f"Matriks A (NNLS) shape: {A_nnls.shape}")

# 5. Normalisasi Matriks A (opsional)
# Dalam archetypal analysis, sering kali A dinormalisasi sehingga sum(A[i]) = 1
print("\n=== Normalisasi Matriks A ===")
A_normalized = A_nnls.copy()
for i in range(n_samples):
    row_sum = np.sum(A_normalized[i, :])
    if row_sum > 0:
        A_normalized[i, :] = A_normalized[i, :] / row_sum

# 6. Evaluasi kualitas rekonstruksi
print("\n=== Evaluasi Kualitas Rekonstruksi ===")

# Rekonstruksi data menggunakan A dan Z
X_reconstructed_lstsq = np.dot(A_lstsq, Z)
X_reconstructed_nnls = np.dot(A_nnls, Z)
X_reconstructed_normalized = np.dot(A_normalized, Z)

# Hitung Mean Squared Error (MSE)
mse_lstsq = np.mean((X - X_reconstructed_lstsq) ** 2)
mse_nnls = np.mean((X - X_reconstructed_nnls) ** 2)
mse_normalized = np.mean((X - X_reconstructed_normalized) ** 2)

print(f"MSE (Least Squares): {mse_lstsq:.6f}")
print(f"MSE (NNLS): {mse_nnls:.6f}")
print(f"MSE (Normalized): {mse_normalized:.6f}")

# 7. Pilih metode terbaik dan simpan hasil
print("\n=== Menyimpan Hasil ===")

# Pilih metode dengan MSE terendah
methods = {
    'lstsq': (A_lstsq, mse_lstsq),
    'nnls': (A_nnls, mse_nnls),
    'normalized': (A_normalized, mse_normalized)
}

best_method = min(methods.keys(), key=lambda x: methods[x][1])
best_A, best_mse = methods[best_method]

print(f"Metode terbaik: {best_method} dengan MSE: {best_mse:.6f}")

# Simpan Matriks A (dengan Steam ID sebagai index)
A_df = pd.DataFrame(
    best_A, 
    index=standardized_data['Steam ID'],
    columns=[f'Arketipe {i+1}' for i in range(n_archetypes)]
)

# Simpan ke file CSV
A_df.to_csv('matrices_A.csv')
print("Matriks A telah disimpan ke 'matrices_A.csv'")

# Simpan juga nilai arketipe yang telah dihitung
Z_df = pd.DataFrame(
    Z,
    index=[f'Arketipe {i+1}' for i in range(n_archetypes)],
    columns=['jumlah_game_z', 'total_waktu_z', 'total_achievements_z']
)
Z_df.to_csv('archetypal_values_calculated.csv')
print("Nilai arketipe telah disimpan ke 'archetypal_values_calculated.csv'")

# Tampilkan statistik deskriptif
print("\n=== Statistik Deskriptif Matriks A ===")
print(A_df.describe())

print("\n=== Beberapa Baris Pertama Matriks A ===")
print(A_df.head())

print("\n=== Distribusi Keanggotaan Arketipe ===")
# Tentukan arketipe dominan untuk setiap pemain
dominant_archetype = A_df.idxmax(axis=1)
archetype_counts = dominant_archetype.value_counts()
print(archetype_counts)

# Hitung persentase
archetype_percentages = (archetype_counts / len(A_df)) * 100
print("\nPersentase distribusi:")
for archetype, percentage in archetype_percentages.items():
    print(f"{archetype}: {percentage:.1f}%")

Data yang dimuat:
Standardized data shape: (44, 4)
Archetype values shape: (4, 4)
Membership matrix shape: (44, 5)

Matriks X shape: (44, 3)
Matriks B shape: (44, 4)

Matriks Z (Archetypal values) shape: (4, 3)
Nilai Arketipe Z:
[[ 0.15932921  0.10855383  0.00123837]
 [-0.06189753 -0.09477561  0.05283836]
 [-0.09711269  0.01148946 -0.00144197]
 [ 0.02084793 -0.01542777 -0.05229821]]

=== Metode 1: Least Squares Biasa ===
Matriks A (least squares) shape: (44, 4)

=== Metode 2: Non-Negative Least Squares ===
Matriks A (NNLS) shape: (44, 4)

=== Normalisasi Matriks A ===

=== Evaluasi Kualitas Rekonstruksi ===
MSE (Least Squares): 0.000000
MSE (NNLS): 0.000000
MSE (Normalized): 0.936046

=== Menyimpan Hasil ===
Metode terbaik: nnls dengan MSE: 0.000000
Matriks A telah disimpan ke 'matrices_A.csv'
Nilai arketipe telah disimpan ke 'archetypal_values_calculated.csv'

=== Statistik Deskriptif Matriks A ===
       Arketipe 1  Arketipe 2  Arketipe 3  Arketipe 4
count   44.000000   44.000000   4

In [6]:
import pandas as pd
import numpy as np

def convert_coefficients_to_percentage(input_file='matrices_A.csv', output_file='matrices_A_percentage.csv'):
    """
    Mengkonversi koefisien archetypal menjadi persentase desimal (0-1)
    
    Parameters:
    - input_file: nama file CSV yang berisi matriks A
    - output_file: nama file output untuk menyimpan hasil konversi
    """
    
    # Baca file matriks A
    A_df = pd.read_csv(input_file, index_col=0)
    
    print("=== DATA ASLI ===")
    print(f"Shape: {A_df.shape}")
    print("\nBeberapa baris pertama:")
    print(A_df.head())
    
    # Buat copy untuk hasil konversi
    A_percentage = A_df.copy()
    
    # Konversi setiap baris menjadi persentase desimal
    for index, row in A_percentage.iterrows():
        # Hitung total nilai dalam baris
        total = row.sum()
        
        # Jika total > 0, konversi ke persentase desimal
        if total > 0:
            A_percentage.loc[index] = row / total
        else:
            # Jika total = 0, set semua nilai ke 0
            A_percentage.loc[index] = 0
            print(f"Warning: Steam ID {index} memiliki total koefisien 0")
    
    print("\n=== DATA SETELAH KONVERSI ===")
    print("Beberapa baris pertama (dalam format persentase desimal):")
    print(A_percentage.head())
    
    # Validasi: pastikan setiap baris berjumlah 1 (atau mendekati 1)
    print("\n=== VALIDASI ===")
    row_sums = A_percentage.sum(axis=1)
    print(f"Jumlah setiap baris (harus ≈ 1.0):")
    print(f"Min: {row_sums.min():.6f}")
    print(f"Max: {row_sums.max():.6f}")
    print(f"Mean: {row_sums.mean():.6f}")
    
    # Contoh untuk Steam ID tertentu
    print("\n=== CONTOH KONVERSI ===")
    if len(A_percentage) > 0:
        # Ambil baris pertama sebagai contoh
        example_index = A_percentage.index[0]
        
        print(f"Steam ID: {example_index}")
        print("Nilai asli:")
        for col in A_df.columns:
            print(f"  {col}: {A_df.loc[example_index, col]:.6f}")
        
        print("Nilai setelah konversi (persentase desimal):")
        for col in A_percentage.columns:
            print(f"  {col}: {A_percentage.loc[example_index, col]:.6f}")
        
        print("Dalam format persentase (%):")
        for col in A_percentage.columns:
            print(f"  {col}: {A_percentage.loc[example_index, col]*100:.2f}%")
    
    # Simpan hasil ke file CSV
    A_percentage.to_csv(output_file)
    print(f"\nHasil konversi telah disimpan ke '{output_file}'")
    
    # Simpan juga file dengan format yang lebih rapi (rounded)
    A_percentage_rounded = A_percentage.round(6)
    rounded_file = output_file.replace('.csv', '_rounded.csv')
    A_percentage_rounded.to_csv(rounded_file)
    print(f"File dengan nilai yang dibulatkan disimpan ke '{rounded_file}'")
    
    # Statistik deskriptif
    print("\n=== STATISTIK DESKRIPTIF (PERSENTASE DESIMAL) ===")
    print(A_percentage.describe())
    
    # Distribusi arketipe dominan
    print("\n=== DISTRIBUSI ARKETIPE DOMINAN ===")
    dominant_archetype = A_percentage.idxmax(axis=1)
    archetype_counts = dominant_archetype.value_counts()
    archetype_percentages = (archetype_counts / len(A_percentage)) * 100
    
    print("Jumlah pemain per arketipe:")
    for archetype, count in archetype_counts.items():
        print(f"  {archetype}: {count} pemain ({archetype_percentages[archetype]:.1f}%)")
    
    return A_percentage

# Fungsi untuk konversi manual (jika ingin menggunakan data langsung)
def manual_conversion_example():
    """
    Contoh konversi manual untuk satu pemain
    """
    print("=== CONTOH KONVERSI MANUAL ===")
    
    # Data contoh dari Steam ID yang Anda berikan
    steam_id = "76561197960269409"
    coefficients = {
        'Arketipe 1': 9.595489630755289,
        'Arketipe 2': 1.3499353750721561,
        'Arketipe 3': 4.040678477074641,
        'Arketipe 4': 0.0
    }
    
    print(f"Steam ID: {steam_id}")
    print("\nKoefisien asli:")
    for archetype, value in coefficients.items():
        print(f"  {archetype}: {value:.6f}")
    
    # Hitung total
    total = sum(coefficients.values())
    print(f"\nTotal koefisien: {total:.6f}")
    
    # Konversi ke persentase desimal
    percentages = {archetype: value/total for archetype, value in coefficients.items()}
    
    print("\nPersentase desimal (0-1):")
    for archetype, percentage in percentages.items():
        print(f"  {archetype}: {percentage:.6f}")
    
    print("\nPersentase biasa (%):")
    for archetype, percentage in percentages.items():
        print(f"  {archetype}: {percentage*100:.2f}%")
    
    # Validasi
    total_percentage = sum(percentages.values())
    print(f"\nValidasi - Total persentase: {total_percentage:.6f} (harus = 1.0)")
    
    return percentages

# Jalankan fungsi
if __name__ == "__main__":
    # Contoh penggunaan dengan file
    try:
        A_percentage = convert_coefficients_to_percentage()
        
        # Akses hasil untuk Steam ID tertentu
        steam_id = "76561197960269409"  # Ganti dengan Steam ID yang ada di data Anda
        if steam_id in A_percentage.index:
            print(f"\n=== HASIL UNTUK STEAM ID {steam_id} ===")
            result = A_percentage.loc[steam_id]
            for archetype, value in result.items():
                print(f"{archetype}: {value:.6f}")
        
    except FileNotFoundError:
        print("File 'matrices_A.csv' tidak ditemukan.")
        print("Menjalankan contoh konversi manual...")
        manual_conversion_example()
    
    # Selalu jalankan contoh manual untuk demonstrasi
    print("\n" + "="*50)
    manual_conversion_example()

=== DATA ASLI ===
Shape: (44, 4)

Beberapa baris pertama:
                   Arketipe 1  Arketipe 2  Arketipe 3  Arketipe 4
Steam ID                                                         
76561197960269409    9.595490    1.349935   14.040678    0.000000
76561197962437769    0.000000    8.087091    2.662179   21.160973
76561197977935089   10.272164    0.000000   18.395207   11.104625
76561197983588742   10.057310    5.290587    0.000000    0.282747
76561197985705149    0.000000    7.519960    6.809934   19.626035

=== DATA SETELAH KONVERSI ===
Beberapa baris pertama (dalam format persentase desimal):
                   Arketipe 1  Arketipe 2  Arketipe 3  Arketipe 4
Steam ID                                                         
76561197960269409    0.384033    0.054027    0.561939    0.000000
76561197962437769    0.000000    0.253432    0.083427    0.663140
76561197977935089    0.258276    0.000000    0.462517    0.279207
76561197983588742    0.643435    0.338475    0.000000    0.01

In [9]:
import pandas as pd

# 1. Baca data
df = pd.read_csv("combined_dataset.csv")

# 2. Hitung jumlah game yang dimiliki per pemain
df_game_count = df.groupby('Steam ID')['Game Name'].count().reset_index()
df_game_count.columns = ['Steam ID', 'Owned_Games']

# 3. Hitung median dan rata-rata
median_games = df_game_count['Owned_Games'].median()
mean_games = df_game_count['Owned_Games'].mean()

print(f"Median jumlah game: {median_games:.0f}")
print(f"Rata-rata jumlah game: {mean_games:.2f}")

# 4. Hitung jumlah pemain dengan game < median
below_median = (df_game_count['Owned_Games'] < median_games).sum()
total_users = len(df_game_count)

print(f"\nJumlah pemain dengan game < median: {below_median} dari {total_users} pemain")
print(f"Persentase pemain yang akan dihapus: {below_median / total_users * 100:.2f}%")


  df = pd.read_csv("combined_dataset.csv")


Median jumlah game: 261
Rata-rata jumlah game: 454.65

Jumlah pemain dengan game < median: 113 dari 227 pemain
Persentase pemain yang akan dihapus: 49.78%


In [10]:
import pandas as pd

# Load data
df = pd.read_csv("combined_dataset.csv")

# Hitung jumlah game per pemain
game_counts = df.groupby("Steam ID")["Game Name"].nunique()

# Hitung Q1, Q2, Q3
q1 = game_counts.quantile(0.25)
q2 = game_counts.quantile(0.50)
q3 = game_counts.quantile(0.75)

print(f"Q1: {q1}")
print(f"Median (Q2): {q2}")
print(f"Q3: {q3}")


  df = pd.read_csv("combined_dataset.csv")


Q1: 126.5
Median (Q2): 252.0
Q3: 491.5


In [11]:
import pandas as pd

# Load data
df = pd.read_csv("combined_dataset.csv")

# Hitung jumlah game per pemain (berdasarkan Steam ID)
game_count_per_player = df.groupby("Steam ID")["Game Name"].nunique().reset_index()
game_count_per_player.columns = ["Steam ID", "total_owned_games"]

# Gabungkan kembali ke dataset asli jika dibutuhkan
df = df.merge(game_count_per_player, on="Steam ID")

# Hitung Q1, Q3, dan IQR
Q1 = game_count_per_player["total_owned_games"].quantile(0.25)
Q3 = game_count_per_player["total_owned_games"].quantile(0.75)
IQR = Q3 - Q1

# Hitung batas bawah
lower_bound = Q1 - 1.5 * IQR

print(f"Q1: {Q1}")
print(f"Q3: {Q3}")
print(f"IQR: {IQR}")
print(f"Lower Bound: {lower_bound}")

# Filter pemain dengan jumlah game >= lower bound
valid_players = game_count_per_player[game_count_per_player["total_owned_games"] >= lower_bound]
print(f"Jumlah pemain yang valid: {len(valid_players)} dari {len(game_count_per_player)}")

# (Opsional) Filter dataset asli jika ingin lanjut hanya dengan data bersih
cleaned_df = df[df["Steam ID"].isin(valid_players["Steam ID"])]


  df = pd.read_csv("combined_dataset.csv")


Q1: 126.5
Q3: 491.5
IQR: 365.0
Lower Bound: -421.0
Jumlah pemain yang valid: 227 dari 227
