TOP LOMBA 

In [53]:
import pandas as pd

# Membaca data dari file CSV
lombas_df = pd.read_csv('lombas.csv')
kategoris_df = pd.read_csv('kategoris.csv')
pesertas_df = pd.read_csv('pesertas.csv')

# Menghitung jumlah peserta per lomba
lomba_participants = pesertas_df.groupby('idLomba').size().reset_index(name='participant_count')

# Menggabungkan data lomba dengan jumlah peserta
lombas_df = lombas_df.merge(lomba_participants, left_on='id', right_on='idLomba', how='left')

# Mengganti nilai NaN dengan 0 (jika ada lomba tanpa peserta)
lombas_df['participant_count'].fillna(0, inplace=True)

# Mengurutkan lomba berdasarkan jumlah peserta (dari yang terbanyak)
lombas_df = lombas_df.sort_values(by='participant_count', ascending=False)

# Mengambil kolom yang relevan untuk rekomendasi
recommended_lombas = lombas_df[['id','namaLomba']]

# Menampilkan id 5 lomba yang paling banyak diikuti
top_5_lombas_id = recommended_lombas.head(5)
print(top_5_lombas_id.to_string(index=False))


 id                        namaLomba
  8                      Ranked Comp
 10 Kontes Robot Bawah Air Indonesia
  5                           Gamify
  2                         Gemastik
  7                    Ksatria Compe


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  lombas_df['participant_count'].fillna(0, inplace=True)


Similiar berdasarkan deskripsi

In [55]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Membaca data dari file CSV
lombas_df = pd.read_csv('lombas.csv')

# Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

# Replace NaN with an empty string
lombas_df['deskripsiLomba'] = lombas_df['deskripsiLomba'].fillna('')

# Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(lombas_df['deskripsiLomba'])

# Compute the cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Construct a reverse map of indices and lomba titles
indices = pd.Series(lombas_df.index, index=lombas_df['namaLomba']).drop_duplicates()

# Function that takes in lomba name as input and outputs 5 most similar lombas
def get_recommendations_lomba(nama_lomba, cosine_sim=cosine_sim):
    # Get the index of the lomba that matches the name
    idx = indices[nama_lomba]

    # Get the pairwise similarity scores of all lombas with that lomba
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the lombas based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the indices of the top 5 most similar lombas
    lomba_indices = [i[0] for i in sim_scores[1:6]]  # Mengambil 5 lomba teratas

    # Return the top 5 most similar lombas
    return lombas_df['namaLomba'].iloc[lomba_indices]

# Contoh penggunaan: merekomendasikan 5 lomba berdasarkan lomba dengan nama tertentu
recommended_lombas = get_recommendations_lomba('Gemastik')
print("Rekomendasi 5 Lomba Berdasarkan Lomba Tertentu:")
print(recommended_lombas)


Rekomendasi 5 Lomba Berdasarkan Lomba Tertentu:
14           Hackathon
15    Capture The Flag
21              SeFest
3             Dicoding
18         Satria Data
Name: namaLomba, dtype: object


Similiar berdasarkan kategori

In [49]:
import pandas as pd

# Membaca data dari file CSV
lombas_df = pd.read_csv('lombas.csv')

# Function that takes in category ID as input and outputs 5 most similar lombas within the same category
def get_recommendations_by_category(id_kategori):
    # Filter lombas by category
    lombas_in_category = lombas_df[lombas_df['idKategori'] == id_kategori]

    # Get the top 5 lombas in the same category
    top_lombas = lombas_in_category.head(5)

    # Select only idLomba and namaLomba columns
    top_lombas = top_lombas[['id', 'namaLomba']]

    return top_lombas

# Contoh penggunaan: merekomendasikan 5 lomba dalam kategori dengan ID 1
recommended_lombas = get_recommendations_by_category(1)
print("Rekomendasi 5 Lomba dalam Kategori yang Sama:")
print(recommended_lombas.to_string(index=False))



Rekomendasi 5 Lomba dalam Kategori yang Sama:
 id      namaLomba
  1 Figma Designer
  2       Gemastik
