In [1]:
import gensim
from gensim.models import Word2Vec
import pandas as pd

In [2]:
parameters = [
    {'model_type': 'cbow', 'window': 2, 'vector_size': 100},
    {'model_type': 'skipgram', 'window': 2, 'vector_size': 100},
    {'model_type': 'cbow', 'window': 4, 'vector_size': 100},
    {'model_type': 'skipgram', 'window': 4, 'vector_size': 100},
    {'model_type': 'cbow', 'window': 2, 'vector_size': 300},
    {'model_type': 'skipgram', 'window': 2, 'vector_size': 300},
    {'model_type': 'cbow', 'window': 4, 'vector_size': 300},
    {'model_type': 'skipgram', 'window': 4, 'vector_size': 300}
]

In [4]:
df_lemmatized = pd.read_csv("C:/Users/yunus/Desktop/emine_proje/data/preprocessed_data_lemmatized_only.csv")
df_stemmed = pd.read_csv("C:/Users/yunus/Desktop/emine_proje/data/preprocessed_data_stemmed_only.csv")

import ast
df_lemmatized['description_processed'] = df_lemmatized['description_processed'].apply(ast.literal_eval)
df_stemmed['description_stemmed'] = df_stemmed['description_stemmed'].apply(ast.literal_eval)

In [5]:
df_lemmatized

Unnamed: 0,title,description,appears on,artist,writers,producer,released,streak,position,description_processed
0,Like a Rolling Stone,"""I wrote it. I didn't fail. It was straight,""�...",Highway 61 Revisited (Columbia),Bob Dylan,Dylan,Tom Wilson,"July, 1965",12 weeks,No. 2,"[[i, wrote, it], [i, didnt, fail], [it, wa, st..."
1,(I Can't Get No) Satisfaction',"""It's the riff heard round the world,"" says St...",Out of Our Heads�(ABKCO),The Rolling Stones,"Mick Jagger, Keith Richards",Andrew Loog Oldham,"May, 1965",14 weeks,No. 1,"[[it, the, riff, heard, round, the, world, say..."
2,Imagine,"John Lennon wrote ""Imagine,"" his greatest musi...",Imagine�(Capitol/Apple),John Lennon,John Lennon,"Lennon, Phil Spector, Yoko Ono","October, 1971",9 weeks,No. 3,"[[john, lennon, wrote, imagine, his, greatest,..."
3,What's Going On,"""What's Going On"" is an exquisite plea for pea...",What's Going On�(Tamla),Marvin Gaye,"Gaye, Renaldo Benson, Al Cleveland",Gaye�,"Feb, 1971",13 weeks,No. 2,"[[whats, going, on, is, an, exquisite, plea, f..."
4,Respect,"Otis Redding wrote ""Respect"" and recorded it f...",I Never Loved a Man the Way I Love You�(Atlantic),Aretha Franklin,Otis Redding,Jerry Wexler,"April, 1967",12 weeks,No. 1,"[[otis, redding, wrote, respect, and, recorded..."
...,...,...,...,...,...,...,...,...,...,...
495,Heroin,"This seven-minute, two-chord track spiked out ...",The Velvet Underground and Nico (Polydor),The Velvet Underground,Lou Reed,"Andy Warhol, Tom Wilson","March , 1967",Non-Single,,"[[this, sevenminute, twochord, track, spiked, ..."
496,Leader of the Pack,Morton found the inspiration for this song at ...,Myrmidons of Melodrama: Definitive Collection ...,The Shangri-Las,"George ""Shadow"" Morton, Jeff Barry, Ellie Gree...","Morton, Barry, Greenwich","Oct. , 1964",12 weeks,No. 1,"[[morton, found, the, inspiration, for, this, ..."
497,Pressure Drop,Toots and the Maytals were already reggae star...,The Harder They Come (Hip-O),Toots and the Maytals,Toots Hibbert,Leslie Kong,"Feb. , 1973",Did Not Chart,,"[[toot, and, the, maytals, were, already, regg..."
498,Come As You Are,"""It's just about people and what they're expec...",Nevermind (Geffen),Nirvana,"Kurt Cobain, Nirvana","Butch Vig, Nirvana","Sept. , 1991",18 weeks,No. 32,"[[it, just, about, people, and, what, theyre, ..."


In [6]:
# İç içe listeleri düz bir token listesi haline getir (Word2Vec için)
tokenized_corpus_lemmatized = [sentence for comment in df_lemmatized["description_processed"] for sentence in comment]
tokenized_corpus_stemmed = [sentence for comment in df_stemmed["description_stemmed"] for sentence in comment]

In [7]:
from gensim.models import Word2Vec

def train_and_save_model(corpus, param, model_prefix):
    model_type = param['model_type']
    vector_size = param['vector_size']
    window = param['window']
    
    # CBOW (sg=0) veya Skip-gram (sg=1)
    sg = 0 if model_type == 'cbow' else 1

    model = Word2Vec(
        sentences=corpus,
        vector_size=vector_size,
        window=window,
        min_count=1,
        workers=4,
        sg=sg
    )

    model_filename = f"{model_prefix}_{model_type}_vs{vector_size}_w{window}.model"
    model.save(model_filename)
    print(f"Model saved as {model_filename}")


In [8]:
# Lemmatize edilmiş corpus ile modelleri eğitme ve kaydetme
for param in parameters:
    train_and_save_model(tokenized_corpus_lemmatized, param, "lemmatized_model")

# Stemlenmiş corpus ile modelleri eğitme ve kaydetme
for param in parameters:
    train_and_save_model(tokenized_corpus_stemmed, param, "stemmed_model")

Model saved as lemmatized_model_cbow_vs100_w2.model
Model saved as lemmatized_model_skipgram_vs100_w2.model
Model saved as lemmatized_model_cbow_vs100_w4.model
Model saved as lemmatized_model_skipgram_vs100_w4.model
Model saved as lemmatized_model_cbow_vs300_w2.model
Model saved as lemmatized_model_skipgram_vs300_w2.model
Model saved as lemmatized_model_cbow_vs300_w4.model
Model saved as lemmatized_model_skipgram_vs300_w4.model
Model saved as stemmed_model_cbow_vs100_w2.model
Model saved as stemmed_model_skipgram_vs100_w2.model
Model saved as stemmed_model_cbow_vs100_w4.model
Model saved as stemmed_model_skipgram_vs100_w4.model
Model saved as stemmed_model_cbow_vs300_w2.model
Model saved as stemmed_model_skipgram_vs300_w2.model
Model saved as stemmed_model_cbow_vs300_w4.model
Model saved as stemmed_model_skipgram_vs300_w4.model


In [10]:
# Model dosyalarını yüklemek
model_1 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/lemmatized_model_cbow_vs100_w2.model")
model_2 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/lemmatized_model_skipgram_vs100_w2.model")
model_3 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/lemmatized_model_cbow_vs100_w4.model")
model_4 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/lemmatized_model_skipgram_vs100_w4.model")
model_5 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/lemmatized_model_cbow_vs300_w2.model")
model_6 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/lemmatized_model_skipgram_vs300_w2.model")
model_7 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/lemmatized_model_cbow_vs300_w4.model")
model_8 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/lemmatized_model_skipgram_vs300_w4.model")
model_9  = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/stemmed_model_cbow_vs100_w2.model")
model_10 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/stemmed_model_skipgram_vs100_w2.model")
model_11 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/stemmed_model_cbow_vs100_w4.model")
model_12 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/stemmed_model_skipgram_vs100_w4.model")
model_13 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/stemmed_model_cbow_vs300_w2.model")
model_14 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/stemmed_model_skipgram_vs300_w2.model")
model_15 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/stemmed_model_cbow_vs300_w4.model")
model_16 = Word2Vec.load("C:/Users/yunus/Desktop/emine_proje/data/stemmed_model_skipgram_vs300_w4.model")

# 'like' kelimesi ile en benzer 3 kelimeyi ve skorlarını yazdırmak
def print_similar_words(model, model_name):
    similarity = model.wv.most_similar('like', topn=3)
    print(f"\n{model_name} Modeli - 'like' ile En Benzer 3 Kelime:")
    for word, score in similarity:
        print(f"Kelime: {word}, Benzerlik Skoru: {score}")
        
# 16 model için benzer kelimeleri yazdır
print_similar_words(model_1, "Lemmatized CBOW Window 2 Dim 100")
print_similar_words(model_2, "Stemmed Skipgram Window 4 Dim 100")
print_similar_words(model_3, "Lemmatized Skipgram Window 2 Dim 300")
print_similar_words(model_4, "lemmatized skipgram window4 dim 100")
print_similar_words(model_5, "lemmatized cbow window2 dim 300")
print_similar_words(model_6, "lemmatizedskipgramwindow2dim300")
print_similar_words(model_7, "lemmatized_cbow_window4_dim300")
print_similar_words(model_8, "lemmatized_skipgram_window4_dim300.model")
print_similar_words(model_9, "stemmed_cbow_window2_dim100")
print_similar_words(model_10, "stemmed_skipgram_window2_dim100")
print_similar_words(model_11, "stemmed_cbow_window4_dim100")
print_similar_words(model_12, "stemmed_skipgram_window4_dim100")
print_similar_words(model_13, "stemmed_cbow_window2_dim300")
print_similar_words(model_14, "stemmed_skipgram_window2_dim300")
print_similar_words(model_15, "stemmed_cbow_window4_dim300")
print_similar_words(model_16, "stemmed_skipgram_window4_dim300")


Lemmatized CBOW Window 2 Dim 100 Modeli - 'like' ile En Benzer 3 Kelime:
Kelime: and, Benzerlik Skoru: 0.9994775056838989
Kelime: the, Benzerlik Skoru: 0.9994653463363647
Kelime: a, Benzerlik Skoru: 0.9994574785232544

Stemmed Skipgram Window 4 Dim 100 Modeli - 'like' ile En Benzer 3 Kelime:
Kelime: string, Benzerlik Skoru: 0.9985587000846863
Kelime: off, Benzerlik Skoru: 0.9985231161117554
Kelime: didnt, Benzerlik Skoru: 0.998500645160675

Lemmatized Skipgram Window 2 Dim 300 Modeli - 'like' ile En Benzer 3 Kelime:
Kelime: a, Benzerlik Skoru: 0.9997804164886475
Kelime: and, Benzerlik Skoru: 0.9997735023498535
Kelime: the, Benzerlik Skoru: 0.9997560977935791

lemmatized skipgram window4 dim 100 Modeli - 'like' ile En Benzer 3 Kelime:
Kelime: write, Benzerlik Skoru: 0.9980273246765137
Kelime: said, Benzerlik Skoru: 0.9977957010269165
Kelime: me, Benzerlik Skoru: 0.9976887106895447

lemmatized cbow window2 dim 300 Modeli - 'like' ile En Benzer 3 Kelime:
Kelime: and, Benzerlik Skoru: 0.9

In [11]:
# 'young' kelimesi ile en benzer 3 kelimeyi ve skorlarını yazdırmak
def print_similar_words(model, model_name):
    similarity = model.wv.most_similar('young', topn=3)
    print(f"\n{model_name} Modeli - 'young' ile En Benzer 3 Kelime:")
    for word, score in similarity:
        print(f"Kelime: {word}, Benzerlik Skoru: {score}")
        
# 16 model için benzer kelimeleri yazdır
print_similar_words(model_1, "Lemmatized CBOW Window 2 Dim 100")
print_similar_words(model_2, "Stemmed Skipgram Window 4 Dim 100")
print_similar_words(model_3, "Lemmatized Skipgram Window 2 Dim 300")
print_similar_words(model_4, "lemmatized skipgram window4 dim 100")
print_similar_words(model_5, "lemmatized cbow window2 dim 300")
print_similar_words(model_6, "lemmatizedskipgramwindow2dim300")
print_similar_words(model_7, "lemmatized_cbow_window4_dim300")
print_similar_words(model_8, "lemmatized_skipgram_window4_dim300.model")
print_similar_words(model_9, "stemmed_cbow_window2_dim100")
print_similar_words(model_10, "stemmed_skipgram_window2_dim100")
print_similar_words(model_11, "stemmed_cbow_window4_dim100")
print_similar_words(model_12, "stemmed_skipgram_window4_dim100")
print_similar_words(model_13, "stemmed_cbow_window2_dim300")
print_similar_words(model_14, "stemmed_skipgram_window2_dim300")
print_similar_words(model_15, "stemmed_cbow_window4_dim300")
print_similar_words(model_16, "stemmed_skipgram_window4_dim300")


Lemmatized CBOW Window 2 Dim 100 Modeli - 'young' ile En Benzer 3 Kelime:
Kelime: to, Benzerlik Skoru: 0.9988107681274414
Kelime: their, Benzerlik Skoru: 0.998798131942749
Kelime: from, Benzerlik Skoru: 0.9987131953239441

Stemmed Skipgram Window 4 Dim 100 Modeli - 'young' ile En Benzer 3 Kelime:
Kelime: sound, Benzerlik Skoru: 0.9988180994987488
Kelime: singer, Benzerlik Skoru: 0.9987736940383911
Kelime: off, Benzerlik Skoru: 0.9986938238143921

Lemmatized Skipgram Window 2 Dim 300 Modeli - 'young' ile En Benzer 3 Kelime:
Kelime: to, Benzerlik Skoru: 0.999260425567627
Kelime: he, Benzerlik Skoru: 0.9992013573646545
Kelime: their, Benzerlik Skoru: 0.9991965293884277

lemmatized skipgram window4 dim 100 Modeli - 'young' ile En Benzer 3 Kelime:
Kelime: folk, Benzerlik Skoru: 0.9989420175552368
Kelime: began, Benzerlik Skoru: 0.998931884765625
Kelime: off, Benzerlik Skoru: 0.998928964138031

lemmatized cbow window2 dim 300 Modeli - 'young' ile En Benzer 3 Kelime:
Kelime: and, Benzerlik S