# Importing the Data

In [1]:
import os
import re
import pandas as pd

In [2]:
# Creates the dictionary to store the texts and other information
def create_dict():
    data_dict = {'filename':[], 'year':[], 'dir':[], 'text':[]}
    
    for subdir, dirs, files in os.walk('text_files'):
        for file in files:
            
            year = re.findall('\d{4}', file)[0]
            month = re.findall('ocak|subat|mart|nisan|mayis|haziran|temmuz|agustos|eylul|ekim|kasim|aralik', file)[0]
            subdir_fixed = subdir[:15]
            filename = filename = month + '-' + year + '.txt'
            
            data_dict['filename'].append(filename)
            data_dict['year'].append(year)
            data_dict['dir'].append(os.path.join(subdir_fixed, filename))
        
    return data_dict

# Reads the text files and storing in the dictionary
def read_txt(data_dict):
    for dirs in data_dict['dir']:
        with open(dirs, 'r') as f:
            text = f.read()
        data_dict['text'].append(text)
    
    return data_dict

data_dict = create_dict()
data_dict = read_txt(data_dict)
data = pd.DataFrame(data_dict)

data.head()

Unnamed: 0,filename,year,dir,text
0,mart-2015.txt,2015,text_files/2015/mart-2015.txt,Bilim\nTeknikve\n\nAylık Popüler Bilim Dergi...
1,ocak-2015.txt,2015,text_files/2015/ocak-2015.txt,Bilim\nTeknikve\n\nAylık Popüler Bilim Dergi...
2,nisan-2015.txt,2015,text_files/2015/nisan-2015.txt,Bilim\nTeknikve\n\nAylık Popüler Bilim Dergi...
3,temmuz-2015.txt,2015,text_files/2015/temmuz-2015.txt,Bilim\nTeknikve\n\nAylık Popüler Bilim Dergi...
4,mayis-2015.txt,2015,text_files/2015/mayis-2015.txt,Bilim\nTeknikve\n\nAylık Popüler Bilim Dergi...


# Preprocessing

- Fixing the Turkish characters*
- Lowercase
- Stemming*
- Tokenization
- Removing stopwords


In [3]:
from nltk.tokenize import sent_tokenize, word_tokenize
from TurkishStemmer import TurkishStemmer
from nltk.corpus import stopwords
from tqdm import tqdm as tqdm

import string
import time
import nltk
import re

In [4]:
# pdfminer3 did not recognize Turkish characters
# and the documentation is not well written
# manually fixing them
def preprocess(text):
    text = text.str.replace('‹', 'İ')
    text = text.str.replace('¤', 'ğ')
    text = text.str.replace('›', 'ı')
    text = text.str.replace('ﬂ', 'ş')
    text = text.str.replace('ß', 'ş')
    text = text.str.replace('(cid:159)', 'ü')
    text = text.str.replace('(cid:221)', 'ı')
    text = text.str.replace('(cid:223)', 'ş')
    text = text.str.replace('(cid:141)', 'ç')
    text = text.str.replace('(cid:154)', 'ö')
    text = text.str.replace('(cid:219)', 'ğ')
    text = text.str.replace('(cid:222)', 'Ş')
    text = text.str.replace('(cid:220)', 'İ')
    text = text.str.replace('(cid:133)', 'Ö')
    text = text.str.replace('(cid:213)', "'")
    text = text.str.replace('(cid:134)', 'Ü')
    text = text.str.replace('(cid:130)', 'Ç')
    text = text.str.replace('(cid:212)', '')
    text = text.str.replace('(cid:210)', '')
    text = text.str.replace('(cid:211)', '')
    text = text.str.replace('(cid:201)', '')
    text = text.str.replace('(cid:158)', 'ü')
    text = text.str.replace('Ý', 'ı')
    text = text.str.replace('Û', 'ğ')
    text = text.str.replace('Õ', "'")
    text = text.str.replace('-\n', '')
    text = text.str.replace('\n', ' ')
    text = text.str.replace('\x02', ' ')
    text = text.str.replace('\x0c', ' ')
    text = text.str.lower() # Converting to lowercase
    text = text.str.replace('doç.', 'doç')
    text = text.str.replace('dr.', 'dr')
    text = text.str.replace('prof.', 'prof')
    text = text.str.replace('yrd.', 'yrd')
    text = text.str.replace('i̇', 'i')
    
    return text

data['text'] = preprocess(data['text'])

In [5]:
"""
# Stemming
stemmer = TurkishStemmer()

stemmed_lists = []
for index in tqdm(data.index):
    mini_l = []
    for text in data.loc[index]['text'].split(" "):
        mini_l.append(stemmer.stem(text))
        
    big_text = " "
    for char in mini_l:
        big_text = big_text + " " + char
    stemmed_lists.append(big_text)

data['stemmed'] = stemmed_lists
"""

data.head()

Unnamed: 0,filename,year,dir,text
0,mart-2015.txt,2015,text_files/2015/mart-2015.txt,bilim teknikve aylık popüler bilim dergisi ...
1,ocak-2015.txt,2015,text_files/2015/ocak-2015.txt,bilim teknikve aylık popüler bilim dergisi ...
2,nisan-2015.txt,2015,text_files/2015/nisan-2015.txt,bilim teknikve aylık popüler bilim dergisi ...
3,temmuz-2015.txt,2015,text_files/2015/temmuz-2015.txt,bilim teknikve aylık popüler bilim dergisi ...
4,mayis-2015.txt,2015,text_files/2015/mayis-2015.txt,bilim teknikve aylık popüler bilim dergisi ...


In [6]:
# Stopwords
def read_stopwords():
    sw = stopwords.words('turkish')
    with open('turkish-stopwords.txt') as f:
        text = f.read()
        sw.extend(text.split())
        sw.extend([*string.punctuation])
    return list(set(sw))

# Sentence extraction
def extract_sentences(data):
    sent_tokenizer = nltk.tokenize.PunktSentenceTokenizer()
    sentence_all = []
    
    print('Extracting sentences.')
    time.sleep(1)
    
    for text in tqdm(data['text'].tolist()):
        s_list = sent_tokenizer.tokenize(text)
        sentence_all.append(s_list)
    
    data['sentence'] = sentence_all

    return data

# Tokenizer
# https://github.com/apdullahyayik/Turkish-Word-Tokenizer/blob/master/word_tokenize.py
def word_tokenize_turkish(sentence, sw):
    """
    Args:
        sentence (str): any sentence.
    Returns:
        list: each item is a word.
    """
    
    
    acronym_each_dot = r"(?:[a-zğçşöüı]\.){2,}"
    acronym_end_dot = r"\b[a-zğçşöüı]{2,3}\."
    suffixes = r"[a-zğçşöüıi̇]{3,}' ?[a-zğçşöüıi̇]{0,3}"
    numbers = r"\d+[.,:\d]+"
    any_word = r"[a-zğçşöüıi̇]+"
    punctuations = r"[a-zğçşöüıi̇]*[.,!?;:]"
    word_regex = "|".join([acronym_each_dot,
                           acronym_end_dot,
                           suffixes,
                           numbers,
                           any_word,
                           punctuations])
    tokens = re.compile("%s"%word_regex, re.I).findall(sentence)
    filtered_tokens = []

    for token in tokens:
        if token not in sw:
            if len(token) != 1:
                filtered_tokens.append(token)

    
    return filtered_tokens 

# Tokenizes all sentences
def tokenize_sentences(data, sw):
    token_list_all = []
    sw = read_stopwords()
    
    print('Tokenizing sentences.')
    time.sleep(1)
    
    for sentence_list in tqdm(data['sentence'].tolist()):
        token_list_temp = []
        for sentence in sentence_list:
            token = word_tokenize_turkish(sentence, sw)
            token_list_temp.append(token)
        token_list_all.append(token_list_temp)

    data['sentence_tokenized'] = token_list_all  
    
    return data

In [7]:
sw = read_stopwords()

data = extract_sentences(data)
data = tokenize_sentences(data, sw)

Extracting sentences.


100%|██████████| 258/258 [00:28<00:00,  8.95it/s]


Tokenizing sentences.


100%|██████████| 258/258 [01:16<00:00,  3.39it/s]


In [8]:
data.head()

Unnamed: 0,filename,year,dir,text,sentence,sentence_tokenized
0,mart-2015.txt,2015,text_files/2015/mart-2015.txt,bilim teknikve aylık popüler bilim dergisi ...,[ bilim teknikve aylık popüler bilim dergisi...,"[[bilim, teknikve, aylık, popüler, bilim, derg..."
1,ocak-2015.txt,2015,text_files/2015/ocak-2015.txt,bilim teknikve aylık popüler bilim dergisi ...,[ bilim teknikve aylık popüler bilim dergisi...,"[[bilim, teknikve, aylık, popüler, bilim, derg..."
2,nisan-2015.txt,2015,text_files/2015/nisan-2015.txt,bilim teknikve aylık popüler bilim dergisi ...,[ bilim teknikve aylık popüler bilim dergisi...,"[[bilim, teknikve, aylık, popüler, bilim, derg..."
3,temmuz-2015.txt,2015,text_files/2015/temmuz-2015.txt,bilim teknikve aylık popüler bilim dergisi ...,[ bilim teknikve aylık popüler bilim dergisi...,"[[bilim, teknikve, aylık, popüler, bilim, derg..."
4,mayis-2015.txt,2015,text_files/2015/mayis-2015.txt,bilim teknikve aylık popüler bilim dergisi ...,[ bilim teknikve aylık popüler bilim dergisi...,"[[bilim, teknikve, aylık, popüler, bilim, derg..."


# Model Training

## Seperating Time Intervals

In [9]:
data['year'] = data['year'].astype('int64') # Convert year values to numerical
data.sort_values(by='year',inplace=True) # Sort by year

data_99_09 = data.loc[(data['year'] >= 1999) & (data['year'] <= 2009)] # Extract a time interval
data_10_20 = data.loc[(data['year'] >= 2010) & (data['year'] <= 2020)] # Extract a time interval

In [10]:
data_99_09

Unnamed: 0,filename,year,dir,text,sentence,sentence_tokenized
23,temmuz-1999.txt,1999,text_files/1999/temmuz-1999.txt,selçuk alsan - raşit gürdilek atomaltı dünya...,[ selçuk alsan - raşit gürdilek atomaltı düny...,"[[selçuk, alsan, raşit, gürdilek, atomaltı, dü..."
26,kasim-1999.txt,1999,text_files/1999/kasim-1999.txt,h a b e r l e r i r a ş i t g ü r ...,[ h a b e r l e r i r a ş i t g ü r...,"[[nobel, ödülleri, açıklandı, yapıları, hareke..."
25,subat-1999.txt,1999,text_files/1999/subat-1999.txt,bilim ve teknoloji haberleri ra(ş)it g(ü)rdi...,[ bilim ve teknoloji haberleri ra(ş)it g(ü)rd...,"[[bilim, teknoloji, haberleri, ra, rdilek, sel..."
24,agustos-1999.txt,1999,text_files/1999/agustos-1999.txt,ii. ulusal gökyüzü gözlem şenliği’ne doğru......,"[ ii., ulusal gökyüzü gözlem şenliği’ne doğru....","[[ii.], [ulusal, gökyüzü, gözlem, şenliği, doğ..."
22,eylul-1999.txt,1999,text_files/1999/eylul-1999.txt,dünya’da binlerce insan depremler yüzünde...,[ dünya’da binlerce insan depremler yüzünd...,"[[dünya, binlerce, insan, depremler, yüzünden,..."
...,...,...,...,...,...,...
164,nisan-2009.txt,2009,text_files/2009/nisan-2009.txt,“türkiye’nin soyu tehlikedeki canlıları - 1” ...,[“türkiye’nin soyu tehlikedeki canlıları - 1” ...,"[[türkiye, soyu, tehlikedeki, canlıları, poste..."
163,eylul-2009.txt,2009,text_files/2009/eylul-2009.txt,aylık popüler bilim dergisi eylül 2009 yıl 4...,[aylık popüler bilim dergisi eylül 2009 yıl ...,"[[aylık, popüler, bilim, dergisi, eylül, 2009,..."
162,ocak-2009.txt,2009,text_files/2009/ocak-2009.txt,geçmişe yolculuk yapmak belki de hepimizin ort...,[geçmişe yolculuk yapmak belki de hepimizin or...,"[[geçmişe, yolculuk, hepimizin, ortak, düşü, a..."
172,mayis-2009.txt,2009,text_files/2009/mayis-2009.txt,aylık popüler bilim dergisi mayıs 2009 yıl 4...,[aylık popüler bilim dergisi mayıs 2009 yıl ...,"[[aylık, popüler, bilim, dergisi, mayıs, 2009,..."


In [11]:
data_10_20

Unnamed: 0,filename,year,dir,text,sentence,sentence_tokenized
195,eylul-2010.txt,2010,text_files/2010/eylul-2010.txt,coğrafi bilgi sistemi nedir?... nasıl çalışır?...,"[coğrafi bilgi sistemi nedir?..., nasıl çalışı...","[[coğrafi, bilgi, sistemi, nedir], [çalışır], ..."
197,mart-2010.txt,2010,text_files/2010/mart-2010.txt,savaş teknolojisinden teknoloji savaşına bilgi...,[savaş teknolojisinden teknoloji savaşına bilg...,"[[savaş, teknolojisinden, teknoloji, savaşına,..."
186,ocak-2010.txt,2010,text_files/2010/ocak-2010.txt,2010 gök olayları yıllığı derginizle birlik...,[ 2010 gök olayları yıllığı derginizle birli...,"[[2010, gök, olayları, yıllığı, derginizle, bi..."
194,aralik-2010.txt,2010,text_files/2010/aralik-2010.txt,ülkemizin özgün ve yerli insansız hava araçlar...,[ülkemizin özgün ve yerli insansız hava araçla...,"[[ülkemizin, özgün, yerli, insansız, hava, ara..."
193,nisan-2010.txt,2010,text_files/2010/nisan-2010.txt,"“büyük sorular, büyük deney” posteri derginizl...","[“büyük sorular, büyük deney” posteri derginiz...","[[büyük, sorular, büyük, deney, posteri, dergi..."
...,...,...,...,...,...,...
79,mart-2020.txt,2020,text_files/2020/mart-2020.txt,h a b e r l e r i a l p a k o ğ l ...,[ h a b e r l e r i a l p a k o ğ l...,"[[susam, açılacak], [avrupalı, amerikalı, fizi..."
78,mayis-2020.txt,2020,text_files/2020/mayis-2020.txt,h a b e r l e r i gedeki iletişim ...,[ h a b e r l e r i gedeki iletişim ...,"[[gedeki, iletişim, uydularında, yüzden, bozul..."
77,ocak-2020.txt,2020,text_files/2020/ocak-2020.txt,aylık popüler bilim dergisi mart 2020 yıl 53...,[aylık popüler bilim dergisi mart 2020 yıl 5...,"[[aylık, popüler, bilim, dergisi, mart, 2020, ..."
76,subat-2020.txt,2020,text_files/2020/subat-2020.txt,aylık popüler bilim dergisi şubat 2020 yıl 5...,[aylık popüler bilim dergisi şubat 2020 yıl ...,"[[aylık, popüler, bilim, dergisi, şubat, 2020,..."


In [12]:
# Merging the years of data
def merge_sentences(df):
    merged = []
    for sentence_list in df['sentence_tokenized'].tolist():
        merged.extend(sentence_list)
    return merged

merged_99_09 = merge_sentences(data_99_09)
merged_10_20 = merge_sentences(data_10_20)

In [13]:
!pip install ipython-autotime
%load_ext autotime

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [14]:
from gensim.models import Word2Vec

# Model trained with texts from 1999 to 2009

## Word2Vec model trained with skip-gram and negative sampling
model_99_09 = Word2Vec(size=150, window=4, min_count=5, sg=1, negative=10, workers=4, seed=7)

print('Building vocab...')
model_99_09.build_vocab(merged_99_09)

print('Training the model...')
model_99_09.train(merged_99_09,
                  total_examples=model_99_09.corpus_count,
                  epochs=5)

Building vocab...
Training the model...


(27528883, 29840175)

time: 2min 52s


In [15]:
# Model trained with texts from 2009 to 2020

## Word2Vec model trained with skip-gram and negative sampling
model_10_20 = Word2Vec(size=150, window=4, min_count=5, sg=1, negative=10, workers=4, seed=7)

print('Building vocab...')
model_10_20.build_vocab(merged_10_20)

print('Training the model...')
model_10_20.train(merged_10_20,
                  total_examples=model_10_20.corpus_count,
                  epochs=5)

Building vocab...
Training the model...


(15401216, 16785550)

time: 1min 35s


In [16]:
if not os.path.exists('models'):
    os.mkdir('models')
    
model_99_09.save('models/model_99_09.model')
model_10_20.save('models/model_10_20.model')

time: 1.77 s


# Model Alignment

- Source: https://gist.github.com/quadrismegistus/09a93e219a6ffc4f216fb85235535faf

In [17]:
import gensim
import numpy as np
import pandas as pd
from gensim.models import Word2Vec
from scipy.spatial.distance import cosine

model_99_09 = Word2Vec.load('models/model_99_09.model')
model_10_20 = Word2Vec.load('models/model_10_20.model')

time: 1.44 s


In [18]:
def smart_procrustes_align_gensim(base_embed, other_embed, words=None):
    """Procrustes align two gensim word2vec models (to allow for comparison between same word across models).
    Code ported from HistWords <https://github.com/williamleif/histwords> by William Hamilton <wleif@stanford.edu>.
        (With help from William. Thank you!)
    First, intersect the vocabularies (see `intersection_align_gensim` documentation).
    Then do the alignment on the other_embed model.
    Replace the other_embed model's syn0 and syn0norm numpy matrices with the aligned version.
    Return other_embed.
    If `words` is set, intersect the two models' vocabulary with the vocabulary in words (see `intersection_align_gensim` documentation).
    """
    
    # patch by Richard So [https://twitter.com/richardjeanso) (thanks!) to update this code for new version of gensim
    base_embed.init_sims()
    other_embed.init_sims()

    # make sure vocabulary and indices are aligned
    in_base_embed, in_other_embed = intersection_align_gensim(base_embed, other_embed, words=words)
    
    # get the embedding matrices
    base_vecs = in_base_embed.wv.vectors_norm
    other_vecs = in_other_embed.wv.vectors_norm

    # just a matrix dot product with numpy
    m = other_vecs.T.dot(base_vecs) 
    # SVD method from numpy
    u, _, v = np.linalg.svd(m)
    # another matrix operation
    ortho = u.dot(v) 
    # Replace original array with modified one
    # i.e. multiplying the embedding matrix (syn0norm/vectors_norm)by "ortho"
    other_embed.wv.vectors_norm = other_embed.wv.vectors = (other_embed.wv.vectors_norm).dot(ortho)
    return other_embed
	
def intersection_align_gensim(m1,m2, words=None):
    """
    Intersect two gensim word2vec models, m1 and m2.
    Only the shared vocabulary between them is kept.
    If 'words' is set (as list or set), then the vocabulary is intersected with this list as well.
    Indices are re-organized from 0..N in order of descending frequency (=sum of counts from both m1 and m2).
    These indices correspond to the new syn0 and syn0norm objects in both gensim models:
        -- so that Row 0 of m1.syn0 will be for the same word as Row 0 of m2.syn0
        -- you can find the index of any word on the .index2word list: model.index2word.index(word) => 2
    The .vocab dictionary is also updated for each model, preserving the count but updating the index.
    """

    # Get the vocab for each model
    vocab_m1 = set(m1.wv.vocab.keys())
    vocab_m2 = set(m2.wv.vocab.keys())

    # Find the common vocabulary
    common_vocab = vocab_m1&vocab_m2
    if words: common_vocab&=set(words)

    # If no alignment necessary because vocab is identical...
    if not vocab_m1-common_vocab and not vocab_m2-common_vocab:
        return (m1,m2)

    # Otherwise sort by frequency (summed for both)
    common_vocab = list(common_vocab)
    common_vocab.sort(key=lambda w: m1.wv.vocab[w].count + m2.wv.vocab[w].count,reverse=True)

    # Then for each model...
    for m in [m1,m2]:
        # Replace old syn0norm array with new one (with common vocab)
        indices = [m.wv.vocab[w].index for w in common_vocab]
        old_arr = m.wv.vectors_norm
        new_arr = np.array([old_arr[index] for index in indices])
        m.wv.vectors_norm = m.wv.vectors = new_arr

        # Replace old vocab dictionary with new one (with common vocab)
        # and old index2word with new one
        m.wv.index2word = common_vocab
        old_vocab = m.wv.vocab
        new_vocab = {}
        for new_index,word in enumerate(common_vocab):
            old_vocab_obj=old_vocab[word]
            new_vocab[word] = gensim.models.word2vec.Vocab(index=new_index, count=old_vocab_obj.count)
        m.wv.vocab = new_vocab

    return (m1,m2)

time: 3.89 ms


In [19]:
# Function aligned the second parameter (model_09_20) to the first parameter
# and returned the aligned model
model_10_20_aligned = smart_procrustes_align_gensim(model_99_09, model_10_20)

time: 567 ms


# Detecting Shifts

In [20]:
# KeyedVectors of both models
wv_0 = model_99_09.wv
wv_1 = model_10_20_aligned.wv

time: 404 µs


In [21]:
def find_shifts(wv0, wv1):
    shifts = []

    for word in wv0.vocab.keys():
        distance = cosine(wv0[word], wv1[word]) # Computing cosine distance
        shifts.append((word, distance))
  
    # Sorting shifts with respect to distance values
    shifts = sorted(shifts, key=lambda tup: tup[1])
    shifts.reverse()

    return shifts

time: 2.35 ms


In [22]:
# Finding the largest shifts
shifts = find_shifts(wv_0, wv_1)

# Largest 50 shifts
largest_50 = shifts[:50]
largest_50 = pd.DataFrame(largest_50, columns=['Word', 'Distance'])
largest_50

Unnamed: 0,Word,Distance
0,amer,1.000267
1,nlar,0.790555
2,krs.,0.73453
3,ücretsizdir,0.728433
4,seismol,0.700413
5,kar,0.670901
6,karen,0.660294
7,137,0.641958
8,sezer,0.624423
9,ekici,0.623284


time: 2.7 s


# Remarkable Shifts

In [23]:
words = [
         'artırılmış', 'tesla', 'tabletler',
         'çekirdekli', 'sancar',
         'cinsiyet', 'google',
         'sony', 'nükleer', 'virüsü', 'makine',
         'derin', 'karantina', 'bayt', 'bit',
         'zeka'
        ]

time: 634 µs


In [24]:
movements = {'Word':[], 'Shift':[], 'Moving Away':[], 'Moving Towards':[]}

def create_movements_df(words, wv_0, wv_1):
    for word in words:
        shift = cosine(wv_0[word], wv_1[word])
        similar_99_09 = wv_0.most_similar(word)
        similar_10_20 = wv_1.most_similar(word)
        
        # Eliminating intersections
        for index0, element0 in enumerate(similar_99_09):
            for index1, element1 in enumerate(similar_10_20):
                if element0[0] == element1[0]:
                    similar_99_09.pop(index0)
                    similar_10_20.pop(index1)

        movements['Word'].append(word)
        movements['Shift'].append(shift)
        movements['Moving Away'].append(similar_99_09)
        movements['Moving Towards'].append(similar_10_20)

    movements_df = pd.DataFrame(movements)
    movements_df.sort_values(by=['Shift'], inplace=True, ascending=False)
    
    return movements_df

movements_df = create_movements_df(words, wv_0, wv_1)

time: 141 ms


In [25]:
from IPython.display import display

def show_movements(movements_df, n):
    for index, row in movements_df.iterrows():
        if n >=0:
            away = row['Moving Away']
            towards = row['Moving Towards']
            word = [row['Word'] for i in range(len(away))]
            
            away_df = pd.DataFrame(data=away, columns=['Away From', 'Similarity'])
            towards_df = pd.DataFrame(data=towards, columns=['Towards', 'Similarity'])
            word_df = pd.DataFrame(data=word, columns=['Word'])
            
            result = pd.concat([word_df, away_df, towards_df], axis=1)
            display(result)
            print('-'*50)
            
show_movements(movements_df, len(words))

Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,bayt,terabyte,0.910218,ebu,0.942343
1,bayt,1024,0.901173,mans,0.941403
2,bayt,terabayt,0.897193,alhazen,0.941268
3,bayt,gigabyte,0.884226,coğrafyacı,0.940517
4,bayt,1280,0.882741,ish,0.938598
5,bayt,megabit,0.882398,mes,0.937334
6,bayt,kbit,0.874462,harezmi,0.93452
7,bayt,giga,0.873602,965,0.933401
8,bayt,nf,0.872744,cev,0.930731
9,bayt,512,0.871079,1039,0.928196


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,tesla,gauss,0.787601,motors,0.790893
1,tesla,gücünde,0.746679,edison,0.776335
2,tesla,stark,0.746632,westinghouse,0.72708
3,tesla,skaler,0.745532,bmw,0.724963
4,tesla,terazisi,0.73824,sürücüsüz,0.71972
5,tesla,toroidal,0.734447,ford,0.717585
6,tesla,clerk,0.730753,otomobil,0.715891
7,tesla,indüksiyon,0.729105,otomobillerin,0.708208
8,tesla,chadwick,0.725631,toyota,0.704494


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,artırılmış,gıdalardaki,0.881974,gerçeklik,0.864647
1,artırılmış,çeşidine,0.875663,gözlükleri,0.801788
2,artırılmış,mikroalgler,0.875453,donatılan,0.779756
3,artırılmış,sabunlar,0.870197,konsolu,0.769726
4,artırılmış,kazandırılmış,0.865433,tabletler,0.769601
5,artırılmış,değerlendirilebiliyor,0.863376,internetin,0.764443
6,artırılmış,dönüştürülmesinde,0.862391,yazılımla,0.763071
7,artırılmış,tüketme,0.862173,yazılımlarını,0.762218
8,artırılmış,maddelerinden,0.857562,barkodlar,0.760164
9,artırılmış,gübrelerin,0.857233,interneti,0.760141


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,tabletler,papirüs,0.838574,telefonlar,0.91671
1,tabletler,oyularak,0.831037,tabletlerden,0.901058
2,tabletler,bıçaklar,0.816775,televizyonlara,0.889759
3,tabletler,tabletlere,0.816192,konsolu,0.880979
4,tabletler,tabletlerde,0.816036,televizyonu,0.8807
5,tabletler,tabletlerin,0.815148,telefonlarda,0.877254
6,tabletler,kabartma,0.810514,bilgisayarlarını,0.873613
7,tabletler,kilden,0.808286,daktilo,0.873271
8,tabletler,çivi,0.802759,telefonlara,0.872286
9,tabletler,yazısıyla,0.801085,telefonlarında,0.870825


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,çekirdekli,ökaryotik,0.876869,terabyte,0.788848
1,çekirdekli,ökaryot,0.856128,sata,0.786074
2,çekirdekli,zigot,0.825493,inç,0.776029
3,çekirdekli,prokaryot,0.82455,crt,0.768536
4,çekirdekli,prokaryotik,0.805363,kapasitesinde,0.763796
5,çekirdekli,embriyolarında,0.804884,işlemciye,0.76371
6,çekirdekli,ribozomlar,0.80441,ram,0.762282
7,çekirdekli,zarıyla,0.798176,markalı,0.761023
8,çekirdekli,fosilinde,0.794278,fiyatına,0.760625
9,çekirdekli,sarmallı,0.792457,ekranlı,0.758978


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,sancar,niyazi,0.946613,aziz,0.935435
1,sancar,neşet,0.938859,tomas,0.777682
2,sancar,kamil,0.938413,aaron,0.722762
3,sancar,meral,0.938224,kandel,0.710693
4,sancar,turgay,0.936199,agre,0.70207
5,sancar,arat,0.935183,paylaştı,0.700935
6,sancar,baykara,0.932719,insanımız,0.697266
7,sancar,erdener,0.931959,didier,0.694127
8,sancar,handan,0.930062,queloz,0.690246
9,sancar,gülkan,0.929396,keşifleriyle,0.687691


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,zeka,zek,0.736623,94,0.841655
1,zeka,halıcı,0.715884,vakfi,0.840134
2,zeka,oyunlarının,0.700654,hazırlanmıştır,0.830895
3,zeka,yaratıcılık,0.66839,muammer,0.81617
4,zeka,karaçay,0.664762,abalı,0.813066
5,zeka,yz,0.659393,halıcı,0.809441
6,zeka,iq,0.652345,matemanya,0.808674
7,zeka,sinirbilimciler,0.646449,eleme,0.801926


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,sony,kodak,0.870465,asus,0.920744
1,sony,eos,0.860786,lg,0.907886
2,sony,canon,0.858606,lenovo,0.896641
3,sony,cyber,0.846248,playstation,0.895097
4,sony,panasonic,0.837574,toshiba,0.878928
5,sony,nikon,0.835778,amd,0.855796
6,sony,ericsson,0.830036,panasonic,0.855652


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,bit,ğe,0.693343,qubit,0.76282
1,bit,fik,0.691248,kubit,0.73768
2,bit,bil,0.677506,4096,0.730855
3,bit,kes,0.672036,bilgisayarlarda,0.730843
4,bit,çarp,0.667967,256,0.727934
5,bit,bitin,0.667302,diske,0.727521
6,bit,hip.,0.663777,mb,0.723151
7,bit,nok,0.662967,128,0.71395


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,nükleer,bombalar,0.669706,reaktörlerde,0.716852
1,nükleer,santrallerden,0.663649,reaktörlerin,0.696514
2,nükleer,santralde,0.66345,reaktörler,0.680706
3,nükleer,füzyon,0.657601,reaktör,0.675703
4,nükleer,termonükleer,0.650171,santral,0.673695
5,nükleer,termik,0.645542,santralin,0.671921
6,nükleer,santralinde,0.645149,santraller,0.655417
7,nükleer,silahlar,0.642273,toryum,0.651849
8,nükleer,bombanın,0.64136,felaketinden,0.639994


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,google,earth,0.80957,microsoft,0.797389
1,google,adobe,0.799589,chrome,0.77346
2,google,iphone,0.797054,play,0.767167
3,google,maps,0.779062,maps,0.766653
4,google,wap,0.77831,glass,0.755426
5,google,firefox,0.772257,skype,0.747479
6,google,photoshop,0.771784,siri,0.746581
7,google,msn,0.76926,nokia,0.746307
8,google,sayfasına,0.76857,paypal,0.743309


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,derin,sığ,0.708729,öğrenmeyle,0.575264
1,derin,dibi,0.659694,kuşların,0.564236
2,derin,derinliklerine,0.637515,geçilen,0.562899
3,derin,denizlerin,0.627604,gözlemcilerinin,0.562418
4,derin,sularda,0.6262,volkanları,0.55928
5,derin,göllerin,0.617703,cismini,0.558445
6,derin,çukurları,0.608628,görüntülerindeki,0.557782
7,derin,vadilerin,0.603846,yavaşdalga,0.556151
8,derin,diplerinde,0.598408,çökelleri,0.552133


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,makine,makinası,0.631735,mekatronik,0.742608
1,makine,makineleri,0.617308,öğrenmesi,0.724145
2,makine,tasarımı,0.61394,benzetim,0.701268
3,makine,makinelerinin,0.613404,atölyesinde,0.693653
4,makine,mühendisi,0.606407,otomasyon,0.693503
5,makine,torna,0.605057,cad,0.688548
6,makine,devridaim,0.604788,öğrenimi,0.681879
7,makine,mühendisliğini,0.601606,metalürji,0.667732


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,cinsiyet,cinsiyetin,0.774593,ayrımcılığı,0.791156
1,cinsiyet,cinsiyete,0.771675,şizofreniye,0.790727
2,cinsiyet,eşey,0.768638,çocuklardaki,0.766984
3,cinsiyet,akrabalık,0.759436,ergen,0.765717
4,cinsiyet,gruplarındaki,0.757092,genetiğin,0.764946
5,cinsiyet,ırk,0.756755,bireylerde,0.764425
6,cinsiyet,kedilerde,0.742931,faktörleri,0.763958
7,cinsiyet,ebeveyn,0.7413,hormonları,0.763744
8,cinsiyet,anomalileri,0.740335,korelasyon,0.762131


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,virüsü,gribi,0.836732,hiv,0.798004
1,virüsü,virüsleri,0.813028,virüsleri,0.797993
2,virüsü,virüslerinin,0.785577,herpes,0.780235
3,virüsü,grip,0.772637,sars,0.756516
4,virüsü,virüsüyle,0.768296,adenovirüs,0.75546
5,virüsü,virüsünü,0.767783,hsv,0.75289


--------------------------------------------------


Unnamed: 0,Word,Away From,Similarity,Towards,Similarity.1
0,karantina,obeziteyi,0.897901,yönetsel,0.949057
1,karantina,cezai,0.895758,umutlarını,0.947849
2,karantina,sertifikalı,0.89303,üretkenliği,0.946759
3,karantina,onaylı,0.888863,değerlendirilebiliyor,0.946454
4,karantina,tüketiciyi,0.888174,değerlendirmelerini,0.946091
5,karantina,kültürlerine,0.888138,tanıtarak,0.944479
6,karantina,klinikleri,0.885966,gere,0.943388
7,karantina,önerilmesi,0.885893,hedeflendi,0.943075
8,karantina,tıbb,0.882275,uyardı,0.942486
9,karantina,reçetesiz,0.881497,sağladık,0.941005


--------------------------------------------------
time: 209 ms


# References
- William L. Hamilton, Jure Leskovec, and Dan Jurafsky. ACL 2016. Diachronic Word Embeddings Reveal Statistical Laws of Semantic Change. https://nlp.stanford.edu/projects/histwords/