In [1]:
from google.colab import drive 
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [None]:
# Get PPMI
import pandas as pd
import numpy as np
from collections import defaultdict


def co_occurrence(sentences, window_size, vocab):
    d = defaultdict(int)
    for text in sentences:
        # iterate over sentences
        for i in range(len(text)):
            token = text[i]
            if token in vocab:
                next_token = text[i+1 : i+1+window_size]
                for t in next_token:
                    if t in vocab:
                        key = tuple(sorted([t, token]))
                        d[key] += 1
    
    # formulate the dictionary into dataframe
    df = pd.DataFrame(data=np.zeros((len(vocab), len(vocab)), dtype=np.int16),
                      index=vocab,
                      columns=vocab)
    for key, value in d.items():
        df.at[key[0], key[1]] = value
        df.at[key[1], key[0]] = value
    return df

In [None]:
import numpy as np

def pmi(df, positive=True):
    col_totals = df.sum(axis=0)
    total = col_totals.sum()
    row_totals = df.sum(axis=1)
    expected = np.outer(row_totals, col_totals) / total
    df = df / expected
    # Silence distracting warnings about log(0):
    with np.errstate(divide='ignore'):
        df = np.log(df)
    df[np.isinf(df)] = 0.0  # log(0) = 0
    if positive:
        df[df < 0] = 0.0
    return df

In [None]:
window_size = 5

In [None]:
from gensim.models import Word2Vec
from tqdm import tqdm
from ast import literal_eval

# Collect all vocabulary
all_vocab = set()
years = [i for i in range(2000, 2015)]
for year in tqdm(years):
    model = Word2Vec.load(f'/content/gdrive/MyDrive/Colab Notebooks/Thesis/Models/Classification/Dynamic/Word2vec/word2vec_5w_{year}.model')
    vocab = model.wv.index_to_key
    all_vocab.update(vocab)

100%|██████████| 15/15 [00:34<00:00,  2.31s/it]


In [None]:
# The vocabulary is too large, we will take only words from classification task and words with minimal frequency 50
len(all_vocab)

83052

In [None]:
import pandas as pd

df_words = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/Classification/Common/classification_words.csv')
df_words.head()

In [None]:
with open('/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/News/Common/news_all_vocab.json', 'r', encoding='utf-8') as f:
    all_vocab_1 = json.loads(json.load(f))

target_words = df_words.WORD.values
new_all_vocab = [w for w in all_vocab if w in all_vocab_1 or w in target_words]
all_vocab = new_all_vocab

In [None]:
len(all_vocab)

18715

In [None]:
import json

with open('/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/Classification/Dynamic/news_all_vocab_5w.json', 'w', encoding='utf-8') as f:
    json.dump(json.dumps(list(all_vocab)), f)

In [None]:
import json

with open('/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/Classification/Dynamic/news_all_vocab_5w.json', 'r', encoding='utf-8') as f:
    all_vocab = json.loads(json.load(f))

In [None]:
word_to_index = {w: i for i, w in enumerate(all_vocab)}

In [None]:
import numpy as np
from gensim.models import Word2Vec
from tqdm import tqdm

# Initialize the U matrix with word embeddings
years = [i for i in range(2000, 2015)]
U = [np.zeros((len(all_vocab), 300)) for t in years]

i = 0
for year in tqdm(years):
    model = Word2Vec.load(f'/content/gdrive/MyDrive/Colab Notebooks/Thesis/Models/Classification/Dynamic/Word2vec/word2vec_5w_{year}.model')
    for word in model.wv.index_to_key:
        if word not in word_to_index:
            continue
        embed = model.wv[word]
        U[i][word_to_index[word]] = embed
    i += 1
U = np.asarray(U)

100%|██████████| 15/15 [00:30<00:00,  2.05s/it]


In [None]:
from sklearn.decomposition import PCA
from tqdm import tqdm

# Reduce the dimensionality of word embeddings from 300 to 50
U_new = []
for i in tqdm(range(len(U))):
    pca = PCA(n_components=50)
    fitted = pca.fit_transform(U[i])
    U_new.append(fitted)

100%|██████████| 15/15 [00:20<00:00,  1.40s/it]


In [None]:
with open('/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/Classification/Dynammic/news_U_matrix_5w.npy', 'wb') as f:
    np.save(f, U_new)

In [None]:
with open('/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/Classification/Dynammic/news_U_matrix_5w.npy', 'rb') as f:
    U = np.load(f)

In [None]:
import pandas as pd
from gensim.models import Word2Vec
from tqdm import tqdm
from ast import literal_eval

# Compute PPMI
years = [i for i in range(2000, 2015)]
k = 0
for year in tqdm(years):
    df_temp = pd.read_csv(f'/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/News/Common/data_{year}.csv', compression='zip',
                              converters={'tokenized_text': literal_eval, 'lemmas': literal_eval})
    
    model = Word2Vec.load(f'/content/gdrive/MyDrive/Colab Notebooks/Thesis/Models/Classification/Dynamic/Word2vec/word2vec_5w_{year}.model')
    model_vocab = set(model.wv.index_to_key)
    vocab = list(model_vocab.intersection(set(all_vocab)))                
    ppmi_df = co_occurrence(df_temp.lemmas, window_size, vocab)

    column_words = ppmi_df.columns
    ppmi_df.index = column_words
    ppmi_df = pmi(ppmi_df)
    df_list = None

    for i in tqdm(range(len(ppmi_df.index))):
        word_1 = ppmi_df.index[i]
        for j in range(i, len(column_words)):
            word_2 = column_words[j]         
            if ppmi_df[word_1][word_2] > 0:
                if df_list is None:
                    df_list = [[word_to_index[word_1], word_to_index[word_2], ppmi_df[word_1][word_2]]]
                else:
                    df_list.append([word_to_index[word_1], word_to_index[word_2], ppmi_df[word_1][word_2]])

    df = pd.DataFrame(df_list)

    compression_opts = dict(method='zip', archive_name=f'list_ppmi_{k}.csv') 
    df.to_csv(f'/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/Classification/Dynamic/list_ppmi_5w_{k}.csv', index=False, compression=compression_opts)
    k += 1

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
 26%|██▌       | 4355/16819 [11:01<29:03,  7.15it/s][A
 26%|██▌       | 4356/16819 [11:01<29:03,  7.15it/s][A
 26%|██▌       | 4357/16819 [11:01<27:22,  7.59it/s][A
 26%|██▌       | 4358/16819 [11:02<26:01,  7.98it/s][A
 26%|██▌       | 4359/16819 [11:02<25:17,  8.21it/s][A
 26%|██▌       | 4360/16819 [11:02<25:09,  8.25it/s][A
 26%|██▌       | 4361/16819 [11:02<24:22,  8.52it/s][A
 26%|██▌       | 4362/16819 [11:02<24:02,  8.64it/s][A
 26%|██▌       | 4363/16819 [11:02<24:09,  8.59it/s][A
 26%|██▌       | 4364/16819 [11:02<24:13,  8.57it/s][A
 26%|██▌       | 4365/16819 [11:02<24:02,  8.63it/s][A
 26%|██▌       | 4366/16819 [11:02<24:15,  8.55it/s][A
 26%|██▌       | 4367/16819 [11:03<23:57,  8.66it/s][A
 26%|██▌       | 4368/16819 [11:03<24:07,  8.60it/s][A
 26%|██▌       | 4369/16819 [11:03<24:32,  8.45it/s][A
 26%|██▌       | 4370/16819 [11:03<23:58,  8.66it/s][A
 26%|██▌       | 4371/

In [None]:
#utility functions for running the CD method
#loss: min 1/2 \sum_t | Yt - UtVt' |^2 + lam/2 \sum_t(|Ut|^2 + |Vt|^2) + 
#                                        tau/2 \sum_t>1(|Vt - Vt-1|^2 + |Ut - Ut-1|^2)
#                                        gam/2 \sum_t (|Ut - Vt|^2)

import numpy as np
import scipy.io as sio
import copy
def update(U,Y,Vm1,Vp1,lam,tau,gam,ind,iflag):
    
    UtU = np.dot(U.T,U) # rxr
    r = UtU.shape[0]    
    if iflag:   M   = UtU + (lam + 2*tau + gam)*np.eye(r)
    else:       M   = UtU + (lam + tau + gam)*np.eye(r)
       
    Uty = np.dot(U.T,Y) # rxb
    Ub  = U[ind,:].T   # rxb
    A   = Uty + gam*Ub + tau*(Vm1.T+Vp1.T)  # rxb
    Vhat = np.linalg.lstsq(M,A) #rxb
    return Vhat[0].T #bxr


#for the above function, the equations are to update V. So:
#Y is n X b (b = batch size)
#r = rank
#U is n X r
#Vm1 and Vp1 are bXr. so they are b rows of V, transposed

def import_static_init(T):
    with open('/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/Classification/Dynamic/news_U_matrix_5w.npy', 'rb') as f:
        emb = np.load(f)
    U = [copy.deepcopy(emb[t]) for t in T]
    V = [copy.deepcopy(emb[t]) for t in T]
    return U,V

def initvars(vocab_size,T,rank):
    # dictionary will store the variables U and V. tuple (t,i) indexes time t and word index i  
    U,V = [],[]
    U.append(np.random.randn(vocab_size,rank)/np.sqrt(rank))
    V.append(np.random.randn(vocab_size,rank)/np.sqrt(rank))
    for t in range(1,T):
        U.append(U[0].copy())
        V.append(V[0].copy())
        print(t)
    return U,V
    
import pandas as pd
import scipy.sparse as ss
def getmat(f,v,rowflag):
    data = pd.read_csv(f, compression='zip')
    data = data.values
    
    X = ss.coo_matrix((data[:,2],(data[:,0],data[:,1])),shape=(v,v))
   
   
    if rowflag: 
        X = ss.csr_matrix(X)
        # X = X[inds,:]
    else:
        X = ss.csc_matrix(X)
        # X = X[:,inds]
    
    return X#.todense()

def getbatches(vocab,b):
    batchinds = []
    current = 0
    while current<vocab:
        inds = range(current,min(current+b,vocab))
        current = min(current+b,vocab)
        batchinds.append(inds)
    return batchinds

#   THE FOLLOWING FUNCTION TAKES A WORD ID AND RETURNS CLOSEST WORDS BY COSINE DISTANCE
from sklearn.metrics.pairwise import cosine_similarity
def getclosest(wid,U):
    C = []
    for t in range(len(U)):
        temp = U[t]
        K = cosine_similarity([temp[wid,:]],temp)
        mxinds = np.argsort(-K)
        mxinds = mxinds[0:10]
        C.append(mxinds)
    return C
        
# THE FOLLOWING FUNCTIONS COMPUTES THE REGULARIZER SCORES GIVEN U AND V ENTRIES
def compute_symscore(U,V):
    return np.linalg.norm(U-V)**2

def compute_smoothscore(U,Um1,Up1):
    X = np.linalg.norm(U-Up1)**2 + np.linalg.norm(U-Um1)**2
    return X

In [None]:
# main script for time CD 
# trainfile has lines of the form
# tok1,tok2,pmi

import numpy as np
import pickle as pickle

# PARAMETERS

nw = 18715 # number of words in vocab
T = range(15) # total number of time points
cuda = False # True

trainhead = '/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/Classification/Dynamic/list_ppmi_5w_' # location of training data
savehead = '/content/gdrive/MyDrive/Colab Notebooks/Thesis/Models/Classification/Dynamic/Results/'
    
def print_params(r,lam,tau,gam,emph,ITERS):
    
    print('rank = {}'.format(r))
    print('frob  regularizer = {}'.format(lam))
    print('time  regularizer = {}'.format(tau))
    print('symmetry regularizer = {}'.format(gam))
    print('emphasize param   = {}'.format(emph))
    print('total iterations = {}'.format(ITERS))
    
if __name__=='__main__':
    import sys
    ITERS = 30 # total passes over the data
    lam = 10 #frob regularizer
    gam = 50 # forcing regularizer
    tau = 50  # smoothing regularizer
    r   = 50  # rank
    b = nw # batch size
    emph = 1 # emphasize the nonzero

    foo = sys.argv
    for i in range(1,len(foo)):
        if foo[i]=='-r':    r = int(float(foo[i+1]))        
        if foo[i]=='-iters': ITERS = int(float(foo[i+1]))            
        if foo[i]=='-lam':    lam = float(foo[i+1])
        if foo[i]=='-tau':    tau = float(foo[i+1])
        if foo[i]=='-gam':    gam = float(foo[i+1])
        if foo[i]=='-b':    b = int(float(foo[i+1]))
        if foo[i]=='-emph': emph = float(foo[i+1])
        if foo[i]=='-check': erchk=foo[i+1]
    
        
    savefile = savehead+'L'+str(lam)+'T'+str(tau)+'G'+str(gam)+'A'+str(emph)
    
    print('starting training with following parameters')
    print_params(r,lam,tau,gam,emph,ITERS)
    print('there are a total of {} words, and {} time points'.format(nw,T))
    
    print('X*X*X*X*X*X*X*X*X')
    print('initializing')

    Ulist,Vlist = import_static_init(T)

    print('getting batch indices')
    if b < nw:
        b_ind = getbatches(nw,b)
    else:
        b_ind = [range(nw)]
    
    import time
    start_time = time.time()
    # sequential updates
    for iteration in range(19, ITERS):  
        print_params(r,lam,tau,gam,emph,ITERS)
        try:
            Ulist = pickle.load(open( "%sngU_iter%d.p" % (savefile,iteration), "rb" ) )
            Vlist = pickle.load(open( "%sngV_iter%d.p" % (savefile, iteration), "rb" ) )
            print(f'iteration {iteration} loaded succesfully')
            continue
        except(IOError):
            pass
        loss = 0
        # shuffle times
        if iteration == 0: times = T
        else: times = np.random.permutation(T)
        
        for t in range(len(times)):   # select a time
            print(f'iteration {iteration}, time {t}')
            f = trainhead + str(t) + '.csv'
            print(f)
            
            pmi = getmat(f,nw,False)
            for j in range(len(b_ind)): # select a mini batch
                print(f'{j} out of {len(b_ind)}')
                ind = b_ind[j]
                ## UPDATE V
                # get data
                pmi_seg = pmi[:,ind].todense()
                
                if t==0:
                    vp = np.zeros((len(ind),r))
                    up = np.zeros((len(ind),r))
                    iflag = True
                else:
                    vp = Vlist[t-1][ind,:]
                    up = Ulist[t-1][ind,:]
                    iflag = False

                if t==len(T)-1:
                    vn = np.zeros((len(ind),r))
                    un = np.zeros((len(ind),r))
                    iflag = True
                else:
                    vn = Vlist[t+1][ind,:]
                    un = Ulist[t+1][ind,:]
                    iflag = False
                Vlist[t][ind,:] = update(Ulist[t],emph*pmi_seg,vp,vn,lam,tau,gam,ind,iflag)
                Ulist[t][ind,:] = update(Vlist[t],emph*pmi_seg,up,un,lam,tau,gam,ind,iflag)       
                
            ####  INNER BATCH LOOP END
                
        # save
        print('time elapsed = ', time.time()-start_time)

        print(f'Loss {loss}')
       

        pickle.dump(Ulist, open( "%sngU_iter%d.p" % (savefile,iteration), "wb" ) , pickle.HIGHEST_PROTOCOL)
        pickle.dump(Vlist, open( "%sngV_iter%d.p" % (savefile, iteration), "wb" ) , pickle.HIGHEST_PROTOCOL)

starting training with following parameters
rank = 50
frob  regularizer = 10
time  regularizer = 50
symmetry regularizer = 50
emphasize param   = 1
total iterations = 30
there are a total of 18715 words, and range(0, 15) time points
X*X*X*X*X*X*X*X*X
initializing
getting batch indices
rank = 50
frob  regularizer = 10
time  regularizer = 50
symmetry regularizer = 50
emphasize param   = 1
total iterations = 30
iteration 19 loaded succesfully
rank = 50
frob  regularizer = 10
time  regularizer = 50
symmetry regularizer = 50
emphasize param   = 1
total iterations = 30
iteration 20, time 0
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_0.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 1
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_1.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 2
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_2.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 3
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_3.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 4
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_4.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 5
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_5.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 6
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_6.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 7
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_7.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 8
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_8.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 9
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_9.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 10
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_10.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 11
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_11.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 12
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_12.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 13
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_13.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 20, time 14
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_14.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


time elapsed =  279.4237868785858
Loss 0
rank = 50
frob  regularizer = 10
time  regularizer = 50
symmetry regularizer = 50
emphasize param   = 1
total iterations = 30
iteration 21, time 0
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_0.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 1
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_1.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 2
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_2.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 3
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_3.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 4
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_4.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 5
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_5.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 6
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_6.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 7
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_7.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 8
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_8.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 9
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_9.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 10
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_10.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 11
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_11.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 12
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_12.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 13
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_13.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 21, time 14
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_14.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


time elapsed =  582.649356842041
Loss 0
rank = 50
frob  regularizer = 10
time  regularizer = 50
symmetry regularizer = 50
emphasize param   = 1
total iterations = 30
iteration 22, time 0
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_0.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 1
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_1.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 2
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_2.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 3
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_3.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 4
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_4.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 5
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_5.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 6
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_6.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 7
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_7.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 8
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_8.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 9
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_9.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 10
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_10.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 11
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_11.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 12
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_12.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 13
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_13.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 22, time 14
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_14.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


time elapsed =  907.2751004695892
Loss 0
rank = 50
frob  regularizer = 10
time  regularizer = 50
symmetry regularizer = 50
emphasize param   = 1
total iterations = 30
iteration 23, time 0
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_0.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 1
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_1.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 2
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_2.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 3
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_3.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 4
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_4.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 5
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_5.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 6
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_6.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 7
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_7.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 8
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_8.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 9
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_9.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 10
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_10.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 11
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_11.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 12
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_12.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 13
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_13.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 23, time 14
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_14.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


time elapsed =  1274.568953037262
Loss 0
rank = 50
frob  regularizer = 10
time  regularizer = 50
symmetry regularizer = 50
emphasize param   = 1
total iterations = 30
iteration 24, time 0
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_0.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 1
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_1.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 2
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_2.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 3
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_3.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 4
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_4.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 5
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_5.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 6
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_6.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 7
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_7.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 8
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_8.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 9
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_9.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 10
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_10.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 11
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_11.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 12
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_12.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 13
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_13.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 24, time 14
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_14.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


time elapsed =  1695.1327488422394
Loss 0
rank = 50
frob  regularizer = 10
time  regularizer = 50
symmetry regularizer = 50
emphasize param   = 1
total iterations = 30
iteration 25, time 0
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_0.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 1
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_1.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 2
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_2.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 3
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_3.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 4
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_4.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 5
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_5.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 6
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_6.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 7
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_7.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 8
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_8.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 9
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_9.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 10
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_10.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 11
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_11.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 12
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_12.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 13
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_13.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 25, time 14
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_14.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


time elapsed =  2097.1874825954437
Loss 0
rank = 50
frob  regularizer = 10
time  regularizer = 50
symmetry regularizer = 50
emphasize param   = 1
total iterations = 30
iteration 26, time 0
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_0.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 1
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_1.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 2
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_2.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 3
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_3.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 4
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_4.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 5
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_5.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 6
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_6.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 7
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_7.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 8
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_8.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 9
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_9.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 10
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_10.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 11
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_11.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 12
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_12.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 13
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_13.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 26, time 14
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_14.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


time elapsed =  2525.1452090740204
Loss 0
rank = 50
frob  regularizer = 10
time  regularizer = 50
symmetry regularizer = 50
emphasize param   = 1
total iterations = 30
iteration 27, time 0
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_0.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 1
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_1.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 2
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_2.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 3
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_3.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 4
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_4.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 5
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_5.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 6
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_6.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 7
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_7.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 8
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_8.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 9
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_9.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 10
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_10.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 11
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_11.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 12
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_12.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 13
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_13.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 27, time 14
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_14.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


time elapsed =  2949.6015362739563
Loss 0
rank = 50
frob  regularizer = 10
time  regularizer = 50
symmetry regularizer = 50
emphasize param   = 1
total iterations = 30
iteration 28, time 0
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_0.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 1
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_1.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 2
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_2.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 3
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_3.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 4
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_4.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 5
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_5.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 6
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_6.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 7
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_7.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 8
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_8.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 9
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_9.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 10
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_10.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 11
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_11.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 12
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_12.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 13
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_13.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 28, time 14
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_14.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


time elapsed =  3365.2495970726013
Loss 0
rank = 50
frob  regularizer = 10
time  regularizer = 50
symmetry regularizer = 50
emphasize param   = 1
total iterations = 30
iteration 29, time 0
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_0.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 1
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_1.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 2
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_2.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 3
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_3.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 4
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_4.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 5
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_5.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 6
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_6.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 7
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_7.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 8
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_8.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 9
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_9.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 10
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_10.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 11
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_11.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 12
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_12.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 13
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_13.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


iteration 29, time 14
/content/gdrive/MyDrive/Colab Notebooks/Final/Data/News/list_ppmi_5w_14.csv
0 out of 1


  Vhat = np.linalg.lstsq(M,A) #rxb


time elapsed =  3787.364504337311
Loss 0


In [2]:
import pandas as pd
import numpy as np
import scipy.sparse as ss
import pickle as pickle

savehead = '/content/gdrive/MyDrive/Colab Notebooks/Thesis/Models/Classification/Dynamic/Results/'
lam = 10 #frob regularizer
gam = 50 # forcing regularizer
tau = 50  # smoothing regularizer
emph = 1 # emphasize the nonzero
savefile = savehead+'L'+str(lam)+'T'+str(tau)+'G'+str(gam)+'A'+str(emph)
iteration = 26
Ulist = pickle.load(open( "%sngU_iter%d.p" % (savefile,iteration), "rb" ) )

In [3]:
df_words = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/Classification/Common/classification_words.csv')

In [4]:
import json

with open('/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/Classification/Dynamic/news_all_vocab_5w.json', 'r', encoding='utf-8') as f:
    all_vocab = json.loads(json.load(f))
  
word_to_index = {w: i for i, w in enumerate(all_vocab)}

In [5]:
df_words['cos_similarity_dynamic'] = [0 for i in range(df_words.shape[0])]

In [6]:
from sklearn.metrics.pairwise import cosine_similarity

for i in range(df_words.shape[0]):
    word = df_words.WORD.iloc[i]
    if word in word_to_index:
        w_id = word_to_index[word]
        year_0 = df_words.BASE_YEAR.iloc[i] - 2000
        year_1 = year_0 + 1
        embed_1 = Ulist[year_0][w_id]
        embed_2 = Ulist[year_1][w_id]
        df_words.loc[i, ('cos_similarity_dynamic')] = cosine_similarity([embed_1], [embed_2])[0][0]

In [7]:
df_words_1 = df_words.drop(df_words[df_words.cos_similarity_dynamic == 0].index)

In [8]:
df_words_1.GROUND_TRUTH.value_counts()

0    204
1     41
2     15
Name: GROUND_TRUTH, dtype: int64

In [None]:
df_words_1.to_csv('/content/gdrive/MyDrive/Colab Notebooks/Thesis/Data/Classification/Dynammic/df_cos_dyn.csv', index=False)

In [10]:
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, balanced_accuracy_score, precision_score, recall_score
import numpy as np
from tqdm import tqdm

rfc_dyn = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
f1_scores = []
balanced_accuracy_scores = []
precision_scores = []
recall_scores = []

for train_index, test_index in tqdm(skf.split(df_words_1.cos_similarity_dynamic, df_words_1.GROUND_TRUTH)):
    x_train_fold, x_test_fold = df_words_1.cos_similarity_dynamic.iloc[train_index], df_words_1.cos_similarity_dynamic.iloc[test_index]
    y_train_fold, y_test_fold = df_words_1.GROUND_TRUTH.iloc[train_index], df_words_1.GROUND_TRUTH.iloc[test_index]
    rfc_dyn.fit(np.expand_dims(np.array(x_train_fold), axis=1), y_train_fold)
    pred = rfc_dyn.predict(np.expand_dims(np.array(x_test_fold), axis=1))
    f1_scores.append(f1_score(y_test_fold, pred, average='macro'))
    balanced_accuracy_scores.append(balanced_accuracy_score(y_test_fold, pred))
    precision_scores.append(precision_score(y_test_fold, pred, average='macro'))
    recall_scores.append(recall_score(y_test_fold, pred, average='macro'))

print(f'\nMean F1 score: {np.mean(f1_scores):.4f}')
print(f'Mean balanced accuracy score: {np.mean(balanced_accuracy_scores):.4f}')
print(f'Mean precision score: {np.mean(precision_scores):.4f}')
print(f'Mean recall score: {np.mean(recall_scores):.4f}')

5it [00:01,  2.52it/s]


Mean F1 score: 0.4128
Mean balanced accuracy score: 0.4036
Mean precision score: 0.4535
Mean recall score: 0.4036



