In [1]:
!pip install torchbiggraph --user



In [6]:
import argparse
import os
from itertools import chain

import attr

import torchbiggraph.converters.utils as utils
from torchbiggraph.config import parse_config
from torchbiggraph.converters.import_from_tsv import convert_input_data
from torchbiggraph.eval import do_eval
from torchbiggraph.train import train

from filtered_eval import FilteredRankingEvaluator


# Archive that main() downloads via utils.download_url and unpacks via utils.extract_tar.
FB15K_URL = 'https://dl.fbaipublicfiles.com/starspace/fb15k.tgz'
# Split name -> edge-list file path; main() joins each path onto the data directory.
FILENAMES = {
    'train': 'FB15k/freebase_mtr100_mte100-train.txt',
    'valid': 'FB15k/freebase_mtr100_mte100-valid.txt',
    'test': 'FB15k/freebase_mtr100_mte100-test.txt',
}


def convert_path(fname):
    """Return the partitioned-output directory name derived from ``fname``.

    The final extension of ``fname`` (if any) is dropped and the suffix
    ``'_partitioned'`` is appended to what remains.
    """
    stem = os.path.splitext(fname)[0]
    return stem + '_partitioned'


def main():
    """Download FB15k, convert it, train embeddings and evaluate on the test split.

    Command-line options:
      --config       path to the config file
      -p / --param   config overrides (may be repeated)
      --data_dir     directory used for the download and the processed data
      --no-filtered  run the unfiltered evaluation instead of the filtered one
    """
    # NOTE(review): parse_args() reads sys.argv; inside a notebook kernel the
    # kernel's own launch arguments are presumably seen here too -- confirm.
    cli = argparse.ArgumentParser(description='Example on FB15k')
    cli.add_argument('--config', default='examples/configs/fb15k_config.py',
                     help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument('--data_dir', default='data',
                     help='where to save processed data')
    cli.add_argument('--no-filtered', dest='filtered', action='store_false',
                     help='Run unfiltered eval')
    args = cli.parse_args()

    # Every -p occurrence produces its own list; chain them into one iterable.
    overrides = None if args.param is None else chain.from_iterable(args.param)

    data_dir = args.data_dir

    def partitioned(split):
        # Partitioned-output directory for one of the FILENAMES splits.
        return convert_path(os.path.join(data_dir, FILENAMES[split]))

    # Fetch and unpack the raw archive.
    archive = utils.download_url(FB15K_URL, data_dir)
    utils.extract_tar(archive)
    print('Downloaded and extracted file.')

    # Convert all three splits; columns are (lhs, relation, rhs).
    convert_input_data(
        args.config,
        [os.path.join(data_dir, rel_path) for rel_path in FILENAMES.values()],
        lhs_col=0,
        rhs_col=2,
        rel_col=1,
    )

    config = parse_config(args.config, overrides)

    train(attr.evolve(config, edge_paths=[partitioned('train')]))

    # Evaluation runs on the test split with all_negs enabled on every relation.
    all_neg_relations = [attr.evolve(rel, all_negs=True) for rel in config.relations]
    eval_config = attr.evolve(
        config,
        edge_paths=[partitioned('test')],
        relations=all_neg_relations,
    )

    if not args.filtered:
        do_eval(eval_config)
        return

    filter_paths = [partitioned(split) for split in ('test', 'valid', 'train')]
    do_eval(eval_config, FilteredRankingEvaluator(eval_config, filter_paths))


if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'torchbiggraph'

In [None]:
def parseMGF(mgfData):
    """Parse the spectra of an MGF (Mascot Generic Format) text file.

    Parameters
    ----------
    mgfData : object exposing ``read_text()`` (e.g. ``pathlib.Path``)
        Source of the MGF text.

    Returns
    -------
    dict
        Maps the 0-based spectrum index to a dict with the keys
        ``pep_mass`` / ``pep_intensity`` (floats parsed from ``PEPMASS``,
        ``0`` when absent or unparsable), ``rtinseconds`` / ``title`` /
        ``charge`` (raw strings from the file, ``None`` when the spectrum
        does not declare them), and ``mz_array`` / ``intensity_array``
        (NumPy arrays built from the peak list, always of equal length).

    Notes
    -----
    Only ``BEGIN IONS`` ... ``END IONS`` sections are read; a section that is
    never closed is dropped. Parse problems are reported with ``print`` and the
    offending value or peak line is skipped.
    """
    comment_prefixes = ('#', ';', '!', '/')
    out = {}
    reading_spectrum = False
    params, masses, intensities = {}, [], []

    for line in mgfData.read_text().split('\n'):
        stripped = line.strip()

        if not reading_spectrum:
            if stripped == 'BEGIN IONS':
                reading_spectrum = True
                # Start each spectrum from a clean slate so nothing leaks in
                # from the previous one.
                params, masses, intensities = {}, [], []
            continue

        if not stripped or line.startswith(comment_prefixes):
            continue

        if stripped == 'END IONS':
            reading_spectrum = False
            pep_mass = 0
            pep_intensity = 0
            if 'pepmass' in params:
                try:
                    fields = params['pepmass'].split()
                    if fields:
                        pep_mass = float(fields[0])
                    if len(fields) > 1:
                        pep_intensity = float(fields[1])
                except ValueError:
                    print("Error in parsing pepmass value")
            out[len(out)] = {
                'pep_mass': pep_mass,
                'pep_intensity': pep_intensity,
                'rtinseconds': params.get('rtinseconds'),
                'title': params.get('title'),
                'charge': params.get('charge'),
                'mz_array': np.array(masses),
                'intensity_array': np.array(intensities),
            }
            continue

        key, sep, value = line.partition('=')
        if sep:
            # KEY=value parameter line; keys are stored lower-cased.
            params[key.strip().lower()] = value.strip()
            continue

        # Anything else inside a spectrum is treated as a peak line:
        # "<m/z> <intensity> [...]".
        fields = line.split()
        if len(fields) >= 2:
            try:
                mz = float(fields[0])
                intensity = float(fields[1])
            except ValueError:
                print("Error in parsing line "+line)
            else:
                # Append only once both values parsed, keeping the arrays aligned.
                masses.append(mz)
                intensities.append(intensity)
    return out

In [4]:
#!pip3 install tensorflow-gpu
!pip install tf-nightly-gpu-2.0-preview --user

Collecting tf-nightly-gpu-2.0-preview
  Using cached https://files.pythonhosted.org/packages/81/b9/249ce060369459014d22d58b0d960ae16b24b200795d0c326d88536e6ac6/tf_nightly_gpu_2.0_preview-2.0.0.dev20190330-cp36-cp36m-manylinux1_x86_64.whl
Collecting google-pasta>=0.1.2 (from tf-nightly-gpu-2.0-preview)
  Using cached https://files.pythonhosted.org/packages/8c/96/adbd4eafe72ce9b5ca6f168fbf109386e1b601f7c59926a11e9d7b7a5b44/google_pasta-0.1.4-py3-none-any.whl
Collecting tensorflow-estimator-2.0-preview (from tf-nightly-gpu-2.0-preview)
  Using cached https://files.pythonhosted.org/packages/65/31/88dd7539266d2debdf0eabd47ef4456d9f1685ce7339b8dd8b7029f7c41e/tensorflow_estimator_2.0_preview-1.14.0.dev2019033000-py2.py3-none-any.whl
Collecting absl-py>=0.7.0 (from tf-nightly-gpu-2.0-preview)
Collecting tb-nightly<1.15.0a0,>=1.14.0a0 (from tf-nightly-gpu-2.0-preview)
  Using cached https://files.pythonhosted.org/packages/5d/17/a3d05a0664c11703259aa79d2b58b871b3bb1fff24153f75db04540489db/tb_nig

In [5]:
# Sanity-check the TensorFlow install: print its version and whether a GPU is usable.
import tensorflow as tf
print(tf.__version__)
#tf.enable_eager_execution()
#print(tf.executing_eagerly())
print(tf.test.is_gpu_available())#:with tf.device("/gpu:0"):
#tf.keras.backend.clear_session()

1.12.0
True


In [11]:
#https://colab.research.google.com/github/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l02c01_celsius_to_fahrenheit.ipynb
import numpy as np
# Seven paired training samples: Celsius inputs and their Fahrenheit targets.
celsius_q    = np.array([-40, -10,  0,  8, 15, 22,  38],  dtype=float)
fahrenheit_a = np.array([-40,  14, 32, 46, 59, 72, 100],  dtype=float)

# `tf` must already be imported by an earlier cell.
with tf.device('/gpu:0'):       # Run nodes with GPU 0
    # One Dense unit on one input feature, i.e. a single weight and bias.
    l0 = tf.keras.layers.Dense(units=1, input_shape=[1])  
    model = tf.keras.models.Sequential([l0])
    #model.compile(optimizer=tf.optimizers.Adam())
    model.compile(optimizer='adam',loss='mean_squared_error')
    # batch_size equals the sample count, so every epoch is one full-batch step.
    history = model.fit(celsius_q, fahrenheit_a, epochs=1000, batch_size=len(celsius_q), verbose=False)
# Prints the History object's repr, not the recorded loss values.
print(history)
print(model.predict([37.0]))

<tensorflow.python.keras.callbacks.History object at 0x7f3038ebee80>
[[76.256195]]


In [None]:
import pathlib
import numpy as np

In [None]:
# Locate the first MGF file under ~/mgf and parse it with parseMGF (defined in an earlier cell).
#file = pathlib.Path.cwd().parent.rglob('*.MGF')
#file = pathlib.Path.home()/'mgf' / '190128_robin_WT_5.raw.centroid.MGF'
file = pathlib.Path.home()/'mgf' / '190128_robin_ 6' / '190128_robin_WT_5.mgf'
# Prints True/False only; parseMGF is still called when the file is missing.
print(file.exists())#read_text().split(' '))
out=parseMGF(file)

In [None]:
# Second MGF file (the ".raw.centroid" export), parsed into out_rr for comparison with `out`.
file_rr = pathlib.Path.home()/'mgf' / '190128_robin_WT_5.raw.centroid.MGF'
# Prints True/False only; parseMGF is still called when the file is missing.
print(file_rr.exists())#read_text().split(' '))
out_rr=parseMGF(file_rr)

In [None]:
# One value per spectrum in `out`: (pep_mass - 1.00727647) * charge, where the
# charge is the integer in front of '+' in the charge string.
# NOTE(review): 1.00727647 is presumably the proton mass in Da -- confirm.
X = np.array([
    (spectrum['pep_mass'] - 1.00727647) * int(spectrum['charge'].split('+')[0])
    for spectrum in out.values()
]).reshape(-1, 1)  # column vector, shape (n_spectra, 1)
print(X.shape)

In [None]:
# Precursor intensity of every spectrum in `out`, as a column vector (n_spectra, 1).
X_int = np.array(
    [spectrum['pep_intensity'] for spectrum in out.values()]
).reshape(-1, 1)
print(X_int.shape)

In [None]:
# Inspect one spectrum: its parsed record next to the derived intensity and mass values.
k=0
print(out[k],X_int[k],X[k])

In [None]:
# Same computation as for X, applied to the spectra in `out_rr`:
# (pep_mass - 1.00727647) * charge, as a column vector (n_spectra, 1).
X_rr = np.array([
    (spectrum['pep_mass'] - 1.00727647) * int(spectrum['charge'].split('+')[0])
    for spectrum in out_rr.values()
]).reshape(-1, 1)
print(X_rr.shape)

In [None]:
import torch
# Report how many CUDA devices PyTorch can see.
print("#GPU-#", torch.cuda.device_count())

In [None]:
!pip3 install tf-nightly-2.0-preview 

In [None]:
# Re-check the TensorFlow version after the install cell above.
# NOTE(review): an already-imported module is not reloaded by `import`; a kernel
# restart is presumably needed to see a newly installed version -- confirm.
import tensorflow as tf
print(tf.__version__)

In [None]:
import time
# Time a spot check over the first 10 spectra: print the index and both pep_mass
# values wherever X_rr[i] exceeds X[i] by more than 10e-6 (i.e. 1e-5).
# NOTE(review): the difference is signed, so entries with X_rr[i] < X[i] are never
# reported -- confirm whether an absolute difference was intended.
ts = time.time()
#for i in range(len(X_rr)):
for i in range(10):
    if(X_rr[i]-X[i]>10e-6):
        print(i,out[i]['pep_mass'],out_rr[i]['pep_mass'])
te = time.time()

In [None]:
print(te-ts)

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
Device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(Device)

In [None]:
# Copy both mass arrays onto `Device` as float64 tensors.
X_t=torch.tensor(X, dtype=torch.double, device=Device)
X_trr=torch.tensor(X_rr, dtype=torch.double, device=Device)

In [None]:
import time
# Same timed spot check as the NumPy version, run on the tensors X_t / X_trr.
# NOTE(review): the difference is signed, so entries with X_trr[i] < X_t[i] are
# never reported -- confirm whether an absolute difference was intended.
ts = time.time()
#for i in range(len(X_t)):
for i in range(10):
    if(X_trr[i]-X_t[i]>10e-6):
        print(i,X_t[i],X_trr[i])
te = time.time()

In [None]:
print(te - ts)

In [None]:
X_trr[:4]-X_t[:4]

In [None]:
# Tolerance-based comparison of the first four masses from each file.
print(torch.allclose(X_t[:4], X_trr[:4],atol=1e-04, rtol=1e-05,equal_nan=True))

In [None]:
# Side-by-side look at rows 3940..3947 of both tensors (hard-coded index range).
print(X_trr[3940:3948],X_t[3940:3948])

In [None]:
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, fixed, interact_manual, FloatSlider

def select_intensity(value):
    """Scatter mass (X) against log-intensity for the points whose intensity exceeds ``value``.

    Reads the notebook globals ``X`` and ``X_int``; draws on the current
    matplotlib axes and returns nothing.
    """
    above = X_int > value
    plt.scatter(X[above], np.log(X_int[above]))

# Drive select_intensity from a slider covering 1e3..1e8 in steps of 1e5.
# continuous_update=False: presumably redraws only when the slider is released.
interact(select_intensity, value=FloatSlider(min=1e3, max=1e8, step=1e5, continuous_update=False))
#plt.scatter(X[X_int>10e7],np.log(X_int[X_int>10e7]))

In [None]:
#import ipyvolume as ipv
#!pip install ipywidgets==7.4 --upgrade --user

In [None]:
from sklearn.cluster import KMeans

# Cluster the (mass, intensity) pairs into 9 groups with a fixed seed.
# NOTE(review): this rebinds `model`, which an earlier cell used for the Keras model.
model = KMeans(n_clusters=9, random_state=1).fit(np.concatenate((X,X_int),axis=1))
model.cluster_centers_    

In [None]:
# (n_spectra, 2) matrix with columns [mass, intensity]; the same expression is
# built inline for the KMeans fit in the previous cell.
X_con=np.concatenate((X,X_int),axis=1)

In [None]:
# Elbow plot: fit K-Means for k = 1..9 and plot each fit's inertia against k.
# NOTE(review): no random_state is passed here, so the curve can differ between runs.
sse = {}
for k in range(1, 10):
    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(X_con)
    sse[k] = kmeans.inertia_ # Inertia: Sum of distances of samples to their closest cluster center
plt.figure()
plt.plot(list(sse.keys()), list(sse.values()))
plt.xlabel("Number of cluster")
plt.ylabel("SSE")
plt.show()

In [None]:
import functools, sys, traceback
def gpu_mem_restore(func):
    """Reclaim GPU RAM if CUDA out of memory happened, or execution was interrupted.

    Decorator: calls ``func`` unchanged and returns its result. If the call
    raises anything (including ``KeyboardInterrupt``), the local variables held
    by the traceback's frames are cleared and a fresh exception of the same
    type, built from the original exception value and carrying the original
    traceback, is raised with exception chaining suppressed.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except BaseException:  # deliberate: interrupts must be handled as well
            exc_type, exc_val, exc_tb = sys.exc_info()
            # Drop the frames' locals so objects they reference can be freed.
            traceback.clear_frames(exc_tb)
            raise exc_type(exc_val).with_traceback(exc_tb) from None
    return wrapper

In [None]:
# NOTE(review): this call wraps the integer 1 and discards the returned wrapper;
# it looks like leftover experimentation.
gpu_mem_restore(1)
# Smoke-test the decorator on a trivial function; the cell's value is dbx(3).
@gpu_mem_restore
def dbx(x):
    return x*2
dbx(3)

In [None]:
sse

In [None]:
#r = range(len(model.n_clusters))
#mass_cluster = map(lambda l: model.n_clusters[l][0], r)
#intensity_cluster = map(lambda l: model.n_clusters[l][1], r)
#sizeC = map(lambda l: float(testedMgf[l][1]), r)
#maxS = float(max(sizeC))
#scaledSizeC = map(lambda l: (sizeC[l]/maxS)*150, r)
#scaledSizeC
#clusters.clusterCenters
#print(mass_cluster,intensity_cluster)

In [None]:
#!pip install nevergrad --user

In [None]:
import nevergrad.optimization as optimization
import numpy as np

def simulate_and_return_test_error_with_rl(x, noisy=True):
    """Toy objective: Euclidean norm of the truncated values ``int(50 * |x_i - 0.2|)``.

    When ``noisy`` is truthy, ``len(x)`` times a standard-normal sample is
    added. The sample is drawn from ``np.random`` on every call, whether or
    not it ends up contributing to the result.
    """
    steps = [int(50. * abs(coord - 0.2)) for coord in x]
    noise = noisy * len(x) * np.random.normal()
    return np.linalg.norm(steps) + noise

budget = 1200  # How many trainings we will do before concluding.

# Run each optimizer, looked up by name in the registry, on the 300-dimensional toy objective.
for tool in ["TwoPointsDE", "RandomSearch", "TBPSA", "CMA", "NaiveTBPSA",
        "PortfolioNoisyDiscreteOnePlusOne"]:

    optim = optimization.registry[tool](dimension=300, budget=budget)

    # Each round asks for three candidates, evaluates all three, then reports
    # them back; budget // 3 rounds therefore use the whole budget.
    for u in range(budget // 3):
        x1 = optim.ask()
        x2 = optim.ask()
        x3 = optim.ask()
        y1 = simulate_and_return_test_error_with_rl(x1)
        y2 = simulate_and_return_test_error_with_rl(x2)
        y3 = simulate_and_return_test_error_with_rl(x3)
        optim.tell(x1, y1)
        optim.tell(x2, y2)
        optim.tell(x3, y3)

    # Score the optimizer's final recommendation without the noise term.
    recommendation = optim.provide_recommendation()
    print("* ", tool, " provides a vector of parameters with test error ",
          simulate_and_return_test_error_with_rl(recommendation, noisy=False))

In [None]:
plt.scatter(X, X_int)

In [None]:
#print(torch.tensor(1))#/torch.tensor(0))

In [None]:
#!pip install distro --user

In [None]:
#python -c 'import fastai.utils.collect_env; fastai.utils.collect_env.show_install(1)'
# In-notebook equivalent of the shell one-liner in the comment above.
import fastai.utils.collect_env
fastai.utils.collect_env.show_install(1)

In [None]:
X_t

In [None]:
#https://www.youtube.com/watch?v=C8KEtrWjjyo&list=PLtmWHNX-gukIc92m1K0P6bIOnZb-mg0hY&index=3&t=239s
import torch
import torch.cuda as tc
from torch.autograd import Variable

In [None]:
# float32 copy of the (mass, intensity) matrix X_con, moved to the GPU.
# NOTE(review): requires_grad=True is set on the data matrix, which is not among
# the tensors later handed to the optimizer -- confirm it is needed.
X_cont=Variable(torch.tensor(X_con,dtype=torch.float32), requires_grad=True).cuda()
#X_cont=torch.tensor(X_con,dtype=torch.float32).cuda()

In [None]:
len(X_cont)

In [None]:
# Trainable factors for X_cont ~ pW @ pH with 10 components:
# pW is (len(X_cont), 10) and pH is (10, 2); tc is torch.cuda, so both are CUDA tensors.
pW = Variable(tc.FloatTensor(len(X_cont),10), requires_grad=True)
pH = Variable(tc.FloatTensor(10,2), requires_grad=True)
# Fill both in place with absolute values of normal samples (std 0.01), so they start non-negative.
pW.data.normal_(std=0.01).abs_()
pH.data.normal_(std=0.01).abs_()

In [None]:
# Weight of the negativity penalty inside loss().
lam=1e6
def report():
    "Print the signed sum of the residual X_cont - W @ H for the current factor values."
    W,H = pW.data, pH.data
    print((X_cont-W.mm(H)).sum())

def penalty(A):
    "Element-wise squared negative part of A; entries that are >= 0 contribute 0."
    return torch.pow((A<0).type(tc.FloatTensor)*torch.clamp(A, max=0.), 2)

# Mean negativity penalty of both factor matrices.
def penalize(): return penalty(pW).mean() + penalty(pH).mean()

# Objective: 2-norm of the reconstruction residual plus lam times the negativity penalty.
def loss(): return (X_cont-pW.mm(pH)).norm(2) + penalize()*lam

In [None]:
# Adam over both factor matrices, created with lr=1e-3.
opt = torch.optim.Adam([pW,pH], lr=1e-3, betas=(0.9,0.9))
# NOTE(review): this `lr` is a plain Python variable; nothing visible passes it to `opt`.
lr = 0.05
# Residual sum before any training step.
report()

In [None]:
# 100000 Adam steps on loss(); gradients are zeroed before every backward pass.
for i in range(100000): 
    opt.zero_grad()
    l = loss()
    l.backward()
    opt.step()
    # True at i = 99, 10099, 20099, ... (ten reports over the whole run).
    if i % 10000 == 99: 
        report()
        # NOTE(review): only the Python variable `lr` changes here; no visible
        # code feeds it back into `opt`, so the step size is presumably unaffected.
        lr *= 0.9  

In [None]:
# pW is a CUDA tensor that requires grad, so matplotlib cannot convert it to an
# array directly; detach it and copy it to host memory first.
# One line is drawn per column (component) of pW.
plt.plot(pW.detach().cpu().numpy());

In [None]:
# pH is a CUDA tensor that requires grad, so matplotlib cannot convert it to an
# array directly; detach it and copy it to host memory first.
# One line is drawn per column of pH.
plt.plot(pH.detach().cpu().numpy());