In [26]:
# Import modules
import json
from sqlalchemy import Text, create_engine, Integer, String, Column, DateTime, JSON
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from datetime import datetime
from sqlalchemy.types import TypeDecorator

In [27]:
# SQLite database addressed by a relative path; echo = True makes the engine
# log every SQL statement it emits (see the log output recorded below this cell).
engine = create_engine('sqlite:///sleep_performance.db', echo = True)

# Declarative base class that the ORM models in this notebook inherit from.
Base = declarative_base()


# Length argument handed to the Text type that backs TextPickleType.
SIZE = 256
# Custom column type: subclassing sqlalchemy.types.TypeDecorator lets us
# serialize Python values to text on write and deserialize them on read.
class TextPickleType(TypeDecorator):
    """Text-backed column type that stores its values as JSON strings."""

    impl = Text(SIZE)

    def process_bind_param(self, value, dialect):
        """Encode ``value`` with ``json.dumps``; ``None`` is passed through as-is."""
        return None if value is None else json.dumps(value)

    def process_result_value(self, value, dialect):
        """Decode the stored text with ``json.loads``; ``None`` is passed through as-is."""
        return None if value is None else json.loads(value)

class OptModel(Base):
    """ORM model for one row of the ``opt_models`` table."""
    __tablename__ = 'opt_models'

    id = Column(Integer, primary_key = True)
    n_cluster = Column(Integer)
    # Pass the callable itself, not its result: datetime.now() would be evaluated
    # once when the class is defined, stamping every row with that same frozen time.
    created_at = Column(DateTime, default = datetime.now, onupdate = datetime.now)
    # Author's note: the JSON datatype was found to work only with SQLAlchemy Core,
    # so the ORM column goes through the JSON-in-text TextPickleType instead.
    cv_results = Column(TextPickleType())


# Emit CREATE TABLE for the models declared on Base.
Base.metadata.create_all(engine)


keys = ['n_cluster', 'cv_results']

fun = [5, {'a': '5', 'b': '10'} ]

# Pair every column name with its value. Zipping against fun[1:] shifted the
# values by one, so n_cluster received the dict and cv_results was never set.
an_opt_model = OptModel(**dict(zip(keys, fun)))

Session = sessionmaker(bind = engine)

session = Session()

# Stage the new row in the session; nothing is committed in this cell.
session.add(an_opt_model)

2017-07-25 16:30:25,856 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2017-07-25 16:30:25,857 INFO sqlalchemy.engine.base.Engine ()
2017-07-25 16:30:25,860 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2017-07-25 16:30:25,860 INFO sqlalchemy.engine.base.Engine ()
2017-07-25 16:30:25,861 INFO sqlalchemy.engine.base.Engine PRAGMA table_info("opt_models")
2017-07-25 16:30:25,862 INFO sqlalchemy.engine.base.Engine ()
2017-07-25 16:30:25,863 INFO sqlalchemy.engine.base.Engine 
CREATE TABLE opt_models (
	id INTEGER NOT NULL, 
	n_cluster INTEGER, 
	created_at DATETIME, 
	cv_results TEXT(256), 
	PRIMARY KEY (id)
)


2017-07-25 16:30:25,864 INFO sqlalchemy.engine.base.Engine ()
2017-07-25 16:30:25,898 INFO sqlalchemy.engine.base.Engine COMMIT


In [15]:
import scipy.io as sio
from sklearn.decomposition import NMF

In [2]:
spec_path ='/zhome/49/7/76434/Documents/Data/SpecsAndLabels.mat'

In [3]:
SaL = sio.loadmat(spec_path)

In [31]:
spec1 = SaL['SPEC_1'][0][0]
spec2 = SaL['SPEC_2'][0][0]

In [32]:
spec1.shape

(781, 2048)

In [40]:
# Factorize spec1 into 30 non-negative components; random_state is fixed.
model = NMF(n_components=30, init='nndsvd', random_state=0)
# W1 holds one 30-value row per row of spec1 (shape printed in the next cell).
W1 = model.fit_transform(spec1)
# Map the reduced representation back to the original feature space.
spec1_transf = model.inverse_transform(W1)
# Reconstruction error reported by the fitted model.
recon_error = model.reconstruction_err_

In [41]:
print W1.shape
print spec1_transf.shape

(781, 30)
(781, 2048)


In [42]:
W2 = model.transform(spec2)
spec2_transf = model.inverse_transform(W2)

In [43]:
print W2.shape
print spec2_transf.shape

(1067, 30)
(1067, 2048)


In [44]:
result_path ='/zhome/49/7/76434/Documents/TestSpecs.mat'

In [45]:
sio.savemat(result_path, {'spec1_transf': spec1_transf, 'spec2_transf': spec2_transf, 
                          'W1': W1, 'W2': W2, 'spec1': spec1, 'spec2': spec2})

In [35]:
from sklearn.decomposition import NMF
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
import json 
import pandas as pd

In [23]:
file_name = "/zhome/49/7/76434/Documents/Data/SpecsAndLabels.mat"
SpecsLabels = sio.loadmat(file_name)

In [81]:
SpecsLabels['SPEC_1'][0][1].shape

(1043, 2048)

In [37]:
def compute_nnmf_logLik(subject_spec1,nnmf_recon):
    """Gaussian log-likelihood of a reconstruction of ``subject_spec1``.

    The residuals ``nnmf_recon - subject_spec1`` are scored under a single
    isotropic Gaussian whose variance is the mean squared residual.

    Parameters
    ----------
    subject_spec1 : 2-D array
        Original data; its shape supplies the element count.
    nnmf_recon : array broadcastable against ``subject_spec1``
        Reconstruction to score.

    Returns
    -------
    The log-likelihood value (larger means a better fit).
    """
    n_rows, n_cols = subject_spec1.shape
    n_elems = n_rows * n_cols

    sq_err_total = np.sum((nnmf_recon - subject_spec1)**2)
    # Mean squared residual, used as the variance estimate.
    variance = sq_err_total / n_elems

    # inverse variance
    precision = 1 / variance

    fit_term = - precision * sq_err_total / 2
    scale_term = n_elems * np.log(precision) / 2
    const_term = n_elems * np.log(2 * np.pi) / 2
    return fit_term + scale_term - const_term

In [38]:
def find_optimum_nnmf_dims(subject_spec1, dim_values=None):
    """Pick the NMF dimensionality with the lowest BIC for one data matrix.

    For every candidate dimensionality an NMF model is fitted to
    ``subject_spec1``, the data is reconstructed, and a BIC score is computed
    from ``compute_nnmf_logLik`` plus a penalty of ``n * (r + c) * log(r * c)``
    where ``(r, c)`` is the shape of the input.

    Parameters
    ----------
    subject_spec1 : 2-D array
        Data matrix to factorize.
    dim_values : iterable of int, optional
        Candidate numbers of components. Defaults to ``range(10, 100, 10)``.

    Returns
    -------
    (dict, model)
        ``{'nnmf_dim': best_n, 'nnmf_BIC': best_bic}`` and the fitted NMF
        model that achieved it.

    Raises
    ------
    ValueError
        If no candidate produced a BIC below +inf (empty ``dim_values`` or
        only inf/NaN scores). Previously this surfaced as an
        UnboundLocalError on ``best_model``.
    """
    if dim_values is None:
        dim_values = range(10,100,10)

    lowest_BIC = np.inf
    opt_dim = 0
    # Stays None when no candidate ever beats the initial +inf score.
    best_model = None

    r, c = subject_spec1.shape

    for n in dim_values:
        model = NMF(n_components=n, init='nndsvd', random_state=0)
        W1 = model.fit_transform(subject_spec1)
        nnmf_recon = model.inverse_transform(W1)

        # compute_nnmf_logLik returns the log-likelihood (not its negative).
        log_lik = compute_nnmf_logLik(subject_spec1,nnmf_recon)
        # Parameter count used for the BIC penalty.
        Q = n*(r + c)

        current_BIC = -2 * log_lik + Q * np.log(r * c)

        if current_BIC < lowest_BIC:
            lowest_BIC = current_BIC
            opt_dim = n
            best_model = model

    if best_model is None:
        raise ValueError("no candidate NMF dimensionality produced a finite BIC")

    # Same text as the original print statement; this form runs on Python 2 and 3.
    print("%s %s" % ("opt_dim = ", opt_dim))
    return {'nnmf_dim' : opt_dim, 'nnmf_BIC': lowest_BIC}, best_model

In [43]:
def col_names_tuple(n_features):
    """Build three-level column tuples for a combined "val" + "test" frame.

    Each of the two sets contributes ``n_features`` columns tagged ``"X"``
    (numbered ``0 .. n_features - 1``) followed by one ``"y_c4"`` and one
    ``"y_c6"`` column (both numbered ``1``).

    Returns a list of ``(set, dataspec, column)`` tuples suitable for
    ``pd.MultiIndex.from_tuples``.
    """
    # Columns per set: the features plus the two label columns.
    per_set_width = n_features + 2

    set_level = [["val"] * per_set_width, ["test"] * per_set_width]
    spec_level = [["X"] * n_features, ["y_c4"], ["y_c6"]] * 2
    column_level = [range(n_features), [1, 1]] * 2

    flat_levels = [flatten(level) for level in (set_level, spec_level, column_level)]

    return list(zip(*flat_levels))

In [51]:
def col_recon_tuple(n_features=2048):
    """Build two-level column tuples for the reconstructed train/test frame.

    Parameters
    ----------
    n_features : int, optional
        Number of columns in each reconstructed spectrogram. Defaults to 2048,
        the width of the spectrograms and reconstructions shown in this
        notebook. The previous hard-coded 2049 produced two labels too many,
        so assigning the index to the concatenated frame failed on length.

    Returns
    -------
    list of (set, column) tuples
        ``n_features`` entries tagged ``"train"`` followed by ``n_features``
        tagged ``"test"``; the column number runs on from 0 to
        ``2 * n_features - 1`` without restarting for the test half.
    """
    set_level = ["train"] * n_features + ["test"] * n_features
    # Built directly so the function no longer depends on `flatten` having been
    # defined in another cell.
    return list(zip(set_level, range(n_features * 2)))

In [45]:
# Helper for flattening a list of lists by one level
def flatten(l):
    """Return one list holding the items of every sub-iterable of ``l``, in order."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

In [53]:
num_subjects = len(SpecsLabels['SPEC_1'][0])
# Maps 'Subject_<i>' to the dict returned by find_optimum_nnmf_dims.
opt_n_features = {}

# One tuple per subject: (SPEC_1, ANNOT_1, ANNOTORIG_1, SPEC_2, ANNOT_2, ANNOTORIG_2).
zipped_data = zip(SpecsLabels['SPEC_1'][0],SpecsLabels['ANNOT_1'][0],SpecsLabels['ANNOTORIG_1'][0],
          SpecsLabels['SPEC_2'][0],SpecsLabels['ANNOT_2'][0],SpecsLabels['ANNOTORIG_2'][0])
sub_counter = 0

for x1,y1_4,y1_6,x2,y2_4,y2_6 in zipped_data:
    # Choose the NMF dimensionality on the first recording (x1) of this subject.
    d, model = find_optimum_nnmf_dims(x1)
    n_features = d['nnmf_dim']
    opt_n_features['Subject_' + str(sub_counter)] = d

    # Fit on x1, then project x2 with the same model. inverse_transform is a
    # method of the model; the bare name used before raised a NameError.
    xtrain = model.fit_transform(x1)
    spec_recon_train = model.inverse_transform(xtrain)
    xtest = model.transform(x2)
    spec_recon_test = model.inverse_transform(xtest)

    # Reduced features and both label sets for the two recordings, side by side.
    # NOTE(review): x1 and x2 can have different row counts (781 vs 1067 earlier
    # in this notebook); the column-wise concat then pads with NaN - confirm
    # that is intended.
    data = pd.concat([pd.DataFrame(xtrain),pd.DataFrame(y1_4),pd.DataFrame(y1_6),
                     pd.DataFrame(xtest), pd.DataFrame(y2_4), pd.DataFrame(y2_6)], axis=1, ignore_index = True)
    index = pd.MultiIndex.from_tuples(col_names_tuple(n_features), names=['Sets', 'Dataspec', 'columns'])
    data.columns = index
    data.to_pickle('/zhome/49/7/76434/Documents/Data/NNMFandLabelsSubject_' + str(sub_counter) + '.pckl')

    # Reconstructed spectrograms for both recordings, side by side.
    data_recon = pd.concat([pd.DataFrame(spec) for spec in [spec_recon_train, spec_recon_test]],
                           axis=1, ignore_index=True)
    index_recon = pd.MultiIndex.from_tuples(col_recon_tuple(), names=['Sets', 'columns'])
    data_recon.columns = index_recon

    data_recon.to_pickle('/zhome/49/7/76434/Documents/Data/ReconstructedDataSubject_' + str(sub_counter) + '.pckl')

    sub_counter += 1

# json.dump needs a writable file object, not a path string.
with open('/zhome/49/7/76434/Documents/Data/OptDimDict.json', 'w') as opt_dim_file:
    json.dump(opt_n_features, opt_dim_file)


KeyboardInterrupt: 

In [47]:
print index

MultiIndex(levels=[[u'test', u'val'], [u'X', u'y_c4', u'y_c6'], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]],
           labels=[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 1, 1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 2

# GMM

## Make synthetic dataset

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [7]:
def col_names_tuple(n_features):
    """Return ``(set, dataspec, column)`` tuples for a "val" + "test" frame.

    Per set the layout is ``n_features`` ``"X"`` columns numbered from 0,
    then a ``"y_c4"`` and a ``"y_c6"`` column, both numbered 1.
    """
    block_width = n_features + 2  # features plus two label columns per set

    top_level = [["val"] * block_width, ["test"] * block_width]
    mid_level = [["X"] * n_features, ["y_c4"], ["y_c6"]] * 2
    low_level = [range(n_features), [1, 1]] * 2

    levels = [flatten(groups) for groups in (top_level, mid_level, low_level)]

    return list(zip(*levels))

In [11]:
# Helper for flattening a list of lists by one level
def flatten(l):
    """Collect the items of each sub-iterable of ``l`` into a single list, in order."""
    items = []
    for sublist in l:
        for item in sublist:
            items.append(item)
    return items

In [28]:
def get_synthetic_dataset(seed=None):
    """Draw two 2-D Gaussian clusters with a few labelled points each.

    Each cluster has 100 points. Ten of them carry a label (1 or 2) and the
    other 90 are marked 0. In the first cluster seven labelled points get 1
    and three get 2; in the second cluster the split is reversed. The same
    label vector is stored in both the ``y_c4`` and ``y_c6`` columns.

    Parameters
    ----------
    seed : int, optional
        Seed for a private random generator, making the dataset reproducible.
        With the default ``None`` the draws come from NumPy's global random
        state, exactly as before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        Columns follow ``col_names_tuple(2)``: the first cluster sits under
        ``"val"`` and the second under ``"test"``.
    """
    N = 100
    N_labels = 10
    mean_value = 2
    wrongLabelFrac = 2.0/3.0
    n_features = 2

    rng = np.random if seed is None else np.random.RandomState(seed)

    mean1 = np.array([1, 1])*mean_value
    mean2 = -mean1
    cov1 = [[1, 0.5], [0.5, 1]]
    cov2 = [[1, -0.5], [-0.5, 1]]

    x1, y1 = rng.multivariate_normal(mean1, cov1, N).T
    x2, y2 = rng.multivariate_normal(mean2, cov2, N).T

    n_major = int(round(N_labels*wrongLabelFrac))
    n_minor = int(round(N_labels*(1-wrongLabelFrac)))

    def _shuffled_labels(major_label, minor_label):
        # n_major points of one label, n_minor of the other, zeros for the rest,
        # then shuffled so the labelled points land at random positions.
        labels = np.concatenate([
            np.ones(n_major, dtype=np.int8) * major_label,
            np.ones(n_minor, dtype=np.int8) * minor_label,
            np.zeros(N - N_labels, dtype=np.int8),
        ])
        return rng.permutation(labels)

    labels1 = _shuffled_labels(1, 2)
    labels2 = _shuffled_labels(2, 1)

    data = pd.concat([pd.DataFrame(x1), pd.DataFrame(y1), pd.DataFrame(labels1), pd.DataFrame(labels1),
                 pd.DataFrame(x2), pd.DataFrame(y2), pd.DataFrame(labels2), pd.DataFrame(labels2)], axis=1, ignore_index = True)

    index = pd.MultiIndex.from_tuples(col_names_tuple(n_features), names = ['sets', 'dataspec', 'columns'])
    data.columns = index

    return data

In [29]:


data = get_synthetic_dataset()
print data

sets           val                          test                    
dataspec         X           y_c4 y_c6         X           y_c4 y_c6
columns          0         1    1    1         0         1    1    1
0         3.198531  2.492364    0    0 -1.341622 -3.438339    0    0
1         1.516091  2.740435    0    0 -2.809086 -3.393853    0    0
2         1.441828  2.296428    0    0 -2.066180 -1.095390    0    0
3         0.580059  2.258693    0    0 -2.331700 -1.762819    0    0
4         0.080982  0.787015    0    0 -2.459751 -1.390109    0    0
5         1.223831  1.907615    0    0 -1.180408 -1.762890    0    0
6         0.423431  0.726080    0    0 -2.163887 -2.107938    0    0
7         1.818949  1.405503    1    1 -0.978910 -2.566349    0    0
8         0.615898  0.636456    0    0 -2.294618  0.569881    0    0
9         0.722853  1.861950    0    0 -2.392394 -1.901298    0    0
10        2.280702  3.099235    0    0 -2.429912 -1.765417    0    0
11        2.084533  3.674428    0 

In [None]:
# Pull coordinates and labels out of the frame returned by get_synthetic_dataset().
# The names `dataset`, `labels1` and `labels2` used here before were never defined
# at notebook level. Columns are keyed by (set, dataspec, column): cluster 1 is
# stored under 'val' and cluster 2 under 'test'.
syn_x1, syn_y1 = data[('val', 'X', 0)], data[('val', 'X', 1)]
syn_labels1 = data[('val', 'y_c4', 1)]
syn_x2, syn_y2 = data[('test', 'X', 0)], data[('test', 'X', 1)]
syn_labels2 = data[('test', 'y_c4', 1)]

plt.close()
# All points in grey first, then the labelled ones on top (label 1 blue, label 2 red).
plt.plot(syn_x1, syn_y1, 'v', color = '0.75')
plt.plot(syn_x2, syn_y2, 'o', color = '0.75')
plt.plot(syn_x1[syn_labels1==1], syn_y1[syn_labels1==1], 'v', color = 'b')
plt.plot(syn_x1[syn_labels1==2], syn_y1[syn_labels1==2], 'v', color = 'r')
plt.plot(syn_x2[syn_labels2==1], syn_y2[syn_labels2==1], 'o', color = 'b')
plt.plot(syn_x2[syn_labels2==2], syn_y2[syn_labels2==2], 'o', color = 'r')
plt.axis('equal')
plt.show()

# Play with database

In [1]:
from sqlalchemy import create_engine
from sqlalchemy import MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
#from to_sql import *
from datetime import datetime
from sqlalchemy import Column, Integer, String


In [3]:
#engine = create_engine('sqlite:///DataBase/sleep_performance.db', echo = False)
engine = create_engine('sqlite:///DataBase/ourDB.db', echo = True)

Base = declarative_base()

In [4]:
class User(Base):
    """ORM model for one row of the ``users`` table."""
    __tablename__ = 'users'

    id = Column(Integer, primary_key=True)
    name = Column(String)
    fullname = Column(String)
    password = Column(String)

    def __repr__(self):
        """Return a debug string with name, fullname and password."""
        # NOTE(review): the repr exposes the password in clear text.
        fields = (self.name, self.fullname, self.password)
        return "<User(name='%s', fullname='%s', password='%s')>" % fields

In [5]:
User.__table__

Table('users', MetaData(bind=None), Column('id', Integer(), table=<users>, primary_key=True, nullable=False), Column('name', String(), table=<users>), Column('fullname', String(), table=<users>), Column('password', String(), table=<users>), schema=None)

In [6]:
Base.metadata.create_all(engine)

In [7]:
ed_user = User(name='ed', fullname='Ed Jones', password='edspassword')
# The stray token `se` was an undefined name; display the attribute that
# produces the recorded 'ed' output instead.
ed_user.name

'ed'

In [9]:
print ed_user.password
str(ed_user.id)

edspassword


'None'

In [11]:
Session = sessionmaker(bind=engine)
session = Session()

In [14]:
session.add(ed_user)

In [15]:
our_user = session.query(User).filter_by(name='ed').first()

In [16]:
print our_user

<User(name='ed', fullname='Ed Jones', password='edspassword')>


In [17]:
session.add_all([
    User(name='wendy', fullname='Wendy Williams', password='foobar'),
    User(name='mary', fullname='Mary Contrary', password='xxg527'),
    User(name='fred', fullname='Fred Flinstone', password='blah')])

In [18]:
session.commit()