# Project 2 : Two dimentional Ising model

In [2]:
import pickle
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.model_selection as skms
import sklearn.linear_model as skl
import sklearn.metrics as skm
import tqdm
import copy
import time
from IPython.display import display

%matplotlib inline

sns.set(color_codes=True)

import algorithms

In [3]:
cwd = os.getcwd()
filenames = glob.glob(os.path.join(cwd, 'files*'))

In [4]:
label_filename = "/home/hanna/project2/files/Ising2DFM_reSample_L40_T=All_labels.pkl"

# Read in the labels
with open(label_filename, "rb") as f:
    labels = pickle.load(f)

dat_filename = "/home/hanna/project2/files/Ising2DFM_reSample_L40_T=All.pkl"
    
# Read in the corresponding configurations
with open(dat_filename, "rb") as f:
    data = np.unpackbits(pickle.load(f)).reshape(-1, 1600).astype("int")

# Set spin-down to -1
data[data == 0] = -1

In [5]:
data.shape

(160000, 1600)

In [6]:
# Set up slices of the dataset
ordered = slice(0, 70000)
critical = slice(70000, 100000)
disordered = slice(100000, 160000)

X = np.concatenate((data[ordered], data[disordered]))
Y = np.concatenate((labels[ordered], labels[disordered]))

In [7]:
labels[ordered].shape, labels[disordered].shape

((70000,), (60000,))

In [8]:
data[ordered].shape, data[disordered].shape

((70000, 1600), (60000, 1600))

In [9]:
Y.shape

(130000,)

In [10]:
data[disordered]

array([[ 1,  1, -1, ..., -1,  1,  1],
       [ 1,  1, -1, ...,  1,  1,  1],
       [-1, -1, -1, ..., -1, -1, -1],
       ..., 
       [ 1,  1,  1, ...,  1,  1, -1],
       [ 1,  1,  1, ...,  1, -1, -1],
       [ 1,  1,  1, ...,  1, -1, -1]])

In [11]:
X.shape

(130000, 1600)

In [12]:
X_train, X_test, y_train, y_test = skms.train_test_split(X,Y, test_size = 0.333)

In [13]:
from utils import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, split_size=0.5, random_state=0)

In [14]:
# Testing that y_test and train contain both label 
y_train, y_test

(array([1, 0, 0, ..., 1, 1, 1]), array([1, 1, 1, ..., 0, 0, 0]))

# Sammenlign med Stochastic gradient descent. 

In [15]:
eta = 0.1
n_iter = 50
random_state = 1
key = "ridge" 
lmd=0.01
descent_method= "sgd"

eta = [0.0001, 0.001, 0.01, 0.1]
lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]

accu = []

for e in eta:
    for l in lmd:
        a = algorithms.LogisticRegression(eta = e, random_state = random_state, key = key, dm = descent_method, shuffle = True, n_iter = 100, batch_size = 10, epochs=100,lmd = l, tolerance=1e-14)
        log = a.fit(X_train, y_train)
        score = log.predict(X_test)
        acc = np.sum(score == y_test)/len(score)
        accu.append(acc)
        print(acc)

0.498692307692
0.502430769231
0.502646153846
0.504369230769
0.543430769231
0.730815384615
0.502369230769
0.502676923077
0.504369230769
0.544307692308
0.728338461538
0.459861538462
0.502676923077
0.504369230769
0.543430769231
0.730815384615
0.459861538462
0.459861538462
0.504369230769
0.543430769231
0.728338461538
0.459861538462
0.459861538462
0.459861538462


In [18]:
accu # prøv ny batch størrelse

[0.49869230769230771,
 0.50243076923076924,
 0.50264615384615385,
 0.50436923076923079,
 0.54343076923076927,
 0.73081538461538464,
 0.50236923076923079,
 0.50267692307692313,
 0.50436923076923079,
 0.54430769230769227,
 0.72833846153846149,
 0.45986153846153849,
 0.50267692307692313,
 0.50436923076923079,
 0.54343076923076927,
 0.73081538461538464,
 0.45986153846153849,
 0.45986153846153849,
 0.50436923076923079,
 0.54343076923076927,
 0.72833846153846149,
 0.45986153846153849,
 0.45986153846153849,
 0.45986153846153849]

In [19]:
m = np.array(accu).reshape((len(eta), len(lmd)))

plt.figure(figsize=(12,6))
ax = sns.heatmap(m, annot = True, xticklabels=lmd, yticklabels=eta)
ax.set_title("Logistic regression SGD Ridge", fontsize = 20)
ax.set_xlabel("Lambda value", fontsize = 15)
ax.set_ylabel("Eta value", fontsize = 15) #
plt.savefig("./results/figures/Logisticregression_Ridge_sgd.png")

TypeError: object of type 'float' has no len()

# Standard/steepest gradient descent

In [20]:
eta = 0.01
n_iter = 50
random_state = 1
key = "ols" 
lmd=0.01
# (self, eta, random_state, key, n_iter = 50, lmd = 0, tolerance=1e-14):
a = algorithms.LogisticRegression(eta = eta, random_state = random_state, key = key, dm = "steepest", shuffle = True, n_iter = 100, batch_size = 10, epochs=100,lmd = lmd, tolerance=1e-14)

In [17]:
log = a.fit(X_train, y_train)

KeyboardInterrupt: 

In [None]:
score = log.predict(X_test)

In [None]:
score

In [None]:
acc = np.sum(score == y_test)/len(score)
acc

In [None]:
accuracy = {"ols": [], "ridge":[], "lasso": []}

In [None]:
eta = [0.0001, 0.001, 0.01, 0.1]
lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]
key = "lasso"

for e in eta:
    for l in lmd:
        a = algorithms.LogisticRegression(e, random_state, key, n_iter, l)
        log = a.fit(X_train, y_train)
        score = log.predict(X_test)
        acc = np.sum(score == y_test)/len(score)
        accuracy[key].append(acc)

In [None]:
accuracy["lasso"]

In [None]:
m = np.array(accuracy["lasso"]).reshape((len(eta), len(lmd)))
m

In [None]:
m = np.array(accuracy["lasso"]).reshape((len(eta), len(lmd)))

plt.figure(figsize=(12,6))
ax = sns.heatmap(m, annot = True, xticklabels=lmd, yticklabels=eta)
ax.set_title("Accuracy of prediction using Lasso", fontsize = 20)
ax.set_xlabel("Lambda value", fontsize = 15)
ax.set_ylabel("Eta value", fontsize = 15)
plt.savefig("./results/figures/Logisticregression_Lasso.png")

# OLS

In [None]:
key = "ols"
etas = [0.0001, 0.001, 0.01, 0.1]

for e in etas:
    a = algorithms.LogisticRegression(e, random_state, key, n_iter)
    log = a.fit(X_train, y_train)
    score = log.predict(X_test)
    acc = np.sum(score == y_test)/len(score)
    accuracy[key].append(acc)

In [None]:
m = np.array(accuracy[key])
print(m)
#np.array(m).reshape((4, 1))

In [None]:
m = np.array(accuracy[key]).reshape(1,(len(eta)))

plt.figure(figsize=(12,6))
ax = sns.heatmap(m, annot = True, xticklabels=eta)
ax.set_title("Logistic regression OLS", fontsize = 20)
ax.set_xlabel("Eta value", fontsize = 15)
ax.set_ylabel("Lambda value (lmd=0)", fontsize = 15)
plt.savefig("./results/figures/Logisticregression_OLS.png")

# Ridge

In [None]:
eta = [0.0001, 0.001, 0.01, 0.1]
lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]

key = "ridge"

for e in eta:
    for l in lmd:
        a = algorithms.LogisticRegression(e, random_state, key, n_iter, l)
        log = a.fit(X_train, y_train)
        score = log.predict(X_test)
        acc = np.sum(score == y_test)/len(score)
        accuracy[key].append(acc)

In [None]:
m = np.array(accuracy[key]).reshape((len(eta), len(lmd)))

plt.figure(figsize=(12,6))
ax = sns.heatmap(m, annot = True, xticklabels=lmd, yticklabels=eta)
ax.set_title("Logistic regression Ridge", fontsize = 20)
ax.set_xlabel("Lambda value", fontsize = 15)
ax.set_ylabel("Eta value", fontsize = 15) #
plt.savefig("./results/figures/Logisticregression_Ridge.png")

Accuracy of one can be reffered to as a perfect classyfier.
# OBS! Sammenlign dette med scikitlearn

In [None]:
# Case ridge for the best? optimizer.
clf = skl.LogisticRegression()
clf.fit(X_train, y_train)
clf.predict(X_test)
clf.score(X_test, y_test)

In [None]:
# Should be similar to lasso with the best optimizer.
clf = skl.LogisticRegression(penalty='l1')
clf.fit(X_train, y_train)
clf.predict(X_test)
clf.score(X_test, y_test)