# Results from project 1

Task b)

In [1]:
import numpy as np
import pandas as pd
import scipy.constants as consts
import matplotlib.pyplot as plt
from io import StringIO
import glob
import os, sys
import re

In [2]:
params = {'legend.fontsize': 'x-large',
          'figure.figsize': (10, 5),
         'axes.labelsize': 'xx-large',
         'axes.titlesize':'xx-large',
         'xtick.labelsize':'large',
         'ytick.labelsize':'large'}
plt.rcParams.update(params)

#### Create appropriate filenames

In [3]:
def createfilename(alphas,path):
    filenames = [] 
    alpha_ = np.around(alphas,decimals = -1)
    for i in range(len(alphas)):
        filenames.append(path + str(int(alpha_[i])) + '.dat')
    return filenames

In [4]:
alphas = np.linspace(300000,700000,9)
path = 'b_n1_d1/b_n1_d1_alpha0.'

In [5]:
files = createfilename(alphas,path)

### Put everthing in a file into a dataframe with selected column names

In [6]:
def matchlist_iter(filename):
    column_names = ['energy', 'acceptance', 'timecpu','solver']
    df = pd.read_csv(filename,skiprows=5,names=column_names,delim_whitespace=True,comment='#')
    return df

##### Read in all the data into one single dataframe

In [7]:
data = []
length = len(files)
realalpha = np.linspace(0.3,0.7,9)
#indexes = list(np.linspace(0,3*length, 3*length+1))
for i in range(length):
    
   # nowindex = indexes[i:i+3]
    d = matchlist_iter(files[i])
    alphanow = np.zeros(len(['energy'])) + realalpha[i]
    d['alpha'] = pd.Series(alphanow,index=d.index)
    data.append(d)
result = pd.concat(data,ignore_index=True)
result


Unnamed: 0,energy,acceptance,timecpu,solver,alpha
0,0.571888,0.988915,3.65444,0,0.3
1,0.539968,0.989642,3.710214,1,0.3
2,0.390369,0.996718,3.684476,2,0.3
3,0.541114,0.988016,3.530099,0,0.35
4,0.544347,0.98786,3.703977,1,0.35
5,0.410811,0.996651,3.77507,2,0.35
6,0.51338,0.987309,3.622487,0,0.4
7,0.513502,0.987279,3.651494,1,0.4
8,0.439269,0.996266,3.663967,2,0.4
9,0.499456,0.987088,3.636815,0,0.45


Slicing result into solver methods

In [8]:
solver = [result[result.solver == 0],result[result.solver == 1],result[result.solver == 2]]


### Solvers

solver[0] = analytic solution  
solver[1] = numerical derivation  
solver[2] = importance sampling

In [9]:
solver[0]

Unnamed: 0,energy,acceptance,timecpu,solver,alpha
0,0.571888,0.988915,3.65444,0,0.3
3,0.541114,0.988016,3.530099,0,0.35
6,0.51338,0.987309,3.622487,0,0.4
9,0.499456,0.987088,3.636815,0,0.45
12,0.5,0.985676,3.589127,0,0.5
15,0.507812,0.986301,3.653402,0,0.55
18,0.513756,0.984834,3.581373,0,0.6
21,0.519535,0.984289,3.739749,0,0.65
24,0.539026,0.983899,3.810217,0,0.7


In [10]:
solver[1]

Unnamed: 0,energy,acceptance,timecpu,solver,alpha
1,0.539968,0.989642,3.710214,1,0.3
4,0.544347,0.98786,3.703977,1,0.35
7,0.513502,0.987279,3.651494,1,0.4
10,0.509835,0.985998,3.768516,1,0.45
13,0.5,0.985286,3.795014,1,0.5
16,0.504358,0.98548,3.78526,1,0.55
19,0.501131,0.983908,3.727102,1,0.6
22,0.519699,0.983897,3.861608,1,0.65
25,0.523957,0.982756,3.768564,1,0.7


In [11]:
solver[2]

Unnamed: 0,energy,acceptance,timecpu,solver,alpha
2,0.390369,0.996718,3.684476,2,0.3
5,0.410811,0.996651,3.77507,2,0.35
8,0.439269,0.996266,3.663967,2,0.4
11,0.467646,0.996058,3.752264,2,0.45
14,0.5,0.995973,3.540494,2,0.5
17,0.532459,0.99549,3.69585,2,0.55
20,0.570595,0.995606,3.551669,2,0.6
23,0.605431,0.995295,3.604347,2,0.65
26,0.641398,0.995116,3.572266,2,0.7


In [12]:
df_colums = [f"${v}$" for v in [r"\alpha",r"\langle E_L \rangle", r"\sigma_b", r"acceptance [\%]", r"t_{CPU} [s]"]]
#df_columns = ["$n$"] + [f"${v}$" for v in [r"I_{max} [antall]", r"\theta_0 [\degree]", r"\sigma_\theta [\degree]", r"I_{0} [antall]"] ]

# Read in blocking data

In [13]:
def data_block(filename):
    column_names = ['alpha', 'sigma_an', 'sigma_num','E_mean_an', 'E_mean_num']
    df = pd.read_csv(filename,skiprows=4,names=column_names,delim_whitespace=True,comment='#')
    return df

In [14]:
block = data_block('b_n1_d1/b_n1_d1_blocking.dat')
block

Unnamed: 0,alpha,sigma_an,sigma_num,E_mean_an,E_mean_num
0,0.3,0.13814,0.1179972,0.571888,0.539968
1,0.35,0.071833,0.0716787,0.541114,0.544347
2,0.4,0.02764,0.02541967,0.51338,0.513502
3,0.45,0.004763,0.00774509,0.499456,0.509835
4,0.5,0.0,-8.049117e-16,0.5,0.5
5,0.55,0.003628,0.004159931,0.507812,0.504357
6,0.6,0.013992,0.01878866,0.513756,0.501131
7,0.65,0.036136,0.0295313,0.519535,0.519699
8,0.7,0.050862,0.05499213,0.539025,0.523957


#### Analytical data

In [15]:
a = solver[0]
#reset index 

a = a.reset_index(drop=True)
analytic = pd.concat([block[['alpha','E_mean_an','sigma_an']],a[['acceptance','timecpu']]],axis=1)
analytic

Unnamed: 0,alpha,E_mean_an,sigma_an,acceptance,timecpu
0,0.3,0.571888,0.13814,0.988915,3.65444
1,0.35,0.541114,0.071833,0.988016,3.530099
2,0.4,0.51338,0.02764,0.987309,3.622487
3,0.45,0.499456,0.004763,0.987088,3.636815
4,0.5,0.5,0.0,0.985676,3.589127
5,0.55,0.507812,0.003628,0.986301,3.653402
6,0.6,0.513756,0.013992,0.984834,3.581373
7,0.65,0.519535,0.036136,0.984289,3.739749
8,0.7,0.539025,0.050862,0.983899,3.810217


#### Numerical data

In [16]:
n = solver[0]
n = n.reset_index(drop=True)
numerical = pd.concat([block[['alpha','E_mean_num','sigma_num']],n[['acceptance','timecpu']]],axis=1)
numerical

Unnamed: 0,alpha,E_mean_num,sigma_num,acceptance,timecpu
0,0.3,0.539968,0.1179972,0.988915,3.65444
1,0.35,0.544347,0.0716787,0.988016,3.530099
2,0.4,0.513502,0.02541967,0.987309,3.622487
3,0.45,0.509835,0.00774509,0.987088,3.636815
4,0.5,0.5,-8.049117e-16,0.985676,3.589127
5,0.55,0.504357,0.004159931,0.986301,3.653402
6,0.6,0.501131,0.01878866,0.984834,3.581373
7,0.65,0.519699,0.0295313,0.984289,3.739749
8,0.7,0.523957,0.05499213,0.983899,3.810217


## Tables to latex format

In [17]:
df_columns = [f"${v}$" for v in [r"\alpha",r"\langle E_L \rangle", r"\sigma_b", r"\text{acceptance } [\%]", r"t_{CPU} [s]"]]

In [18]:
def latex(df,colname):
    df = df.rename(columns=dict(zip(df, colname)))
    table = df.to_latex(index=False,escape=False,column_format=(1+ len(colname))*'c')
    table = table.replace("toprule", "hline \hline")
    table = table.replace("bottomrule", "hline \hline")
    table = table.replace("midrule", "hline")
    s = r'''\begin{table}[H]
    \centering
    \caption{}
    \label{tab:}
    '''
    table = s + table 
    table = table +'\end{table}'
    return table

In [19]:
t = latex(analytic,df_columns)
print(t)

\begin{table}[H]
    \centering
    \caption{}
    \label{tab:}
    \begin{tabular}{cccccc}
\hline \hline
 $\alpha$ &  $\langle E_L \rangle$ &  $\sigma_b$ &  $\text{acceptance } [\%]$ &  $t_{CPU} [s]$ \\
\hline
     0.30 &               0.571888 &    0.138140 &                   0.988915 &       3.654440 \\
     0.35 &               0.541114 &    0.071833 &                   0.988016 &       3.530099 \\
     0.40 &               0.513380 &    0.027640 &                   0.987309 &       3.622487 \\
     0.45 &               0.499456 &    0.004763 &                   0.987088 &       3.636815 \\
     0.50 &               0.500000 &    0.000000 &                   0.985676 &       3.589127 \\
     0.55 &               0.507812 &    0.003628 &                   0.986301 &       3.653402 \\
     0.60 &               0.513756 &    0.013992 &                   0.984834 &       3.581373 \\
     0.65 &               0.519535 &    0.036136 &                   0.984289 &       3.739749 \\
     

In [20]:
l = latex(numerical, df_columns)
print(l)

\begin{table}[H]
    \centering
    \caption{}
    \label{tab:}
    \begin{tabular}{cccccc}
\hline \hline
 $\alpha$ &  $\langle E_L \rangle$ &    $\sigma_b$ &  $\text{acceptance } [\%]$ &  $t_{CPU} [s]$ \\
\hline
     0.30 &               0.539968 &  1.179972e-01 &                   0.988915 &       3.654440 \\
     0.35 &               0.544347 &  7.167870e-02 &                   0.988016 &       3.530099 \\
     0.40 &               0.513502 &  2.541967e-02 &                   0.987309 &       3.622487 \\
     0.45 &               0.509835 &  7.745090e-03 &                   0.987088 &       3.636815 \\
     0.50 &               0.500000 & -8.049117e-16 &                   0.985676 &       3.589127 \\
     0.55 &               0.504357 &  4.159931e-03 &                   0.986301 &       3.653402 \\
     0.60 &               0.501131 &  1.878866e-02 &                   0.984834 &       3.581373 \\
     0.65 &               0.519699 &  2.953130e-02 &                   0.984289 &      

In [21]:

plt.plot(analytic['alpha'],analytic['E_mean_an'],'o-',color='xkcd:violet')
plt.plot(numerical['alpha'],numerical['E_mean_num'],'o-', color='xkcd:deep aqua')
plt.ylabel(r'$\langle E_L \rangle$',fontsize=15)
plt.xlabel(r'$\alpha$', fontsize=15)

Text(0.5,0,'$\\alpha$')

# Generalized plotting stuff

In [22]:
def plotstuff(datax,datay,errors,legend_,xname=r'$\alpha$',yname=r'$\langle E_L \rangle$'):
    for i in range(len(datax)):
        plt.errorbar(datax[i],datay[i],errors[i],capsize=5,fmt='o-',label=legend_[i],elinewidth=1)
    plt.legend(fontsize=15)
    plt.xlabel(xname)
    plt.ylabel(yname)

In [23]:
x = [analytic['alpha'],numerical['alpha']]
y = [analytic['E_mean_an'], numerical['E_mean_num']]
sigmas = [np.sqrt(analytic['sigma_an']),np.sqrt(abs(numerical['sigma_num'])]
legend = ['Analytical','Numerical']

SyntaxError: invalid syntax (<ipython-input-23-a977db396df7>, line 3)

In [24]:
plotstuff(x,y,sigmas,legend)

NameError: name 'x' is not defined

# N = 10, 100, 500

In [25]:
ns = [10,100,500]

In [26]:
def create_fpath(N, dim=1):
    f = []
    for n in range(len(N)):
        f.append('b_n' + str(N[n]) + '_d'+str(dim) + '/')
    return f

In [27]:
fn = create_fpath(ns)

In [28]:
def create_fn(N,filepath,dim=1):
    an = []
    num = []
    main = []
    for n in range(len(N)):
        main.append(filepath[n]+'b_n' + str(N[n]) + '_d'+str(dim) + '_alpha0.500000.dat')
        num.append(filepath[n]+'Enb_n' + str(N[n]) + '_d'+str(dim) + '_alpha0.500000.dat')
        an.append(filepath[n]+'Eab_n' + str(N[n]) + '_d'+str(dim) + '_alpha0.500000.dat')
        
    return an, num, main

In [29]:
fuk_an,fuk_num,fuk_main = create_fn(ns, fn)
print(fuk_an,fuk_num,fuk_main)

['b_n10_d1/Eab_n10_d1_alpha0.500000.dat', 'b_n100_d1/Eab_n100_d1_alpha0.500000.dat', 'b_n500_d1/Eab_n500_d1_alpha0.500000.dat'] ['b_n10_d1/Enb_n10_d1_alpha0.500000.dat', 'b_n100_d1/Enb_n100_d1_alpha0.500000.dat', 'b_n500_d1/Enb_n500_d1_alpha0.500000.dat'] ['b_n10_d1/b_n10_d1_alpha0.500000.dat', 'b_n100_d1/b_n100_d1_alpha0.500000.dat', 'b_n500_d1/b_n500_d1_alpha0.500000.dat']


In [78]:
def read_Ean_2_df(filenamesmain,filenamean,filenamenum,Ns,dims=1):
    #geninfo = matchlist_iter(filenames[0])
    newDF = pd.DataFrame() #creates a new dataframe that's empty
    mainDF = pd.DataFrame()
    column_names = [['energy_num'],['energy_an']]
    print(Ns)
    energy = pd.DataFrame()
    for i in range(len(filenamesmain)):
        temp = pd.DataFrame()
        main = matchlist_iter(filenamesmain[i])
        main = main.assign(N=Ns[i]*np.ones(len(main)))
        main = main.assign(dim=dims * np.ones(len(main)))
        temp['E_an'] = pd.read_csv(filenamean[i])
        temp = temp.assign(N=Ns[i]*np.ones(len(temp)))
        temp = temp.assign(dim=dims * np.ones(len(temp)))
        mainDF = mainDF.append(main,ignore_index=True)
        temp['E_n'] = pd.read_csv(filenamenum[i],delim_whitespace=True,usecols=[0])
        energy = energy.append(temp,ignore_index=True)
        #newDF = newDF.append(df, ignore_index = True)
    return mainDF, energy


In [66]:
def name_dat_file(directory):
    filenames = [f for f in glob.glob(directory + '*.dat')]
    return filenames

In [67]:
def tryint(s):
    try:
        return int(s)
    except:
        return s

def alphanum_key(s):
    """ Turn a string into a list of string and number chunks.
        "z23a" -> ["z", 23, "a"]
    """
    return [ tryint(c) for c in re.split('([0-9]+)', s) ]

def sort_nicely(l):
    """ Sort the given list in the way that humans expect.
    """
    l.sort(key=alphanum_key)
    return l
    

In [68]:
d3 = name_dat_file('b_d3/')
d2 = name_dat_file('b_d2/')
sort_nicely(d2)
sort_nicely(d3)
no = [1,10,100,500]

In [79]:
d3main, d3en = read_Ean_2_df(d3[8::],d3[0:4],d3[4:8],no,dims=3)

[1, 10, 100, 500]


In [80]:
d3en

Unnamed: 0,E_an,N,dim,E_n
0,1.5,1.0,3.0,1.500615
1,1.5,1.0,3.0,1.501280
2,1.5,1.0,3.0,1.500475
3,1.5,1.0,3.0,1.499799
4,1.5,1.0,3.0,1.498857
5,1.5,1.0,3.0,1.499821
6,1.5,1.0,3.0,1.501172
7,1.5,1.0,3.0,1.502698
8,1.5,1.0,3.0,1.503245
9,1.5,1.0,3.0,1.501320


In [91]:
d2main, d2en = read_Ean_2_df(d2[8::],d2[0:4],d2[4:8],no,dims=2)

[1, 10, 100, 500]


In [92]:
d2en

Unnamed: 0,E_an,N,dim,E_n
0,1.0,1.0,2.0,1.000319
1,1.0,1.0,2.0,1.000319
2,1.0,1.0,2.0,0.999049
3,1.0,1.0,2.0,0.998494
4,1.0,1.0,2.0,1.000125
5,1.0,1.0,2.0,1.001258
6,1.0,1.0,2.0,1.001783
7,1.0,1.0,2.0,1.002944
8,1.0,1.0,2.0,1.003571
9,1.0,1.0,2.0,1.003493


In [39]:
def block_mean(vec):
    return sum(vec)/len(vec)

def meanAndVariance(vec):
    mean = np.mean(vec)
#     var = np.var(vec)
#     mean = sum(vec)/len(vec)
    var = sum([i ** 2 for i in vec])/len(vec) - mean*mean
    return mean, var

In [40]:
def everything_is_awesome(data):
    n_blocks = 200
    block_size_min = 100
    block_size_max = len(data)/100
    block_step = int ((block_size_max - block_size_min + 1) / n_blocks)
    mean_vec = []
    var_vec = []
    block_sizes = []
    for i in range(0, n_blocks):
        mean_temp_vec = []
        start_point = 0
        end_point = block_size_min + block_step*i
        block_size = end_point
        block_sizes.append(block_size)

    mean_temp_vec.append(block_mean(data[start_point:end_point]))
    start_point = end_point
    end_point += block_size_min + block_step*i
    mean, var = meanAndVariance(mean_temp_vec)
    mean_vec.append(mean)
    var_vec.append(np.sqrt(var/(len(data)/float(block_size) - 1.0)))

    mean, var = meanAndVariance(data)

    return mean,var
    

In [41]:
main, en = read_Ean_2_df(fuk_main,fuk_an,fuk_num,ns)
#main = main.assign(mean = np.ones(len(main.N)))
#main = main.assign(std_b = np.ones(len(main.N)))
tempmean_an = tempmean_num = 0
tempvar_an = tempvar_num = 0
meanlist = []
stdlist = []
for i in range(len(ns)):
    #energyan = en[en.N==ns[i]]
    print(ns[i])
    tempmean_an, tempvar_an = everything_is_awesome(en[en.N==ns[i]]['E_an'])
    tempmean_num, tempvar_num = everything_is_awesome(en[en.N==ns[i]]['E_n'])
    meanlist.append(tempmean_an)
    meanlist.append(tempmean_num)
    stdlist.append(np.sqrt(tempvar_an))
    stdlist.append(np.sqrt(tempvar_num))
main['std_b'] = stdlist
#main['E_mean'] = meanlist

[10, 100, 500]
10
100
500


In [42]:
mainsolveran = main[main.solver == 0].reset_index(drop=True)
mainsolvernum = main[main.solver == 1].reset_index(drop=True)


In [43]:
mainsolveran


Unnamed: 0,energy,acceptance,timecpu,solver,N,dim,std_b
0,5.0,0.985796,4.999671,0,10.0,1.0,0.0
1,50.0,0.985887,60.58452,0,100.0,1.0,0.0
2,250.0,0.985908,1225.942,0,500.0,1.0,0.0


In [44]:
mainsolvernum

Unnamed: 0,energy,acceptance,timecpu,solver,N,dim,std_b
0,4.87966,0.98614,6.173081,1,10.0,1.0,3.448885
1,47.87102,0.985838,168.8052,1,100.0,1.0,38.077691
2,255.1006,0.985899,2914.487,1,500.0,1.0,181.319741


In [55]:
#en[en.N==500]

In [53]:
#en[en.N==10]

In [54]:
#en[en.N==100]

In [48]:
cols = mainsolveran.columns.tolist()
print(cols)

newcols = ['dim','N','energy', 'std_b','acceptance', 'solver']

['energy', 'acceptance', 'timecpu', 'solver', 'N', 'dim', 'std_b']


vil ha i tabellen  

dim | N | energy | std |acceptance | timecpu

In [49]:
def col_on_me(data):
    newcols = ['dim','N','energy', 'std_b','acceptance', 'solver']
    return data[newcols]

one-body densities

poenget med den, se korrelasjonen i systemet ditt  

formen sier noe om hvordan systemet fungerer  

skal bli en gauss

In [50]:
mainsolveran = col_on_me(mainsolveran)

In [87]:
def add_block(dfmain,dfen,n):
    tempmean_an = tempmean_num = 0
    tempvar_an = tempvar_num = 0
    meanlist = []
    stdlist = []
    for i in range(len(n)):
        #energyan = en[en.N==ns[i]]
        print(n[i])
        tempmean_an, tempvar_an = everything_is_awesome(dfen[dfen.N==n[i]]['E_an'])
        tempmean_num, tempvar_num = everything_is_awesome(dfen[dfen.N==n[i]]['E_n'])
        meanlist.append(tempmean_an)
        meanlist.append(tempmean_num)
        stdlist.append(np.sqrt(tempvar_an))
        stdlist.append(np.sqrt(tempvar_num))
    dfmain['std_b'] = stdlist
    return dfmain

In [88]:
d3main= add_block(d3main,d3en,no)


1
10
100
500


In [89]:
d3main

Unnamed: 0,energy,acceptance,timecpu,solver,N,dim,std_b
0,1.5,0.984964,2.732921,0,1.0,3.0,0.0
1,1.500331,0.985972,3.082563,1,1.0,3.0,0.034112
2,15.0,0.976646,0.526204,0,10.0,3.0,0.0
3,14.9304,0.976497,3.966555,1,10.0,3.0,1.365341
4,150.0,0.845978,1.363964,0,100.0,3.0,0.0
5,154.009,0.846988,6.711756,1,100.0,3.0,1.48477
6,750.0,0.434706,5.852499,0,500.0,3.0,0.0
7,744.5919,0.436138,17.569,1,500.0,3.0,0.316331


In [93]:
d2main = add_block(d2main,d2en,no)

1
10
100
500
