In [1]:
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt
import glob
import pandas as pd

In [2]:
!mkdir graphs parameters

## The datafiles are located in /opt/qcdnum-17-01-14/output/

In [3]:
def x_pdf(x, A, B, C , D, E, F, G):
	return A*x**B*(1-x)**C*(1 + D*x + E*x**2 + F*np.log(x) + G*np.log(x)**2)

In [4]:
initial_conditions = pd.DataFrame(
    {
      #            A         ,  B  ,    C ,  D  ,  E ,  F    ,   G   
      'uv':[10.19304899633023, 0.76,   4.6, 0.0 , 2.6, 0.35  , 0.049 ],
     'dv': [5.564255181349489, 0.99,   4.7, 0.0 , 0.0, 0.0   , 0.0],
     'ubar':[    0.14        , -0.33,  11 , 18  , 0.0, 0.071 , 0.0],
     'dbar':[    0.14        , -0.33,  24 , 38  , 0.0, 0.071 , 0.0 ],
     'gl':[0.872978687751462, -0.52,  4.5, 0.0 , 0.0, 0.217 , 0.0112]
    }
)

In [5]:
initial_conditions

Unnamed: 0,uv,dv,ubar,dbar,gl
0,10.193049,5.564255,0.14,0.14,0.872979
1,0.76,0.99,-0.33,-0.33,-0.52
2,4.6,4.7,11.0,24.0,4.5
3,0.0,0.0,18.0,38.0,0.0
4,2.6,0.0,0.0,0.0,0.0
5,0.35,0.0,0.071,0.071,0.217
6,0.049,0.0,0.0,0.0,0.0112


In [6]:
initial_conditions.to_csv('parameters/10.csv', index=False)

In [7]:
data_paths = glob.glob('/opt/qcdnum-17-01-14/output/*.csv')

In [8]:
data_paths

['/opt/qcdnum-17-01-14/output/pruebaCxx_q_2.560000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_100000000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_10000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_100000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_1000000000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_1000000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_1000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_10000000000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_10.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_10000000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_100.000000.csv']

In [9]:
def get_q2_from_file_path(x):
  q2 = x.split('/')[-1]
  q2 = q2.replace('pruebaCxx_q_','').replace('.csv','').split('.')[0]

  return int(q2)

In [10]:
data_paths=sorted(data_paths, key=get_q2_from_file_path, reverse=False)
data_paths

['/opt/qcdnum-17-01-14/output/pruebaCxx_q_2.560000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_10.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_100.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_1000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_10000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_100000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_1000000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_10000000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_100000000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_1000000000.000000.csv',
 '/opt/qcdnum-17-01-14/output/pruebaCxx_q_10000000000.000000.csv']

In [11]:
old_parameters_file_path = 'parameters/10.csv'
for k, file_path in enumerate(data_paths):
  print(100*k/len(data_paths), '%')
  q2 = get_q2_from_file_path(file_path)
  initial_conditions=pd.read_csv(old_parameters_file_path)
  data = pd.read_csv(file_path, delimiter=' ')
  title_data = data.columns[1:]
  fitted_conditions = {}
  for i, parameter in enumerate(initial_conditions.columns):
    print(parameter)
    y = data['x'+parameter]
    x = data['x']
    print(parameter)
    try:
      initial_parameters = initial_conditions[parameter]
      fitted_parameters,_= curve_fit(x_pdf,x,y, p0=initial_parameters, maxfev=1000)
      fitted_conditions[parameter] = fitted_parameters
      plt.figure()
      plt.plot(x,y, label = 'data')
      plt.plot(x,x_pdf(x,*fitted_parameters), label='fit')
      plt.plot(x,x_pdf(x,*initial_parameters), label='inicial')
      plt.ylim(-0.1,1.1)
      plt.legend()
      #plt.ioff()
      image_name = f'graphs/q2_{q2}_param_{parameter}.png'
      plt.savefig(image_name)
      plt.close()
    except:
      for j in range(10):
        rango = 3
        variacion = np.random.random(7)*rango - rango
        initial_parameters = initial_conditions[parameter] +variacion
        
        try: 
          fitted_parameters,_= curve_fit(x_pdf,x,y, p0=initial_parameters, maxfev=1000)
          fitted_conditions[parameter] = fitted_parameters
          #print('parametros encontrados!')
          plt.figure()
          plt.plot(x,y, label = 'data')
          plt.plot(x,x_pdf(x,*fitted_parameters), label='fit')
          plt.plot(x,x_pdf(x,*initial_parameters), label='inicial')
          plt.ylim(-0.1,1.1)
          plt.legend()
          #plt.ioff()
          image_name = f'graphs/q2_{q2}_param_{parameter}.png'
          plt.savefig(image_name)
          plt.close()
          with open('problemas.txt', 'a') as f:
            f.write(f'El parametro {parameter} de los datos {file_path} se logró fittear, pero con algunos problemas, revisar la imagen {image_name}\n')
          break
        except:
          #print('No se encontraron los parametros en la iteracion {}'.format(j))
          if j==9:
            with open('problemas.txt', 'a') as f:
              f.write(f'No se logro fittear la funcion para el parametro {parameter} de los datos {file_path}\n')
            fitted_conditions[parameter] = [0]*7

  fitted_conditions = pd.DataFrame(fitted_conditions)
  fitted_conditions.to_csv(f'parameters/{q2}.csv', index=False)
  old_parameters_file_path = f'parameters/{q2}.csv'

0.0 %
uv
uv
dv
dv
ubar
ubar
dbar
dbar
gl
gl
9.090909090909092 %
uv
uv
dv
dv
ubar
ubar
dbar
dbar
gl
gl
18.181818181818183 %
uv
uv
dv
dv
ubar
ubar
dbar
dbar
gl
gl
27.272727272727273 %
uv
uv
dv
dv
ubar
ubar
dbar
dbar
gl
gl
36.36363636363637 %
uv
uv
dv
dv
ubar
ubar
dbar
dbar
gl
gl
45.45454545454545 %
uv
uv
dv
dv
ubar
ubar




dbar
dbar
gl
gl
54.54545454545455 %
uv
uv
dv
dv
ubar
ubar
dbar
dbar




gl
gl
63.63636363636363 %
uv
uv
dv
dv
ubar
ubar
dbar
dbar




gl
gl
72.72727272727273 %
uv
uv
dv
dv
ubar
ubar
dbar
dbar




gl
gl
81.81818181818181 %
uv
uv
dv
dv
ubar
ubar




dbar
dbar
gl
gl
90.9090909090909 %
uv
uv
dv
dv
ubar
ubar




dbar
dbar




gl
gl


In [12]:
print("Done :)")

Done :)
