V1.0

Initial version of library: Classes added: - DataSet - DataFit - PascoParser Added global_enums and global_funcs
jeanyvesb9 · May 26, 2018 · 69c1f14 · 69c1f14
1 parent 8499b91
commit 69c1f14
Show file tree

Hide file tree

Showing 11 changed files with 629 additions and 2 deletions.
diff --git a/Library_plan.txt b/Library_plan.txt
@@ -0,0 +1,139 @@
+DataAnalysis-Lib
+
+func roundError:
+    Args:
+        error:
+    returns rounded error to 1st signifficant digit
+
+func roundToError:
+    Args:
+        x
+        error
+    returns x rounded to error
+
+func pprintDataFitSet:
+    Args:
+        list
+    prints a panda.DataFrame with all the fits. Assumes the same name for all params.
+
+class DataSet:
+    Properties:
+        x
+        y
+        xError
+        yError
+        name
+
+    __init__:
+        Args:
+            x
+            y
+            xError = None
+            xErrorFn = None
+            yError = None
+            yErrorFn = None
+            name = ''
+            xLabel = ''
+            yLabel = ''
+            xUnits = None
+            yUnits = None
+        if xErrorFn is not None, then if xError is not None calculate xError property. Otherwise throw warning.
+        if yErrorFn is not None, then if yError is not None calculate yError property. Otherwise throw warning.
+
+    cut:
+        Args:
+            initialIndex = None
+            finalIndex = None
+    purge:
+        Args:
+        step // step >= 1
+    remove:
+        Args:
+            index //can be int or list
+    indexAtX:
+        Args:
+            value
+            exact = True //if False, then return list with closest element
+        returns list
+    indexAtY:
+        Args:
+            value
+            exact = True //if False, then return list with closest element
+        returns list
+
+
+    getMean:
+        returns mean of y
+    getStdDev:
+        returns Standard Deviation of y
+    getStdDevOfMean:
+        returns Standard Deviation of the Mean of y, also calles Standard Error of the Mean, or simply Standard Error
+    getWeightedMean:
+        returns weighted mean
+    getWeightedMeanError:
+        returns weighted mean error
+
+    quickPlot:
+        Args:
+            plotType = PlotType.ErrorBar
+            step = 0
+        plots the dataset
+        returns fig, ax
+
+    dataFrame:
+        Args:
+            rounded = True
+            xSeparatedError = False
+            xRelativeError = False
+            ySeparatedError = False
+            yRelativeError = False
+            saveCSVFile = None
+            csvSep = ','
+            csvDecimal = '.'
+        returns panda.DataFrame with the class data. If separatedErrors, put xError ans yError on separated columns
+        saves csv to saveFile if saveFile is not None
+
+class DataFit:
+    Properties:
+        data
+        fn
+        initialConditions
+        fitObj
+        paramsName
+
+    __init__:
+        Args:
+            data
+            fn
+            initialConditions
+            paramsName = []
+            method = 'odr'
+
+        TODO: check for initialConditions size
+
+    getR2:
+        returns R-squared
+    getFitFn:
+        returns fn with params applied
+    dataFrame:
+        Args:
+            rounded = True
+        prints a panda.DataFrame with the fit data
+
+class PascoParser:
+    Properties:
+        csvFile
+
+    __init__:
+        Args:
+            csv_file
+            separator
+            decimal
+            numberOfColumnsPerSeries
+
+    parseDataSeries:
+        Args:
+            seriesNumber
+            xValue
+            yValue
+        returns (xvalues, yvalues)
diff --git a/README.md b/README.md
@@ -1,2 +1,2 @@
-# DataAnalysis-Lib
-Open Source library developed for in-lab easy data analysis and processing. Intended for student use in experimental Physics labs. Developed by @jeanyvesb9 and @agustin
+# DataAnalysisLib
+Open Source library developed for in-lab easy data analysis and processing. Intended for student use in experimental Physics labs. Developed by @jeanyvesb9 and @aguscaputobugallo
diff --git a/__init__.py b/__init__.py
@@ -0,0 +1,6 @@
+from dataset import DataSet
+from datafit import DataFit
+from pascoparser import PascoParser
+from global_enums import PlotType, FitMethods
+from global_funcs import createSeriesPanda, findNearestValueIndex, reportManyFits, roundToError, \
+                            roundToFirstSignifficantDigit
diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
diff --git a/__pycache__/ajuste.cpython-36.pyc b/__pycache__/ajuste.cpython-36.pyc
diff --git a/__pycache__/library.cpython-36.pyc b/__pycache__/library.cpython-36.pyc
diff --git a/datafit.py b/datafit.py
@@ -0,0 +1,126 @@
+import warnings
+import functools
+
+import numpy as np
+import scipy.odr as odr
+import matplotlib.pyplot as plt
+import pandas as pd
+
+import global_funcs
+import global_enums
+
+
+class DataFit(object):
+    def __init__(self, data, fn, initialParams, paramNames = None, paramUnits = None, method = global_enums.FitMethods.ODR):
+        self.data = data
+        self.fn = fn
+        self.initialParams = initialParams
+        self.method = method
+
+        xError = self.data.xError if np.count_nonzero(self.data.xError) != 0 else None
+        yError = self.data.yError if np.count_nonzero(self.data.yError) != 0 else None
+
+        self.fitObj = None
+        self.fitParams = None
+        self.fitParamsStdError = None
+        self.reducedChi2 = None
+        self.R2 = None
+
+        if self.method == global_enums.FitMethods.ODR:
+            Rdata = odr.RealData(self.data.x, self.data.y, xError, yError)
+            self.fitObj = odr.ODR(Rdata, odr.Model(self.fn), self.initialParams).run()
+
+            self.fitParams = self.fitObj.beta
+            self.fitParamsStdError = self.fitObj.sd_beta
+
+            #The following 2 lines raise warnings in pylint. The code is OK tough.
+            self.reducedChi2 = self.fitObj.res_var #See http://mail.scipy.org/pipermail/scipy-user/2012-May/032207.html
+            self.R2 = 1 - np.sum(self.fitObj.eps**2)/self.fitObj.sum_square if np.argwhere(np.array(self.fitObj.sd_beta) == 0).size == 0  else 1
+
+        self.paramNames = ['$B_{' + str(i) + '}$' for i in range(len(self.fitParams))]
+        if paramNames is not None:
+            if len(paramNames) != len(self.fitParams):
+                warnings.warn('len(paramsName) != len(fitParams): Default parameter names selected.')
+            else:
+                seen = set()
+                flag = False
+                for name in paramNames:
+                    if name not in seen:
+                        seen.add(name)
+                    else:
+                        flag = True
+                if flag:
+                    warnings.warn('Found repeated values in paramNames: Default parameter names selected.')
+                else:
+                    self.paramNames = paramNames
+        self.paramUnits = paramUnits
+
+    def getFitFn(self):
+        return functools.partial(self.fn, self.fitParams)
+
+    def quickPlot(self, plotType = global_enums.PlotType.ErrorBar, purgeStep = 1):
+        if purgeStep <= 0:
+            warnings.warn('purgeStep has to be at least 1. Setting purgeStep = 1.')
+            purgeStep = 1
+        fig , ax = plt.subplots(1,1)
+        if plotType == global_enums.PlotType.ErrorBar:
+            ax.errorbar(self.data.x[::purgeStep], self.data.y[::purgeStep], xerr = self.data.xError[::purgeStep], \
+                        yerr = self.data.yError[::purgeStep], fmt = 's')
+        elif plotType == global_enums.PlotType.Line:
+            ax.plot(self.data.x[::purgeStep], self.data.y[::purgeStep], '-')
+        elif plotType == global_enums.PlotType.Point:
+            ax.plot(self.data.x[::purgeStep], self.data.y[::purgeStep], 's')
+
+        x = np.linspace(self.data.x[0], self.data.x[-1], 1000)
+        ax.plot(x, self.getFitFn()(x))
+
+        ax.set_xlabel(self.data.xLabel if self.data.xUnits is None else self.data.xLabel + ' (' + self.data.xUnits + ')')
+        ax.set_ylabel(self.data.yLabel if self.data.yUnits is None else self.data.yLabel + ' (' + self.data.yUnits + ')')
+        ax.set_title(self.data.name)
+        return fig, ax
+
+    def dataFrame(self, rounded = True, separatedError = False, relativeError = False, saveCSVFile = None, CSVSep = ',', CSVDecimal = '.'):
+        perrors = [global_funcs.roundToFirstSignifficantDigit(x) for x in self.fitParamsStdError] if rounded else self.fitParamsStdError
+        pvalues = [global_funcs.roundToError(self.fitParams[i], perrors[i]) for i in range(len(self.fitParams))] if rounded else self.fitParams
+
+        R2col = [np.round(self.R2, 5)]
+        rowNames = ['B']
+        if separatedError:
+            rowNames += ['$\\Delta B$']
+            R2col += ['-']
+        if relativeError:
+            rowNames += ['$\\Delta B$ (rel)']
+            R2col += ['-']
+        rowNames += ['$B_0$']
+        R2col += ['-']
+
+        colNames = []
+        if self.paramUnits is not None:
+            colNames = [self.paramNames[i] + ' (' + self.paramUnits[i] + ')' if self.paramUnits[i] != '' \
+                        else self.paramNames[i] for i in range(len(self.paramNames))]
+        else:
+            colNames = self.paramNames
+        colNames.append('$R^2$')
+
+        tblCols = {}
+        for i in range(len(pvalues)):
+            if relativeError:
+                relError = perrors[i]/pvalues[i] if pvalues[i] != 0 else '-'
+                if separatedError:
+                    tblCols[colNames[i]] = [pvalues[i], perrors[i], relError, self.initialParams[i]]
+                else:
+                    tblCols[colNames[i]] = [str(pvalues[i]) + ' +/- ' + str(perrors[i]), relError, self.initialParams[i]]
+            else:
+                if separatedError:
+                    tblCols[colNames[i]] = [pvalues[i], perrors[i], self.initialParams[i]]
+                else:
+                    tblCols[colNames[i]] = [str(pvalues[i]) + ' +/- ' + str(perrors[i]), self.initialParams[i]]
+
+        tblCols['$R^2$'] = R2col
+
+        table = pd.DataFrame(tblCols, columns = colNames, index = rowNames)
+
+        if saveCSVFile is not None:
+            table.to_csv(saveCSVFile, sep = CSVSep, decimal = CSVDecimal)
+
+        return table