Merge branch 'master' of https://github.com/pandas-dev/pandas into div_zero2
harisbal · Jan 31, 2018 · 965f721 · 965f721
2 parents d648ef6 + fb3b237
commit 965f721
Show file tree

Hide file tree

Showing 29 changed files with 585 additions and 458 deletions.
diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run
@@ -13,7 +13,7 @@ lxml
 html5lib
 jinja2
 sqlalchemy
-pymysql
+pymysql<0.8.0
 feather-format
 pyarrow
 psycopg2

diff --git a/doc/source/10min.rst b/doc/source/10min.rst
@@ -154,7 +154,7 @@ Selection
    While standard Python / Numpy expressions for selecting and setting are
    intuitive and come in handy for interactive work, for production code, we
    recommend the optimized pandas data access methods, ``.at``, ``.iat``,
-   ``.loc``, ``.iloc`` and ``.ix``.
+   ``.loc`` and ``.iloc``.
 
 See the indexing documentation :ref:`Indexing and Selecting Data <indexing>` and :ref:`MultiIndex / Advanced Indexing <advanced>`.
 

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -373,7 +373,7 @@ Deprecations
 - :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`)
 - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`).
 - ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`)
-
+- :func:`DataFrame.from_items` is deprecated. Use :func:`DataFrame.from_dict` instead, or ``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`)
 
 .. _whatsnew_0230.prior_deprecations:
 
@@ -464,6 +464,7 @@ Datetimelike
 - Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`)
 - Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`)
 - Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
+- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`)
 
 Timezones
 ^^^^^^^^^

diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h
@@ -18,33 +18,6 @@ The full license is in the LICENSE file, distributed with this software.
 
 PANDAS_INLINE npy_int64 get_nat(void) { return NPY_MIN_INT64; }
 
-PANDAS_INLINE int is_integer_object(PyObject* obj) {
-    return (!PyBool_Check(obj)) && PyArray_IsIntegerScalar(obj);
-}
-
-PANDAS_INLINE int is_float_object(PyObject* obj) {
-    return (PyFloat_Check(obj) || PyArray_IsScalar(obj, Floating));
-}
-PANDAS_INLINE int is_complex_object(PyObject* obj) {
-    return (PyComplex_Check(obj) || PyArray_IsScalar(obj, ComplexFloating));
-}
-
-PANDAS_INLINE int is_bool_object(PyObject* obj) {
-    return (PyBool_Check(obj) || PyArray_IsScalar(obj, Bool));
-}
-
-PANDAS_INLINE int is_string_object(PyObject* obj) {
-    return (PyString_Check(obj) || PyUnicode_Check(obj));
-}
-
-PANDAS_INLINE int is_datetime64_object(PyObject* obj) {
-    return PyArray_IsScalar(obj, Datetime);
-}
-
-PANDAS_INLINE int is_timedelta64_object(PyObject* obj) {
-    return PyArray_IsScalar(obj, Timedelta);
-}
-
 PANDAS_INLINE int assign_value_1d(PyArrayObject* ap, Py_ssize_t _i,
                                   PyObject* v) {
     npy_intp i = (npy_intp)_i;
@@ -80,17 +53,4 @@ void set_array_not_contiguous(PyArrayObject* ao) {
     ao->flags &= ~(NPY_C_CONTIGUOUS | NPY_F_CONTIGUOUS);
 }
 
-// If arr is zerodim array, return a proper array scalar (e.g. np.int64).
-// Otherwise, return arr as is.
-PANDAS_INLINE PyObject* unbox_if_zerodim(PyObject* arr) {
-    if (PyArray_IsZeroDim(arr)) {
-        PyObject* ret;
-        ret = PyArray_ToScalar(PyArray_DATA(arr), arr);
-        return ret;
-    } else {
-        Py_INCREF(arr);
-        return arr;
-    }
-}
-
 #endif  // PANDAS__LIBS_SRC_NUMPY_HELPER_H_
diff --git a/pandas/_libs/src/period_helper.c b/pandas/_libs/src/period_helper.c
@@ -1275,38 +1275,6 @@ npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq) {
     return toDaily(period_ordinal, 'E', &af_info) + ORD_OFFSET;
 }
 
-char *str_replace(const char *s, const char *old, const char *new) {
-    char *ret;
-    int i, count = 0;
-    size_t newlen = strlen(new);
-    size_t oldlen = strlen(old);
-
-    for (i = 0; s[i] != '\0'; i++) {
-        if (strstr(&s[i], old) == &s[i]) {
-            count++;
-            i += oldlen - 1;
-        }
-    }
-
-    ret = PyArray_malloc(i + 1 + count * (newlen - oldlen));
-    if (ret == NULL) {
-        return (char *)PyErr_NoMemory();
-    }
-
-    i = 0;
-    while (*s) {
-        if (strstr(s, old) == s) {
-            strncpy(&ret[i], new, sizeof(char) * newlen);
-            i += newlen;
-            s += oldlen;
-        } else {
-            ret[i++] = *s++;
-        }
-    }
-    ret[i] = '\0';
-
-    return ret;
-}
 
 // function to generate a nice string representation of the period
 // object, originally from DateObject_strftime

diff --git a/pandas/_libs/src/period_helper.h b/pandas/_libs/src/period_helper.h
@@ -112,15 +112,6 @@ frequency conversion routines.
 
 #define INT_ERR_CODE INT32_MIN
 
-#define MEM_CHECK(item)          \
-    if (item == NULL) {          \
-        return PyErr_NoMemory(); \
-    }
-#define ERR_CHECK(item) \
-    if (item == NULL) { \
-        return NULL;    \
-    }
-
 typedef struct asfreq_info {
     int from_week_end;  // day the week ends on in the "from" frequency
     int to_week_end;    // day the week ends on in the "to" frequency
@@ -182,7 +173,6 @@ int pminute(npy_int64 ordinal, int freq);
 int psecond(npy_int64 ordinal, int freq);
 int pdays_in_month(npy_int64 ordinal, int freq);
 
-double getAbsTime(int freq, npy_int64 dailyDate, npy_int64 originalDate);
 char *c_strftime(struct date_info *dinfo, char *fmt);
 int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year);
 

diff --git a/pandas/_libs/src/util.pxd b/pandas/_libs/src/util.pxd
@@ -1,24 +1,76 @@
-from numpy cimport ndarray
+from numpy cimport ndarray, NPY_C_CONTIGUOUS, NPY_F_CONTIGUOUS
 cimport numpy as cnp
+cnp.import_array()
+
 cimport cpython
+from cpython cimport PyTypeObject
+
+cdef extern from "Python.h":
+    # Note: importing extern-style allows us to declare these as nogil
+    # functions, whereas `from cpython cimport` does not.
+    bint PyUnicode_Check(object obj) nogil
+    bint PyString_Check(object obj) nogil
+    bint PyBool_Check(object obj) nogil
+    bint PyFloat_Check(object obj) nogil
+    bint PyComplex_Check(object obj) nogil
+    bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil
+
+
+cdef extern from "numpy/arrayobject.h":
+    PyTypeObject PyFloatingArrType_Type
+
+cdef extern from "numpy/ndarrayobject.h":
+    PyTypeObject PyTimedeltaArrType_Type
+    PyTypeObject PyDatetimeArrType_Type
+    PyTypeObject PyComplexFloatingArrType_Type
+    PyTypeObject PyBoolArrType_Type
+
+    bint PyArray_IsIntegerScalar(obj) nogil
+    bint PyArray_Check(obj) nogil
+
+# --------------------------------------------------------------------
+# Type Checking
+
+cdef inline bint is_string_object(object obj) nogil:
+    return PyString_Check(obj) or PyUnicode_Check(obj)
+
+
+cdef inline bint is_integer_object(object obj) nogil:
+    return not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj)
+
+
+cdef inline bint is_float_object(object obj) nogil:
+    return (PyFloat_Check(obj) or
+            (PyObject_TypeCheck(obj, &PyFloatingArrType_Type)))
+
 
+cdef inline bint is_complex_object(object obj) nogil:
+    return (PyComplex_Check(obj) or
+            PyObject_TypeCheck(obj, &PyComplexFloatingArrType_Type))
+
+
+cdef inline bint is_bool_object(object obj) nogil:
+    return (PyBool_Check(obj) or
+            PyObject_TypeCheck(obj, &PyBoolArrType_Type))
+
+
+cdef inline bint is_timedelta64_object(object obj) nogil:
+    return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type)
+
+
+cdef inline bint is_datetime64_object(object obj) nogil:
+    return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type)
+
+# --------------------------------------------------------------------
 
 cdef extern from "numpy_helper.h":
     void set_array_not_contiguous(ndarray ao)
 
-    int is_integer_object(object)
-    int is_float_object(object)
-    int is_complex_object(object)
-    int is_bool_object(object)
-    int is_string_object(object)
-    int is_datetime64_object(object)
-    int is_timedelta64_object(object)
     int assign_value_1d(ndarray, Py_ssize_t, object) except -1
     cnp.int64_t get_nat()
     object get_value_1d(ndarray, Py_ssize_t)
     char *get_c_string(object) except NULL
     object char_to_string(char*)
-    object unbox_if_zerodim(object arr)
 
 ctypedef fused numeric:
     cnp.int8_t
@@ -112,3 +164,22 @@ cdef inline bint _checknan(object val):
 
 cdef inline bint is_period_object(object val):
     return getattr(val, '_typ', '_typ') == 'period'
+
+
+cdef inline object unbox_if_zerodim(object arr):
+    """
+    If arr is zerodim array, return a proper array scalar (e.g. np.int64).
+    Otherwise, return arr as is.
+
+    Parameters
+    ----------
+    arr : object
+
+    Returns
+    -------
+    result : object
+    """
+    if cnp.PyArray_IsZeroDim(arr):
+        return cnp.PyArray_ToScalar(cnp.PyArray_DATA(arr), arr)
+    else:
+        return arr
diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
@@ -372,15 +372,6 @@ cdef object _period_strftime(int64_t value, int freq, object fmt):
 ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN
 
 
-def get_period_field(int code, int64_t value, int freq):
-    cdef accessor f = _get_accessor_func(code)
-    if f is NULL:
-        raise ValueError('Unrecognized period code: %d' % code)
-    if value == iNaT:
-        return np.nan
-    return f(value, freq)
-
-
 def get_period_field_arr(int code, ndarray[int64_t] arr, int freq):
     cdef:
         Py_ssize_t i, sz

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -313,7 +313,7 @@ def _constructor(self):
 
     _constructor_sliced = Series
     _deprecations = NDFrame._deprecations | frozenset(
-        ['sortlevel', 'get_value', 'set_value', 'from_csv'])
+        ['sortlevel', 'get_value', 'set_value', 'from_csv', 'from_items'])
 
     @property
     def _constructor_expanddim(self):
@@ -1246,6 +1246,12 @@ def to_records(self, index=True, convert_datetime64=True):
     @classmethod
     def from_items(cls, items, columns=None, orient='columns'):
         """
+        .. deprecated:: 0.23.0
+            from_items is deprecated and will be removed in a
+            future version. Use ``DataFrame.from_dict(dict(items))``
+            instead. ``DataFrame.from_dict(OrderedDict(items))`` may be used
+            to preserve the key order.
+
         Convert (key, value) pairs to DataFrame. The keys will be the axis
         index (usually the columns, but depends on the specified
         orientation). The values should be arrays or Series.
@@ -1266,6 +1272,13 @@ def from_items(cls, items, columns=None, orient='columns'):
         -------
         frame : DataFrame
         """
+
+        warnings.warn("from_items is deprecated. Please use "
+                      "DataFrame.from_dict(dict()) instead. "
+                      "DataFrame.from_dict(OrderedDict()) may be used to "
+                      "preserve the key order.",
+                      FutureWarning, stacklevel=2)
+
         keys, values = lzip(*items)
 
         if orient == 'columns':

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -7315,6 +7315,7 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
 
         rs = (data.div(data.shift(periods=periods, freq=freq, axis=axis,
                                   **kwargs)) - 1)
+        rs = rs.reindex_like(data)
         if freq is None:
             mask = isna(com._values_from_object(self))
             np.putmask(rs.values, mask, np.nan)

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -230,7 +230,7 @@ def make_block_same_class(self, values, placement=None, ndim=None,
         if dtype is not None:
             # issue 19431 fastparquet is passing this
             warnings.warn("dtype argument is deprecated, will be removed "
-                          "in a future release.", FutureWarning)
+                          "in a future release.", DeprecationWarning)
         if placement is None:
             placement = self.mgr_locs
         return make_block(values, placement=placement, ndim=ndim,

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -13,6 +13,7 @@
 import datetime
 import struct
 import sys
+from collections import OrderedDict
 
 import numpy as np
 from dateutil.relativedelta import relativedelta
@@ -1571,7 +1572,7 @@ def read(self, nrows=None, convert_dates=None,
                 else:
                     data_formatted.append((col, data[col]))
         if requires_type_conversion:
-            data = DataFrame.from_items(data_formatted)
+            data = DataFrame.from_dict(OrderedDict(data_formatted))
         del data_formatted
 
         self._do_convert_missing(data, convert_missing)
@@ -1609,7 +1610,7 @@ def read(self, nrows=None, convert_dates=None,
                     convert = True
                 retyped_data.append((col, data[col].astype(dtype)))
             if convert:
-                data = DataFrame.from_items(retyped_data)
+                data = DataFrame.from_dict(OrderedDict(retyped_data))
 
         if index_col is not None:
             data = data.set_index(data.pop(index_col))
@@ -1722,7 +1723,7 @@ def _do_convert_categoricals(self, data, value_label_dict, lbllist,
                 cat_converted_data.append((col, cat_data))
             else:
                 cat_converted_data.append((col, data[col]))
-        data = DataFrame.from_items(cat_converted_data)
+        data = DataFrame.from_dict(OrderedDict(cat_converted_data))
         return data
 
     def data_label(self):
@@ -1997,7 +1998,7 @@ def _prepare_categoricals(self, data):
                 data_formatted.append((col, values))
             else:
                 data_formatted.append((col, data[col]))
-        return DataFrame.from_items(data_formatted)
+        return DataFrame.from_dict(OrderedDict(data_formatted))
 
     def _replace_nans(self, data):
         # return data