In [6]:
import pandas as pd
import numpy as np
import itertools

In [7]:
def multiply_by_x(n, x=100):
    """
    Multiplies n by 100 (or x): (e.g. n = 5, returns [5,5,5...(100), 5]

    Parameters
    ----------
    n : int

    Returns
    -------
    list 100/x n's
    """

    return [n] * x


def get_scale(wiggle, scale_to=100):
    """

    Parameters
    ----------
    wiggle : pandas.Series
        Series of values of any length
    Returns
    -------
        Series of values that is scale (length is always 100).
    """

    # Need to adjust series such that it has at least 100 parts.
    # Required since stepper will iterate from 0.01..1 and x will
    # iterate from 0..99.
    if len(wiggle) == scale_to:  # no need to do any calculating.
        return wiggle
    elif len(wiggle) == 1:  # return 100/n of these values
        return pd.Series(
            list(
                itertools.chain.from_iterable(
                    [multiply_by_x(w, x=scale_to) for w in wiggle]
                )
            )
        )
    elif len(
            wiggle) < scale_to:  # multiply everything by scaling factor, this guarantees it is divisible by scaling factor
        wiggle = pd.Series(
            list(
                itertools.chain.from_iterable(
                    [multiply_by_x(w, x=scale_to) for w in wiggle]
                )
            )
        )

    dist = [0] * scale_to  # final series length
    x = 0  # iterate through dist list
    step = 1 / float(scale_to)  # stepper, increments increase by this number
    y = 0  # number of values in each stepwise bin

    # iterate through each value until it reaches next step, then averages
    # (step = 1% -> 2%, or 2% -> 3%, etc. if we are trying to scale to 100%)
    for pos, value in enumerate(wiggle):
        if (float(pos + 1) / len(wiggle)) < step:  # if we haven't reached the next step, add value to bin (dist[x])
            # print("{} < {}, dist[{}] = {}".format((float(pos + 1) / len(wiggle)), step, x, value))
            y = y + 1
            dist[x] = dist[x] + value
        elif (float(pos + 1) / len(wiggle) == 1):  # if we have reached the last step, break loop
            y = y + 1
            dist[x] = dist[x] + value
            break
        else:  # if we have passed the next step, divide total value in dist[x] by y (number of values) to get average of bin, then move on (iterate x)
            dist[x] = dist[x] / float(y)
            step = step + 1 / float(scale_to)
            x = x + 1
            dist[x] = value
            y = 1

    try:
        dist[x] = dist[x] / float(y)
    except ZeroDivisionError as e:
        print("Got zero series, won't scale.", e, wiggle)
    except IndexError as e:
        pass
    return pd.Series(dist)

In [8]:
s = pd.Series([0, 1, 1, 0, 1, 0])

In [10]:
scaled = get_scale(s)
scaled

0     0.000000
1     0.000000
2     0.000000
3     0.000000
4     0.000000
5     0.000000
6     0.000000
7     0.000000
8     0.000000
9     0.000000
10    0.000000
11    0.000000
12    0.000000
13    0.000000
14    0.000000
15    0.000000
16    0.166667
17    1.000000
18    1.000000
19    1.000000
20    1.000000
21    1.000000
22    1.000000
23    1.000000
24    1.000000
25    1.000000
26    1.000000
27    1.000000
28    1.000000
29    1.000000
        ...   
70    1.000000
71    1.000000
72    1.000000
73    1.000000
74    1.000000
75    1.000000
76    1.000000
77    1.000000
78    1.000000
79    1.000000
80    1.000000
81    1.000000
82    1.000000
83    0.333333
84    0.000000
85    0.000000
86    0.000000
87    0.000000
88    0.000000
89    0.000000
90    0.000000
91    0.000000
92    0.000000
93    0.000000
94    0.000000
95    0.000000
96    0.000000
97    0.000000
98    0.000000
99    0.000000
Length: 100, dtype: float64