In [61]:

def clean_data(data, threshold = 2500):
    """Keep only the observations whose value is strictly above ``threshold``.

    Args:
        data: numeric ``pd.Series`` to filter.
        threshold: integer cut-off; entries less than or equal to it are
            dropped.

    Returns:
        A ``pd.Series`` with the entries of ``data`` that are greater than
        ``threshold``, keeping their original index labels.

    Raises:
        TypeError: if ``data`` is not a Series, if its values are not of a
            numeric dtype, or if ``threshold`` is not an int.
    """
    # Validate first and bail out early so the filtering itself stays flat.
    if not isinstance(data, pd.Series):
        raise TypeError("Your data needs to be in a Series format.")

    values = data.values
    if not np.issubdtype(values.dtype, np.number):
        raise TypeError("Your Series data needs to be numeric.")

    if not isinstance(threshold, int):
        raise TypeError("Your passed threshold needs to be an int.")

    # Boolean mask over the raw values; indexing with it keeps the labels.
    keep = values > threshold
    return data[keep]

  
    
def split_data(data, split = 0.7):
    """Split a numeric Series into a leading training part and a trailing testing part.

    The split is positional and preserves order (no shuffling), which is what
    a time-ordered series needs: the first ``round(split * len(data))``
    observations become the training set and the remainder the testing set.

    Args:
        data: numeric ``pd.Series`` to split.
        split: fraction of observations assigned to training; must be a float
            strictly between 0.0 and 1.0.

    Returns:
        A ``(training, testing)`` tuple of ``pd.Series``.

    Raises:
        TypeError: if ``data`` is not a Series, if its values are not of a
            numeric dtype, or if ``split`` is not a float.
        ValueError: if ``split`` is not strictly between 0.0 and 1.0.
    """
    if not isinstance(data, pd.Series):
        raise TypeError("Your data needs to be in a Series format.")
    if not np.issubdtype(data.values.dtype, np.number):
        raise TypeError("Your data needs to be numeric.")
    if not isinstance(split, float):
        raise TypeError("Your split value needs to be a float.")
    if not 0.0 < split < 1.0:
        raise ValueError("Split value needs to be between 0.0 and 1.0")

    length_training = int(round(split * len(data)))

    # Use .iloc so the cut is always positional. A plain ``data[a:b]`` integer
    # slice is interpreted as a *label* slice on a float-dtype index in
    # pandas 1.x/2.x, which would silently return the wrong rows.
    training = data.iloc[:length_training]
    testing = data.iloc[length_training:]
    return training, testing
                
        
           
def decompose(data, method = "bfill"):
    """Resample a datetime-indexed Series to hourly data and decompose it.

    The series is converted to an hourly frequency (gaps filled with
    ``method``), passed to ``seasonal_decompose``, and the resulting trend,
    seasonal and residual components are collected in one DataFrame. Missing
    values in each component are replaced by that component's mean. The
    resulting frame is also plotted as a side effect.

    Args:
        data: ``pd.Series`` with a ``DatetimeIndex``.
        method: fill method used when resampling to hourly frequency; either
            ``"bfill"`` or ``"ffill"``.

    Returns:
        A ``pd.DataFrame`` indexed by the hourly timestamps with the columns
        ``Data``, ``Trend``, ``Seasonality`` and ``Noise``.

    Raises:
        TypeError: if ``data`` is not a Series, if its index is not a
            ``DatetimeIndex``, or if ``method`` is not ``"bfill"``/``"ffill"``.
    """
    if not isinstance(data, pd.Series):
        raise TypeError("Your data needs to be in a Series format.")
    if not isinstance(data.index, pd.DatetimeIndex):
        raise TypeError("Series index needs to be in DateTime format.")
    # Compare by value: ``is`` tests object identity and can reject an equal
    # string that happens not to be interned. TypeError is kept (rather than
    # ValueError) so existing callers that catch it keep working.
    if method not in ("bfill", "ffill"):
        raise TypeError("Method needs to be of type either bfill or ffill.")

    # NOTE(review): values are not checked for being numeric here.
    # An explicit Hour offset is equivalent to the "H" alias but does not
    # depend on which spelling of the alias the installed pandas accepts.
    hourly = data.asfreq(pd.offsets.Hour(), method=method)

    # seasonal_decompose is expected to be imported at file level (presumably
    # statsmodels.tsa.seasonal.seasonal_decompose -- confirm against imports).
    result = seasonal_decompose(hourly)
    trend = result.trend.fillna(result.trend.mean())
    seasonality = result.seasonal.fillna(result.seasonal.mean())
    resid = result.resid.fillna(result.resid.mean())

    decomposed_df = pd.DataFrame(dict(Data = hourly.values, Trend = trend.values,
                                      Seasonality = seasonality.values, Noise = resid.values))
    # All components share the hourly timestamps, so reuse that index.
    decomposed_df.index = hourly.index
    # Plot the data as part of what is returned.
    decomposed_df.plot()

    return decomposed_df



    

    