In [8]:
import pandas as pd
from scipy import stats
import numpy as np

In [9]:
# Relative locations of the per-pump CSV exports.
# Raw strings keep the backslashes literal: sequences such as '\d' or '\h' are
# not valid escapes, and relying on Python passing them through unchanged
# triggers an invalid-escape warning on newer interpreters.
# NOTE(review): backslash separators only resolve on Windows - confirm whether
# this notebook needs to run elsewhere.
pump_data_locations = [
    r'..\data\Engelerschans.csv',
    r'..\data\helftheuvel.csv',
    r'..\data\Maaspoort.csv',
    r'..\data\oude_engelenseweg.csv',
    r'..\data\Rompert.csv',
]

In [10]:
def get_mean_fastest_pump_speed(full_df, nr_of_extremes=100):
    """
    Gets the mean of the fastest pump speeds while also removing outliers.

    The `nr_of_extremes` smallest 'level_diff' values (ascending sort) are the
    ones this function treats as the fastest. Values whose absolute z-score
    (population standard deviation, ddof=0) is 3 or more are discarded before
    the mean is taken. The input frame is not modified.

    :param full_df: DataFrame, the pump dataframe with a 'level_diff' column
    :param nr_of_extremes: int, the number of smallest 'level_diff' values to average
    :return: float, the absolute value of the mean of the kept values
             (NaN when no non-missing values are available)
    """
    # NaNs sort last, so they only end up in the selection when there are fewer
    # than nr_of_extremes real values; drop them so they cannot affect the result.
    fastest = full_df['level_diff'].sort_values().iloc[:nr_of_extremes].dropna()

    spread = fastest.std(ddof=0)
    # Only filter when the values actually vary. With zero spread the z-score is
    # 0/0 = NaN, the `< 3` comparison rejects every value and the mean would be
    # NaN even though all selected values agree.
    if spread > 0:
        zscore = ((fastest - fastest.mean()) / spread).abs()
        fastest = fastest[zscore < 3]

    return abs(fastest.mean())


def get_mean_extremes_pump_level(full_df, nr_of_extremes=100, get_min=False):
    """
    Gets the mean of the highest or lowest levels of the pump while also removing outliers.

    The level is read from the second column of `full_df`. Values whose
    absolute z-score (population standard deviation, ddof=0) is 3 or more are
    discarded before the mean is taken. The input frame is not modified.

    :param full_df: DataFrame, the pump dataframe with the total level in the second column
    :param nr_of_extremes: int, the number of extremes to take into account for the level calculation
    :param get_min: bool, when True the lowest levels are used, otherwise the highest
    :return: float, the absolute value of the mean of the kept values
             (NaN when no complete rows are available)
    """
    level_column = full_df.columns[1]

    # dropna() is applied to whole rows: a row with a missing value in ANY
    # column is excluded, not only rows that lack a level reading.
    levels = (
        full_df.dropna()[level_column]
        .sort_values(ascending=get_min)
        .iloc[:nr_of_extremes]
    )

    spread = levels.std(ddof=0)
    # Only filter when the values actually vary. With zero spread the z-score is
    # 0/0 = NaN, the `< 3` comparison rejects every value and the mean would be
    # NaN even though all selected levels agree.
    if spread > 0:
        zscore = ((levels - levels.mean()) / spread).abs()
        levels = levels[zscore < 3]

    return abs(levels.mean())

In [13]:
# Report, for every pump file, the mean fastest pump speed together with the
# mean of its 50 highest and 50 lowest level readings.
for location in pump_data_locations:
    full_df = pd.read_csv(location)

    pump_mean = get_mean_fastest_pump_speed(full_df)
    max_level_mean = get_mean_extremes_pump_level(full_df, nr_of_extremes=50)
    min_level_mean = get_mean_extremes_pump_level(full_df, nr_of_extremes=50, get_min=True)

    print(f"For {location} pump speed: {round(pump_mean,2)} mean max level: {round(max_level_mean,2)} mean min level: {round(min_level_mean,2)}")

For ..\data\Engelerschans.csv pump speed: 69.13 mean max level: 358.97 mean min level: 20.01
For ..\data\helftheuvel.csv pump speed: 55.42 mean max level: 382.59 mean min level: 144.64
For ..\data\Maaspoort.csv pump speed: 78.19 mean max level: 484.79 mean min level: 66.7
For ..\data\oude_engelenseweg.csv pump speed: 60.07 mean max level: 384.57 mean min level: 158.74
For ..\data\Rompert.csv pump speed: 73.41 mean max level: 439.73 mean min level: 69.65
