In [11]:
import pandas as pd
from scipy import stats
import numpy as np

In [12]:
# CSV files (one per pump location) analysed by this notebook, relative to the
# notebook's working directory.
# Raw strings are used so the backslashes stay literal path separators: sequences
# such as "\d" or "\h" in a normal string literal are invalid escapes and trigger
# a DeprecationWarning/SyntaxWarning on current Python versions.
pump_data_locations = [
    r'..\data\Engelerschans.csv',
    r'..\data\helftheuvel.csv',
    r'..\data\Maaspoort.csv',
    r'..\data\oude_engelenseweg.csv',
    r'..\data\Rompert.csv',
]

In [47]:
def get_mean_fastest_pump_speed(full_df, nr_of_extremes=100):
    """
    Gets the mean of the fastest pump speeds while also removing outliers.

    The `nr_of_extremes` smallest 'level_diff' values are selected (presumably the
    largest level drops, i.e. the hours the pump worked hardest), values whose
    absolute z-score within that selection is 3 or more are discarded, and the
    absolute value of the mean of the remaining values is returned.

    :param full_df: DataFrame, the pump dataframe with a 'level_diff' column;
        it is not modified
    :param nr_of_extremes: int, the amount of fastest hours we need to find the mean from
    :return: float, absolute mean of the selected values; NaN when 'level_diff'
        holds no non-missing values
    """
    # Missing values carry no information: they sort last, are skipped by
    # mean()/std() and can never pass the z-score filter, so drop them up front.
    fastest = full_df['level_diff'].dropna().sort_values().iloc[:nr_of_extremes]

    # Population standard deviation (ddof=0) of the selected values.
    spread = fastest.std(ddof=0)

    # When every selected value is identical the spread is 0 and the z-score
    # would be 0/0 = NaN for each row, filtering out everything. There are no
    # outliers in that case, so the filter is skipped.
    if spread > 0:
        zscore = ((fastest - fastest.mean()) / spread).abs()
        fastest = fastest[zscore < 3]

    return abs(fastest.mean())


def get_mean_max_pump_level(full_df, nr_of_extremes=100):
    """
    Gets the mean of the highest levels of the pump while also removing outliers.

    Rows containing a missing value in any column are discarded, the
    `nr_of_extremes` largest values of the second column are selected, values
    whose absolute z-score within that selection is 3 or more are dropped, and
    the absolute value of the mean of the remaining values is returned.

    :param full_df: DataFrame, the pump dataframe with the total level in the
        second column; it is not modified
    :param nr_of_extremes: int, the amount of extremes to take into account for
        the max level calculation
    :return: float, absolute mean of the selected levels; NaN when no complete
        rows are available
    """
    level_col = full_df.columns[1]

    # dropna() is intentionally applied to the whole row (not only the level
    # column) so that the default results stay the same as before.
    highest = (
        full_df.dropna()
        .sort_values(by=level_col, ascending=False)[level_col]
        .iloc[:nr_of_extremes]  # honour the parameter instead of a fixed 100
    )

    # Population standard deviation (ddof=0) of the selected levels.
    spread = highest.std(ddof=0)

    # Identical values give a spread of 0 and NaN z-scores (0/0), which would
    # filter out every row; there are no outliers then, so skip the filter.
    if spread > 0:
        zscore = ((highest - highest.mean()) / spread).abs()
        highest = highest[zscore < 3]

    return abs(highest.mean())

In [48]:
# Print, for every pump CSV, the outlier-filtered mean of its fastest pump
# speeds and the outlier-filtered mean of its highest levels.
for location in pump_data_locations:
    # The helpers below read the 'level_diff' column and the second column of
    # this frame, so each CSV must provide both.
    full_df = pd.read_csv(location)
    # Both helpers are called with their default nr_of_extremes (100).
    pump_mean = get_mean_fastest_pump_speed(full_df)
    max_level_mean = get_mean_max_pump_level(full_df)
    print(f"For {location} we have mean pump speed {pump_mean} per hour and mean max level {max_level_mean}")

For ..\data\Engelerschans.csv we have mean pump speed 69.1330449916618 per hour and mean max level 343.1862791402493
For ..\data\helftheuvel.csv we have mean pump speed 55.42466796616097 per hour and mean max level 350.49848606679103
For ..\data\Maaspoort.csv we have mean pump speed 78.19384385872104 per hour and mean max level 431.2285730125431
For ..\data\oude_engelenseweg.csv we have mean pump speed 60.07232021813718 per hour and mean max level 360.81809482591956
For ..\data\Rompert.csv we have mean pump speed 73.40589834870443 per hour and mean max level 415.91666873445945
